From: Shiyang Ruan <ruansy.fnst@fujitsu.com> To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>, <linux-nvdimm@lists.01.org>, <linux-fsdevel@vger.kernel.org> Cc: jack@suse.cz, darrick.wong@oracle.com, david@fromorbit.com, ocfs2-devel@oss.oracle.com, viro@zeniv.linux.org.uk, dan.j.williams@intel.com, linux-btrfs@vger.kernel.org Subject: [Ocfs2-devel] [PATCH v3 01/10] fsdax: Factor helpers to simplify dax fault code Date: Fri, 19 Mar 2021 09:52:28 +0800 [thread overview] Message-ID: <20210319015237.993880-2-ruansy.fnst@fujitsu.com> (raw) In-Reply-To: <20210319015237.993880-1-ruansy.fnst@fujitsu.com> The dax page fault code is too long and a bit difficult to read. And it is hard to understand when we are trying to add new features. Some of the PTE/PMD code paths have similar logic. So, factor them into helper functions to simplify the code. Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> Reviewed-by: Christoph Hellwig <hch@lst.de> --- fs/dax.c | 152 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 84 insertions(+), 68 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 26d5dcd2d69e..7031e4302b13 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1243,6 +1243,52 @@ static bool dax_fault_is_synchronous(unsigned long flags, && (iomap->flags & IOMAP_F_DIRTY); } +/* + * If we are doing synchronous page fault and inode needs fsync, we can insert + * PTE/PMD into page tables only after that happens. Skip insertion for now and + * return the pfn so that caller can insert it after fsync is done. 
+ */ +static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn) +{ + if (WARN_ON_ONCE(!pfnp)) + return VM_FAULT_SIGBUS; + + *pfnp = pfn; + return VM_FAULT_NEEDDSYNC; +} + +static int dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos, vm_fault_t *ret) +{ + int error = 0; + unsigned long vaddr = vmf->address; + sector_t sector = dax_iomap_sector(iomap, pos); + + switch (iomap->type) { + case IOMAP_HOLE: + case IOMAP_UNWRITTEN: + clear_user_highpage(vmf->cow_page, vaddr); + break; + case IOMAP_MAPPED: + error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, + sector, vmf->cow_page, vaddr); + break; + default: + WARN_ON_ONCE(1); + error = -EIO; + break; + } + + if (error) + return error; + + __SetPageUptodate(vmf->cow_page); + *ret = finish_fault(vmf); + if (!*ret) + *ret = VM_FAULT_DONE_COW; + return 0; +} + static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) { @@ -1311,30 +1357,9 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, } if (vmf->cow_page) { - sector_t sector = dax_iomap_sector(&iomap, pos); - - switch (iomap.type) { - case IOMAP_HOLE: - case IOMAP_UNWRITTEN: - clear_user_highpage(vmf->cow_page, vaddr); - break; - case IOMAP_MAPPED: - error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev, - sector, vmf->cow_page, vaddr); - break; - default: - WARN_ON_ONCE(1); - error = -EIO; - break; - } - + error = dax_fault_cow_page(vmf, &iomap, pos, &ret); if (error) - goto error_finish_iomap; - - __SetPageUptodate(vmf->cow_page); - ret = finish_fault(vmf); - if (!ret) - ret = VM_FAULT_DONE_COW; + ret = dax_fault_return(error); goto finish_iomap; } @@ -1354,19 +1379,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, 0, write && !sync); - /* - * If we are doing synchronous page fault and inode needs fsync, - * we can insert PTE into page tables only after that 
happens. - * Skip insertion for now and return the pfn so that caller can - * insert it after fsync is done. - */ if (sync) { - if (WARN_ON_ONCE(!pfnp)) { - error = -EIO; - goto error_finish_iomap; - } - *pfnp = pfn; - ret = VM_FAULT_NEEDDSYNC | major; + ret = dax_fault_synchronous_pfnp(pfnp, pfn); goto finish_iomap; } trace_dax_insert_mapping(inode, vmf, entry); @@ -1465,13 +1479,45 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, return VM_FAULT_FALLBACK; } +static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, + pgoff_t max_pgoff) +{ + unsigned long pmd_addr = vmf->address & PMD_MASK; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the page cache. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + return true; + + /* Fall back to PTEs if we're going to COW */ + if (write && !(vmf->vma->vm_flags & VM_SHARED)) + return true; + + /* If the PMD would extend outside the VMA */ + if (pmd_addr < vmf->vma->vm_start) + return true; + if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end) + return true; + + /* If the PMD would extend beyond the file size */ + if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff) + return true; + + return false; +} + static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER); - unsigned long pmd_addr = vmf->address & PMD_MASK; bool write = vmf->flags & FAULT_FLAG_WRITE; bool sync; unsigned int iomap_flags = (write ? 
IOMAP_WRITE : 0) | IOMAP_FAULT; @@ -1494,33 +1540,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); - /* - * Make sure that the faulting address's PMD offset (color) matches - * the PMD offset from the start of the file. This is necessary so - * that a PMD range in the page table overlaps exactly with a PMD - * range in the page cache. - */ - if ((vmf->pgoff & PG_PMD_COLOUR) != - ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) - goto fallback; - - /* Fall back to PTEs if we're going to COW */ - if (write && !(vma->vm_flags & VM_SHARED)) - goto fallback; - - /* If the PMD would extend outside the VMA */ - if (pmd_addr < vma->vm_start) - goto fallback; - if ((pmd_addr + PMD_SIZE) > vma->vm_end) - goto fallback; - if (xas.xa_index >= max_pgoff) { result = VM_FAULT_SIGBUS; goto out; } - /* If the PMD would extend beyond the file size */ - if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff) + if (dax_fault_check_fallback(vmf, &xas, max_pgoff)) goto fallback; /* @@ -1572,17 +1597,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, DAX_PMD, write && !sync); - /* - * If we are doing synchronous page fault and inode needs fsync, - * we can insert PMD into page tables only after that happens. - * Skip insertion for now and return the pfn so that caller can - * insert it after fsync is done. - */ if (sync) { - if (WARN_ON_ONCE(!pfnp)) - goto finish_iomap; - *pfnp = pfn; - result = VM_FAULT_NEEDDSYNC; + result = dax_fault_synchronous_pfnp(pfnp, pfn); goto finish_iomap; } -- 2.30.1 _______________________________________________ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com https://oss.oracle.com/mailman/listinfo/ocfs2-devel
next prev parent reply other threads:[~2021-03-19 1:53 UTC|newest] Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-03-19 1:52 [Ocfs2-devel] [PATCH v3 00/10] fsdax, xfs: Add reflink&dedupe support for fsdax Shiyang Ruan 2021-03-19 1:52 ` Shiyang Ruan [this message] 2021-03-23 15:33 ` [Ocfs2-devel] [PATCH v3 01/10] fsdax: Factor helpers to simplify dax fault code Ritesh Harjani 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 02/10] fsdax: Factor helper: dax_fault_actor() Shiyang Ruan 2021-03-23 15:48 ` Ritesh Harjani 2021-03-31 3:57 ` ruansy.fnst 2021-04-02 7:47 ` Christoph Hellwig 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 03/10] fsdax: Output address in dax_iomap_pfn() and rename it Shiyang Ruan 2021-03-23 15:54 ` Ritesh Harjani 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 04/10] fsdax: Introduce dax_iomap_cow_copy() Shiyang Ruan 2021-03-23 16:08 ` Ritesh Harjani 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 05/10] fsdax: Replace mmap entry in case of CoW Shiyang Ruan 2021-04-01 6:39 ` Ritesh Harjani 2021-04-01 7:03 ` ruansy.fnst 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 06/10] fsdax: Add dax_iomap_cow_copy() for dax_iomap_zero Shiyang Ruan 2021-04-01 6:45 ` Ritesh Harjani 2021-04-01 7:00 ` ruansy.fnst 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 07/10] iomap: Introduce iomap_apply2() for operations on two files Shiyang Ruan 2021-04-01 7:12 ` Ritesh Harjani 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 08/10] fsdax: Dedup file range to use a compare function Shiyang Ruan 2021-04-01 11:11 ` Ritesh Harjani 2021-04-08 3:21 ` ruansy.fnst 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 09/10] fs/xfs: Handle CoW for fsdax write() path Shiyang Ruan 2021-03-19 1:52 ` [Ocfs2-devel] [PATCH v3 10/10] fs/xfs: Add dedupe support for fsdax Shiyang Ruan 2021-03-23 15:27 ` [Ocfs2-devel] [PATCH v3 00/10] fsdax, xfs: Add reflink&dedupe " Ritesh Harjani 2021-04-02 7:49 ` Christoph Hellwig 2021-04-02 8:18 ` ruansy.fnst
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210319015237.993880-2-ruansy.fnst@fujitsu.com \ --to=ruansy.fnst@fujitsu.com \ --cc=dan.j.williams@intel.com \ --cc=darrick.wong@oracle.com \ --cc=david@fromorbit.com \ --cc=jack@suse.cz \ --cc=linux-btrfs@vger.kernel.org \ --cc=linux-fsdevel@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-nvdimm@lists.01.org \ --cc=linux-xfs@vger.kernel.org \ --cc=ocfs2-devel@oss.oracle.com \ --cc=viro@zeniv.linux.org.uk \ --subject='Re: [Ocfs2-devel] [PATCH v3 01/10] fsdax: Factor helpers to simplify dax fault code' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).