From: Shiyang Ruan <ruansy.fnst@fujitsu.com> To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>, <linux-nvdimm@lists.01.org>, <linux-fsdevel@vger.kernel.org> Cc: darrick.wong@oracle.com, willy@infradead.org, jack@suse.cz, viro@zeniv.linux.org.uk, linux-btrfs@vger.kernel.org, david@fromorbit.com, hch@lst.de, rgoldwyn@suse.de Subject: [PATCH v2 1/3] fsdax: Factor helpers to simplify dax fault code Date: Wed, 7 Apr 2021 21:38:21 +0800 [thread overview] Message-ID: <20210407133823.828176-2-ruansy.fnst@fujitsu.com> (raw) In-Reply-To: <20210407133823.828176-1-ruansy.fnst@fujitsu.com> The dax page fault code is too long and a bit difficult to read. And it is hard to understand when we trying to add new features. Some of the PTE/PMD codes have similar logic. So, factor them as helper functions to simplify the code. Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com> --- fs/dax.c | 153 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 84 insertions(+), 69 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b3d27fdc6775..f843fb8fbbf1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1244,6 +1244,53 @@ static bool dax_fault_is_synchronous(unsigned long flags, && (iomap->flags & IOMAP_F_DIRTY); } +/* + * If we are doing synchronous page fault and inode needs fsync, we can insert + * PTE/PMD into page tables only after that happens. Skip insertion for now and + * return the pfn so that caller can insert it after fsync is done. + */ +static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn) +{ + if (WARN_ON_ONCE(!pfnp)) + return VM_FAULT_SIGBUS; + + *pfnp = pfn; + return VM_FAULT_NEEDDSYNC; +} + +static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos) +{ + int error = 0; + vm_fault_t ret; + unsigned long vaddr = vmf->address; + sector_t sector = dax_iomap_sector(iomap, pos); + + switch (iomap->type) { + case IOMAP_HOLE: + case IOMAP_UNWRITTEN: + clear_user_highpage(vmf->cow_page, vaddr); + break; + case IOMAP_MAPPED: + error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, + sector, vmf->cow_page, vaddr); + break; + default: + WARN_ON_ONCE(1); + error = -EIO; + break; + } + + if (error) + return dax_fault_return(error); + + __SetPageUptodate(vmf->cow_page); + ret = finish_fault(vmf); + if (!ret) + ret = VM_FAULT_DONE_COW; + return ret; +} + static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) { @@ -1312,30 +1359,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, } if (vmf->cow_page) { - sector_t sector = dax_iomap_sector(&iomap, pos); - - switch (iomap.type) { - case IOMAP_HOLE: - case IOMAP_UNWRITTEN: - clear_user_highpage(vmf->cow_page, vaddr); - break; - case IOMAP_MAPPED: - error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev, - sector, vmf->cow_page, vaddr); - break; - default: - WARN_ON_ONCE(1); - error = -EIO; - break; - } - - if (error) - goto error_finish_iomap; - - __SetPageUptodate(vmf->cow_page); - ret = finish_fault(vmf); - if (!ret) - ret = VM_FAULT_DONE_COW; + ret = dax_fault_cow_page(vmf, &iomap, pos); goto finish_iomap; } @@ -1355,19 +1379,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, 0, write && !sync); - /* - * If we are doing synchronous page fault and inode needs fsync, - * we can insert PTE into page tables only after that happens. - * Skip insertion for now and return the pfn so that caller can - * insert it after fsync is done. - */ if (sync) { - if (WARN_ON_ONCE(!pfnp)) { - error = -EIO; - goto error_finish_iomap; - } - *pfnp = pfn; - ret = VM_FAULT_NEEDDSYNC | major; + ret = dax_fault_synchronous_pfnp(pfnp, pfn); goto finish_iomap; } trace_dax_insert_mapping(inode, vmf, entry); @@ -1466,13 +1479,45 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, return VM_FAULT_FALLBACK; } +static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, + pgoff_t max_pgoff) +{ + unsigned long pmd_addr = vmf->address & PMD_MASK; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the page cache. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + return true; + + /* Fall back to PTEs if we're going to COW */ + if (write && !(vmf->vma->vm_flags & VM_SHARED)) + return true; + + /* If the PMD would extend outside the VMA */ + if (pmd_addr < vmf->vma->vm_start) + return true; + if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end) + return true; + + /* If the PMD would extend beyond the file size */ + if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff) + return true; + + return false; +} + static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER); - unsigned long pmd_addr = vmf->address & PMD_MASK; bool write = vmf->flags & FAULT_FLAG_WRITE; bool sync; unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; @@ -1495,33 +1540,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); - /* - * Make sure that the faulting address's PMD offset (color) matches - * the PMD offset from the start of the file. This is necessary so - * that a PMD range in the page table overlaps exactly with a PMD - * range in the page cache. - */ - if ((vmf->pgoff & PG_PMD_COLOUR) != - ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) - goto fallback; - - /* Fall back to PTEs if we're going to COW */ - if (write && !(vma->vm_flags & VM_SHARED)) - goto fallback; - - /* If the PMD would extend outside the VMA */ - if (pmd_addr < vma->vm_start) - goto fallback; - if ((pmd_addr + PMD_SIZE) > vma->vm_end) - goto fallback; - if (xas.xa_index >= max_pgoff) { result = VM_FAULT_SIGBUS; goto out; } - /* If the PMD would extend beyond the file size */ - if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff) + if (dax_fault_check_fallback(vmf, &xas, max_pgoff)) goto fallback; /* @@ -1573,17 +1597,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, DAX_PMD, write && !sync); - /* - * If we are doing synchronous page fault and inode needs fsync, - * we can insert PMD into page tables only after that happens. - * Skip insertion for now and return the pfn so that caller can - * insert it after fsync is done. - */ if (sync) { - if (WARN_ON_ONCE(!pfnp)) - goto finish_iomap; - *pfnp = pfn; - result = VM_FAULT_NEEDDSYNC; + result = dax_fault_synchronous_pfnp(pfnp, pfn); goto finish_iomap; } -- 2.31.0 _______________________________________________ Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
WARNING: multiple messages have this Message-ID (diff)
From: Shiyang Ruan <ruansy.fnst@fujitsu.com> To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>, <linux-nvdimm@lists.01.org>, <linux-fsdevel@vger.kernel.org> Cc: <darrick.wong@oracle.com>, <dan.j.williams@intel.com>, <willy@infradead.org>, <jack@suse.cz>, <viro@zeniv.linux.org.uk>, <linux-btrfs@vger.kernel.org>, <david@fromorbit.com>, <hch@lst.de>, <rgoldwyn@suse.de>, Ritesh Harjani <riteshh@linux.ibm.com> Subject: [PATCH v2 1/3] fsdax: Factor helpers to simplify dax fault code Date: Wed, 7 Apr 2021 21:38:21 +0800 [thread overview] Message-ID: <20210407133823.828176-2-ruansy.fnst@fujitsu.com> (raw) In-Reply-To: <20210407133823.828176-1-ruansy.fnst@fujitsu.com> The dax page fault code is too long and a bit difficult to read. And it is hard to understand when we trying to add new features. Some of the PTE/PMD codes have similar logic. So, factor them as helper functions to simplify the code. Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com> --- fs/dax.c | 153 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 84 insertions(+), 69 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b3d27fdc6775..f843fb8fbbf1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1244,6 +1244,53 @@ static bool dax_fault_is_synchronous(unsigned long flags, && (iomap->flags & IOMAP_F_DIRTY); } +/* + * If we are doing synchronous page fault and inode needs fsync, we can insert + * PTE/PMD into page tables only after that happens. Skip insertion for now and + * return the pfn so that caller can insert it after fsync is done. + */ +static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn) +{ + if (WARN_ON_ONCE(!pfnp)) + return VM_FAULT_SIGBUS; + + *pfnp = pfn; + return VM_FAULT_NEEDDSYNC; +} + +static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos) +{ + int error = 0; + vm_fault_t ret; + unsigned long vaddr = vmf->address; + sector_t sector = dax_iomap_sector(iomap, pos); + + switch (iomap->type) { + case IOMAP_HOLE: + case IOMAP_UNWRITTEN: + clear_user_highpage(vmf->cow_page, vaddr); + break; + case IOMAP_MAPPED: + error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, + sector, vmf->cow_page, vaddr); + break; + default: + WARN_ON_ONCE(1); + error = -EIO; + break; + } + + if (error) + return dax_fault_return(error); + + __SetPageUptodate(vmf->cow_page); + ret = finish_fault(vmf); + if (!ret) + ret = VM_FAULT_DONE_COW; + return ret; +} + static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) { @@ -1312,30 +1359,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, } if (vmf->cow_page) { - sector_t sector = dax_iomap_sector(&iomap, pos); - - switch (iomap.type) { - case IOMAP_HOLE: - case IOMAP_UNWRITTEN: - clear_user_highpage(vmf->cow_page, vaddr); - break; - case IOMAP_MAPPED: - error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev, - sector, vmf->cow_page, vaddr); - break; - default: - WARN_ON_ONCE(1); - error = -EIO; - break; - } - - if (error) - goto error_finish_iomap; - - __SetPageUptodate(vmf->cow_page); - ret = finish_fault(vmf); - if (!ret) - ret = VM_FAULT_DONE_COW; + ret = dax_fault_cow_page(vmf, &iomap, pos); goto finish_iomap; } @@ -1355,19 +1379,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, 0, write && !sync); - /* - * If we are doing synchronous page fault and inode needs fsync, - * we can insert PTE into page tables only after that happens. - * Skip insertion for now and return the pfn so that caller can - * insert it after fsync is done. - */ if (sync) { - if (WARN_ON_ONCE(!pfnp)) { - error = -EIO; - goto error_finish_iomap; - } - *pfnp = pfn; - ret = VM_FAULT_NEEDDSYNC | major; + ret = dax_fault_synchronous_pfnp(pfnp, pfn); goto finish_iomap; } trace_dax_insert_mapping(inode, vmf, entry); @@ -1466,13 +1479,45 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, return VM_FAULT_FALLBACK; } +static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, + pgoff_t max_pgoff) +{ + unsigned long pmd_addr = vmf->address & PMD_MASK; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the page cache. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + return true; + + /* Fall back to PTEs if we're going to COW */ + if (write && !(vmf->vma->vm_flags & VM_SHARED)) + return true; + + /* If the PMD would extend outside the VMA */ + if (pmd_addr < vmf->vma->vm_start) + return true; + if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end) + return true; + + /* If the PMD would extend beyond the file size */ + if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff) + return true; + + return false; +} + static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER); - unsigned long pmd_addr = vmf->address & PMD_MASK; bool write = vmf->flags & FAULT_FLAG_WRITE; bool sync; unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; @@ -1495,33 +1540,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); - /* - * Make sure that the faulting address's PMD offset (color) matches - * the PMD offset from the start of the file. This is necessary so - * that a PMD range in the page table overlaps exactly with a PMD - * range in the page cache. - */ - if ((vmf->pgoff & PG_PMD_COLOUR) != - ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) - goto fallback; - - /* Fall back to PTEs if we're going to COW */ - if (write && !(vma->vm_flags & VM_SHARED)) - goto fallback; - - /* If the PMD would extend outside the VMA */ - if (pmd_addr < vma->vm_start) - goto fallback; - if ((pmd_addr + PMD_SIZE) > vma->vm_end) - goto fallback; - if (xas.xa_index >= max_pgoff) { result = VM_FAULT_SIGBUS; goto out; } - /* If the PMD would extend beyond the file size */ - if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff) + if (dax_fault_check_fallback(vmf, &xas, max_pgoff)) goto fallback; /* @@ -1573,17 +1597,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, DAX_PMD, write && !sync); - /* - * If we are doing synchronous page fault and inode needs fsync, - * we can insert PMD into page tables only after that happens. - * Skip insertion for now and return the pfn so that caller can - * insert it after fsync is done. - */ if (sync) { - if (WARN_ON_ONCE(!pfnp)) - goto finish_iomap; - *pfnp = pfn; - result = VM_FAULT_NEEDDSYNC; + result = dax_fault_synchronous_pfnp(pfnp, pfn); goto finish_iomap; } -- 2.31.0
next prev parent reply other threads:[~2021-04-07 13:38 UTC|newest] Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-04-07 13:38 [PATCH v2 0/3] fsdax: Factor helper functions to simplify the code Shiyang Ruan 2021-04-07 13:38 ` Shiyang Ruan 2021-04-07 13:38 ` Shiyang Ruan [this message] 2021-04-07 13:38 ` [PATCH v2 1/3] fsdax: Factor helpers to simplify dax fault code Shiyang Ruan 2021-04-26 22:48 ` Ira Weiny 2021-04-26 22:48 ` Ira Weiny 2021-04-07 13:38 ` [PATCH v2 2/3] fsdax: Factor helper: dax_fault_actor() Shiyang Ruan 2021-04-07 13:38 ` Shiyang Ruan 2021-04-08 21:10 ` Darrick J. Wong 2021-04-08 21:10 ` Darrick J. Wong 2021-04-09 2:03 ` ruansy.fnst 2021-04-09 2:03 ` ruansy.fnst 2021-04-07 13:38 ` [PATCH v2 3/3] fsdax: Output address in dax_iomap_pfn() and rename it Shiyang Ruan 2021-04-07 13:38 ` Shiyang Ruan
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210407133823.828176-2-ruansy.fnst@fujitsu.com \ --to=ruansy.fnst@fujitsu.com \ --cc=darrick.wong@oracle.com \ --cc=david@fromorbit.com \ --cc=hch@lst.de \ --cc=jack@suse.cz \ --cc=linux-btrfs@vger.kernel.org \ --cc=linux-fsdevel@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-nvdimm@lists.01.org \ --cc=linux-xfs@vger.kernel.org \ --cc=rgoldwyn@suse.de \ --cc=viro@zeniv.linux.org.uk \ --cc=willy@infradead.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.