From: Dan Williams <dan.j.williams@intel.com> To: linux-nvdimm@lists.01.org Cc: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, Jan Kara <jack@suse.cz>, hch@lst.de, linux-kernel@vger.kernel.org Subject: [PATCH v6 02/13] device-dax: Enable page_mapping() Date: Fri, 13 Jul 2018 21:49:40 -0700 [thread overview] Message-ID: <153154377993.34503.14114016873062907260.stgit@dwillia2-desk3.amr.corp.intel.com> (raw) In-Reply-To: <153154376846.34503.15480221419473501643.stgit@dwillia2-desk3.amr.corp.intel.com> In support of enabling memory_failure() handling for device-dax mappings, set the ->mapping association of pages backing device-dax mappings. The rmap implementation requires page_mapping() to return the address_space hosting the vmas that map the page. The ->mapping pointer is never cleared. There is no possibility for the page to become associated with another address_space while the device is enabled. When the device is disabled the 'struct page' array for the device is destroyed / later reinitialized to zero. Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/dax/device.c | 55 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index ad5e7b4a15dc..95cfcfd612df 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -245,12 +245,11 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, } static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { struct device *dev = &dev_dax->dev; struct dax_region *dax_region; phys_addr_t phys; - pfn_t pfn; unsigned int fault_size = PAGE_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -272,20 +271,19 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_mixed(vmf->vma, vmf->address, pfn); + return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); } static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { unsigned long pmd_addr = vmf->address & PMD_MASK; struct device *dev = &dev_dax->dev; struct dax_region *dax_region; phys_addr_t phys; pgoff_t pgoff; - pfn_t pfn; unsigned int fault_size = PMD_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -321,22 +319,21 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn, + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { unsigned long pud_addr = vmf->address & PUD_MASK; struct device *dev = &dev_dax->dev; struct dax_region *dax_region; phys_addr_t phys; pgoff_t pgoff; - pfn_t pfn; unsigned int fault_size = PUD_SIZE; @@ -373,14 +370,14 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, pfn, + return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { return VM_FAULT_FALLBACK; } @@ -389,8 +386,10 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { - int rc, id; struct file *filp = vmf->vma->vm_file; + unsigned long fault_size; + int rc, id; + pfn_t pfn; struct dev_dax *dev_dax = filp->private_data; dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, @@ -400,17 +399,39 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, id = dax_read_lock(); switch (pe_size) { case PE_SIZE_PTE: - rc = __dev_dax_pte_fault(dev_dax, vmf); + fault_size = PAGE_SIZE; + rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn); break; case PE_SIZE_PMD: - rc = __dev_dax_pmd_fault(dev_dax, vmf); + fault_size = PMD_SIZE; + rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn); break; case PE_SIZE_PUD: - rc = __dev_dax_pud_fault(dev_dax, vmf); + fault_size = PUD_SIZE; + rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn); break; default: rc = VM_FAULT_SIGBUS; } + + if (rc == VM_FAULT_NOPAGE) { + unsigned long i; + + /* + * In the device-dax case the only possibility for a + * VM_FAULT_NOPAGE result is when device-dax capacity is + * mapped. No need to consider the zero page, or racing + * conflicting mappings. + */ + for (i = 0; i < fault_size / PAGE_SIZE; i++) { + struct page *page; + + page = pfn_to_page(pfn_t_to_pfn(pfn) + i); + if (page->mapping) + continue; + page->mapping = filp->f_mapping; + } + } dax_read_unlock(id); return rc; _______________________________________________ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com> To: linux-nvdimm@lists.01.org Cc: Jan Kara <jack@suse.cz>, hch@lst.de, linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org Subject: [PATCH v6 02/13] device-dax: Enable page_mapping() Date: Fri, 13 Jul 2018 21:49:40 -0700 [thread overview] Message-ID: <153154377993.34503.14114016873062907260.stgit@dwillia2-desk3.amr.corp.intel.com> (raw) In-Reply-To: <153154376846.34503.15480221419473501643.stgit@dwillia2-desk3.amr.corp.intel.com> In support of enabling memory_failure() handling for device-dax mappings, set the ->mapping association of pages backing device-dax mappings. The rmap implementation requires page_mapping() to return the address_space hosting the vmas that map the page. The ->mapping pointer is never cleared. There is no possibility for the page to become associated with another address_space while the device is enabled. When the device is disabled the 'struct page' array for the device is destroyed / later reinitialized to zero. Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/dax/device.c | 55 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index ad5e7b4a15dc..95cfcfd612df 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -245,12 +245,11 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, } static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { struct device *dev = &dev_dax->dev; struct dax_region *dax_region; phys_addr_t phys; - pfn_t pfn; unsigned int fault_size = PAGE_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -272,20 +271,19 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_mixed(vmf->vma, vmf->address, pfn); + return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); } static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { unsigned long pmd_addr = vmf->address & PMD_MASK; struct device *dev = &dev_dax->dev; struct dax_region *dax_region; phys_addr_t phys; pgoff_t pgoff; - pfn_t pfn; unsigned int fault_size = PMD_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -321,22 +319,21 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn, + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { unsigned long pud_addr = vmf->address & PUD_MASK; struct device *dev = &dev_dax->dev; struct dax_region *dax_region; phys_addr_t phys; pgoff_t pgoff; - pfn_t pfn; unsigned int fault_size = PUD_SIZE; @@ -373,14 +370,14 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, pfn, + return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf) + struct vm_fault *vmf, pfn_t *pfn) { return VM_FAULT_FALLBACK; } @@ -389,8 +386,10 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { - int rc, id; struct file *filp = vmf->vma->vm_file; + unsigned long fault_size; + int rc, id; + pfn_t pfn; struct dev_dax *dev_dax = filp->private_data; dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, @@ -400,17 +399,39 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, id = dax_read_lock(); switch (pe_size) { case PE_SIZE_PTE: - rc = __dev_dax_pte_fault(dev_dax, vmf); + fault_size = PAGE_SIZE; + rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn); break; case PE_SIZE_PMD: - rc = __dev_dax_pmd_fault(dev_dax, vmf); + fault_size = PMD_SIZE; + rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn); break; case PE_SIZE_PUD: - rc = __dev_dax_pud_fault(dev_dax, vmf); + fault_size = PUD_SIZE; + rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn); break; default: rc = VM_FAULT_SIGBUS; } + + if (rc == VM_FAULT_NOPAGE) { + unsigned long i; + + /* + * In the device-dax case the only possibility for a + * VM_FAULT_NOPAGE result is when device-dax capacity is + * mapped. No need to consider the zero page, or racing + * conflicting mappings. + */ + for (i = 0; i < fault_size / PAGE_SIZE; i++) { + struct page *page; + + page = pfn_to_page(pfn_t_to_pfn(pfn) + i); + if (page->mapping) + continue; + page->mapping = filp->f_mapping; + } + } dax_read_unlock(id); return rc;
next prev parent reply other threads:[~2018-07-14 4:59 UTC|newest] Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-07-14 4:49 [PATCH v6 00/13] mm: Teach memory_failure() about ZONE_DEVICE pages Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 01/13] device-dax: Convert to vmf_insert_mixed and vm_fault_t Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` Dan Williams [this message] 2018-07-14 4:49 ` [PATCH v6 02/13] device-dax: Enable page_mapping() Dan Williams 2018-07-14 4:49 ` [PATCH v6 03/13] device-dax: Set page->index Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 04/13] filesystem-dax: " Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 05/13] mm, madvise_inject_error: Disable MADV_SOFT_OFFLINE for ZONE_DEVICE pages Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-17 6:47 ` Naoya Horiguchi 2018-07-17 6:47 ` Naoya Horiguchi 2018-07-14 4:50 ` [PATCH v6 06/13] mm, dev_pagemap: Do not clear ->mapping on final put Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-23 16:12 ` Dave Jiang 2018-07-23 16:12 ` Dave Jiang 2018-07-23 16:12 ` Dave Jiang 2018-07-23 16:23 ` Jerome Glisse 2018-07-23 16:23 ` Jerome Glisse 2018-07-23 16:23 ` Jerome Glisse 2018-07-23 16:23 ` Jerome Glisse 2018-07-14 4:50 ` [PATCH v6 07/13] mm, madvise_inject_error: Let memory_failure() optionally take a page reference Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-17 6:52 ` Naoya Horiguchi 2018-07-14 4:50 ` [PATCH v6 08/13] mm, memory_failure: Collect mapping size in collect_procs() Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-14 4:50 ` [PATCH v6 09/13] filesystem-dax: Introduce dax_lock_mapping_entry() Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-08-06 9:21 ` Jan Kara 2018-08-06 9:21 ` Jan Kara 2018-07-14 4:50 ` [PATCH v6 10/13] mm, memory_failure: Teach memory_failure() about dev_pagemap pages Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-08-06 9:27 ` Jan Kara 2018-08-06 9:27 ` Jan Kara 2018-08-06 9:27 ` Jan Kara 2018-08-06 9:27 ` Jan Kara 2018-07-14 4:50 ` [PATCH v6 11/13] x86/mm/pat: Prepare {reserve, free}_memtype() for "decoy" addresses Dan Williams 2018-07-14 4:50 ` [v6,11/13] " Dan Williams 2018-07-14 4:50 ` [PATCH v6 11/13] " Dan Williams 2018-07-24 7:36 ` Ingo Molnar 2018-07-24 7:36 ` [v6,11/13] " Ingo Molnar 2018-07-24 7:36 ` [PATCH v6 11/13] " Ingo Molnar 2018-07-24 15:46 ` Dave Jiang 2018-07-24 15:46 ` [v6,11/13] " Dave Jiang 2018-07-24 15:46 ` [PATCH v6 11/13] " Dave Jiang 2018-07-14 4:50 ` [PATCH v6 12/13] x86/memory_failure: Introduce {set, clear}_mce_nospec() Dan Williams 2018-07-14 4:50 ` [v6,12/13] " Dan Williams 2018-07-14 4:50 ` [PATCH v6 12/13] " Dan Williams 2018-07-14 4:50 ` [PATCH v6 13/13] libnvdimm, pmem: Restore page attributes when clearing errors Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-19 17:57 ` [PATCH v6 00/13] mm: Teach memory_failure() about ZONE_DEVICE pages Dave Jiang 2018-07-19 17:57 ` Dave Jiang 2018-07-24 7:39 ` Ingo Molnar 2018-07-24 7:39 ` Ingo Molnar
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=153154377993.34503.14114016873062907260.stgit@dwillia2-desk3.amr.corp.intel.com \ --to=dan.j.williams@intel.com \ --cc=hch@lst.de \ --cc=jack@suse.cz \ --cc=linux-fsdevel@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=linux-nvdimm@lists.01.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.