All of lore.kernel.org
 help / color / mirror / Atom feed
From: jglisse@redhat.com
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"Ralph Campbell" <rcampbell@nvidia.com>,
	"John Hubbard" <jhubbard@nvidia.com>
Subject: [PATCH 09/10] mm/hmm: allow to mirror vma of a file on a DAX backed filesystem
Date: Tue, 29 Jan 2019 11:54:27 -0500	[thread overview]
Message-ID: <20190129165428.3931-10-jglisse@redhat.com> (raw)
In-Reply-To: <20190129165428.3931-1-jglisse@redhat.com>

From: Jérôme Glisse <jglisse@redhat.com>

This add support to mirror vma which is an mmap of a file which is on
a filesystem that using a DAX block device. There is no reason not to
support that case.

Note that unlike GUP code we do not take page reference hence when we
back-off we have nothing to undo.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
---
 mm/hmm.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 112 insertions(+), 21 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 8b87e1813313..1a444885404e 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -334,6 +334,7 @@ EXPORT_SYMBOL(hmm_mirror_unregister);
 
 struct hmm_vma_walk {
 	struct hmm_range	*range;
+	struct dev_pagemap	*pgmap;
 	unsigned long		last;
 	bool			fault;
 	bool			block;
@@ -508,6 +509,15 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
 				range->flags[HMM_PFN_VALID];
 }
 
+static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
+{
+	if (!pud_present(pud))
+		return 0;
+	return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_WRITE] :
+				range->flags[HMM_PFN_VALID];
+}
+
 static int hmm_vma_handle_pmd(struct mm_walk *walk,
 			      unsigned long addr,
 			      unsigned long end,
@@ -529,8 +539,19 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
 		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 
 	pfn = pmd_pfn(pmd) + pte_index(addr);
-	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
+	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
+		if (pmd_devmap(pmd)) {
+			hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
+					      hmm_vma_walk->pgmap);
+			if (unlikely(!hmm_vma_walk->pgmap))
+				return -EBUSY;
+		}
 		pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+	}
+	if (hmm_vma_walk->pgmap) {
+		put_dev_pagemap(hmm_vma_walk->pgmap);
+		hmm_vma_walk->pgmap = NULL;
+	}
 	hmm_vma_walk->last = end;
 	return 0;
 }
@@ -617,10 +638,24 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	if (fault || write_fault)
 		goto fault;
 
+	if (pte_devmap(pte)) {
+		hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
+					      hmm_vma_walk->pgmap);
+		if (unlikely(!hmm_vma_walk->pgmap))
+			return -EBUSY;
+	} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
+		*pfn = range->values[HMM_PFN_SPECIAL];
+		return -EFAULT;
+	}
+
 	*pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) | cpu_flags;
 	return 0;
 
 fault:
+	if (hmm_vma_walk->pgmap) {
+		put_dev_pagemap(hmm_vma_walk->pgmap);
+		hmm_vma_walk->pgmap = NULL;
+	}
 	pte_unmap(ptep);
 	/* Fault any virtual address we were asked to fault */
 	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
@@ -708,12 +743,84 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 			return r;
 		}
 	}
+	if (hmm_vma_walk->pgmap) {
+		put_dev_pagemap(hmm_vma_walk->pgmap);
+		hmm_vma_walk->pgmap = NULL;
+	}
 	pte_unmap(ptep - 1);
 
 	hmm_vma_walk->last = addr;
 	return 0;
 }
 
+static int hmm_vma_walk_pud(pud_t *pudp,
+			    unsigned long start,
+			    unsigned long end,
+			    struct mm_walk *walk)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned long addr = start, next;
+	pmd_t *pmdp;
+	pud_t pud;
+	int ret;
+
+again:
+	pud = READ_ONCE(*pudp);
+	if (pud_none(pud))
+		return hmm_vma_walk_hole(start, end, walk);
+
+	if (pud_huge(pud) && pud_devmap(pud)) {
+		unsigned long i, npages, pfn;
+		uint64_t *pfns, cpu_flags;
+		bool fault, write_fault;
+
+		if (!pud_present(pud))
+			return hmm_vma_walk_hole(start, end, walk);
+
+		i = (addr - range->start) >> PAGE_SHIFT;
+		npages = (end - addr) >> PAGE_SHIFT;
+		pfns = &range->pfns[i];
+
+		cpu_flags = pud_to_hmm_pfn_flags(range, pud);
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+				     cpu_flags, &fault, &write_fault);
+		if (fault || write_fault)
+			return hmm_vma_walk_hole_(addr, end, fault,
+						write_fault, walk);
+
+		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+		for (i = 0; i < npages; ++i, ++pfn) {
+			hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
+					      hmm_vma_walk->pgmap);
+			if (unlikely(!hmm_vma_walk->pgmap))
+				return -EBUSY;
+			pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+		}
+		if (hmm_vma_walk->pgmap) {
+			put_dev_pagemap(hmm_vma_walk->pgmap);
+			hmm_vma_walk->pgmap = NULL;
+		}
+		hmm_vma_walk->last = end;
+		return 0;
+	}
+
+	split_huge_pud(vma, pudp, addr);
+	if (pud_none(*pudp))
+		goto again;
+
+	pmdp = pmd_offset(pudp, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		ret = hmm_vma_walk_pmd(pmdp, addr, next, walk);
+		if (ret)
+			return ret;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 0;
+}
+
 static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 				      unsigned long start, unsigned long end,
 				      struct mm_walk *walk)
@@ -786,14 +893,6 @@ static void hmm_pfns_clear(struct hmm_range *range,
 		*pfns = range->values[HMM_PFN_NONE];
 }
 
-static void hmm_pfns_special(struct hmm_range *range)
-{
-	unsigned long addr = range->start, i = 0;
-
-	for (; addr < range->end; addr += PAGE_SIZE, i++)
-		range->pfns[i] = range->values[HMM_PFN_SPECIAL];
-}
-
 /*
  * hmm_range_register() - start tracking change to CPU page table over a range
  * @range: range
@@ -911,12 +1010,6 @@ long hmm_range_snapshot(struct hmm_range *range)
 		if (vma == NULL || (vma->vm_flags & device_vma))
 			return -EFAULT;
 
-		/* FIXME support dax */
-		if (vma_is_dax(vma)) {
-			hmm_pfns_special(range);
-			return -EINVAL;
-		}
-
 		if (is_vm_hugetlb_page(vma)) {
 			struct hstate *h = hstate_vma(vma);
 
@@ -940,6 +1033,7 @@ long hmm_range_snapshot(struct hmm_range *range)
 		}
 
 		range->vma = vma;
+		hmm_vma_walk.pgmap = NULL;
 		hmm_vma_walk.last = start;
 		hmm_vma_walk.fault = false;
 		hmm_vma_walk.range = range;
@@ -951,6 +1045,7 @@ long hmm_range_snapshot(struct hmm_range *range)
 		mm_walk.pte_entry = NULL;
 		mm_walk.test_walk = NULL;
 		mm_walk.hugetlb_entry = NULL;
+		mm_walk.pud_entry = hmm_vma_walk_pud;
 		mm_walk.pmd_entry = hmm_vma_walk_pmd;
 		mm_walk.pte_hole = hmm_vma_walk_hole;
 		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
@@ -1018,12 +1113,6 @@ long hmm_range_fault(struct hmm_range *range, bool block)
 		if (vma == NULL || (vma->vm_flags & device_vma))
 			return -EFAULT;
 
-		/* FIXME support dax */
-		if (vma_is_dax(vma)) {
-			hmm_pfns_special(range);
-			return -EINVAL;
-		}
-
 		if (is_vm_hugetlb_page(vma)) {
 			struct hstate *h = hstate_vma(vma);
 
@@ -1047,6 +1136,7 @@ long hmm_range_fault(struct hmm_range *range, bool block)
 		}
 
 		range->vma = vma;
+		hmm_vma_walk.pgmap = NULL;
 		hmm_vma_walk.last = start;
 		hmm_vma_walk.fault = true;
 		hmm_vma_walk.block = block;
@@ -1059,6 +1149,7 @@ long hmm_range_fault(struct hmm_range *range, bool block)
 		mm_walk.pte_entry = NULL;
 		mm_walk.test_walk = NULL;
 		mm_walk.hugetlb_entry = NULL;
+		mm_walk.pud_entry = hmm_vma_walk_pud;
 		mm_walk.pmd_entry = hmm_vma_walk_pmd;
 		mm_walk.pte_hole = hmm_vma_walk_hole;
 		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
-- 
2.17.2


  parent reply	other threads:[~2019-01-29 16:55 UTC|newest]

Thread overview: 115+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-29 16:54 [PATCH 00/10] HMM updates for 5.1 jglisse
2019-01-29 16:54 ` [PATCH 01/10] mm/hmm: use reference counting for HMM struct jglisse
2019-02-20 23:47   ` John Hubbard
2019-02-20 23:59     ` Jerome Glisse
2019-02-21  0:06       ` John Hubbard
2019-02-21  0:15         ` Jerome Glisse
2019-02-21  0:32           ` John Hubbard
2019-02-21  0:37             ` Jerome Glisse
2019-02-21  0:42               ` John Hubbard
2019-01-29 16:54 ` [PATCH 02/10] mm/hmm: do not erase snapshot when a range is invalidated jglisse
2019-02-20 23:58   ` John Hubbard
2019-01-29 16:54 ` [PATCH 03/10] mm/hmm: improve and rename hmm_vma_get_pfns() to hmm_range_snapshot() jglisse
2019-02-21  0:25   ` John Hubbard
2019-02-21  0:28     ` Jerome Glisse
2019-01-29 16:54 ` [PATCH 04/10] mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault() jglisse
2019-01-29 16:54 ` [PATCH 05/10] mm/hmm: improve driver API to work and wait over a range jglisse
2019-01-29 16:54 ` [PATCH 06/10] mm/hmm: add default fault flags to avoid the need to pre-fill pfns arrays jglisse
2019-01-29 16:54 ` [PATCH 07/10] mm/hmm: add an helper function that fault pages and map them to a device jglisse
2019-03-18 20:21   ` Dan Williams
2019-03-18 20:21     ` Dan Williams
2019-03-18 20:41     ` Jerome Glisse
2019-03-18 21:30       ` Dan Williams
2019-03-18 21:30         ` Dan Williams
2019-03-18 22:15         ` Jerome Glisse
2019-03-19  3:29           ` Dan Williams
2019-03-19  3:29             ` Dan Williams
2019-03-19 13:30             ` Jerome Glisse
2019-03-19  8:44               ` Ira Weiny
2019-03-19 17:10                 ` Jerome Glisse
2019-03-19 14:10                   ` Ira Weiny
2019-01-29 16:54 ` [PATCH 08/10] mm/hmm: support hugetlbfs (snap shoting, faulting and DMA mapping) jglisse
2019-01-29 16:54 ` jglisse [this message]
2019-01-29 18:41   ` [PATCH 09/10] mm/hmm: allow to mirror vma of a file on a DAX backed filesystem Dan Williams
2019-01-29 18:41     ` Dan Williams
2019-01-29 19:31     ` Jerome Glisse
2019-01-29 20:51       ` Dan Williams
2019-01-29 20:51         ` Dan Williams
2019-01-29 21:21         ` Jerome Glisse
2019-01-30  2:32           ` Dan Williams
2019-01-30  2:32             ` Dan Williams
2019-01-30  3:03             ` Jerome Glisse
2019-01-30 17:25               ` Dan Williams
2019-01-30 17:25                 ` Dan Williams
2019-01-30 18:36                 ` Jerome Glisse
2019-01-31  3:28                   ` Dan Williams
2019-01-31  3:28                     ` Dan Williams
2019-01-31  4:16                     ` Jerome Glisse
2019-01-31  5:44                       ` Dan Williams
2019-01-31  5:44                         ` Dan Williams
2019-03-05 22:16                         ` Andrew Morton
2019-03-06  4:20                           ` Dan Williams
2019-03-06  4:20                             ` Dan Williams
2019-03-06 15:51                             ` Jerome Glisse
2019-03-06 15:57                               ` Dan Williams
2019-03-06 15:57                                 ` Dan Williams
2019-03-06 16:03                                 ` Jerome Glisse
2019-03-06 16:06                                   ` Dan Williams
2019-03-06 16:06                                     ` Dan Williams
2019-03-07 17:46                             ` Andrew Morton
2019-03-07 18:56                               ` Jerome Glisse
2019-03-12  3:13                                 ` Dan Williams
2019-03-12  3:13                                   ` Dan Williams
2019-03-12 15:25                                   ` Jerome Glisse
2019-03-12 16:06                                     ` Dan Williams
2019-03-12 16:06                                       ` Dan Williams
2019-03-12 19:06                                       ` Jerome Glisse
2019-03-12 19:30                                         ` Dan Williams
2019-03-12 19:30                                           ` Dan Williams
2019-03-12 20:34                                           ` Dave Chinner
2019-03-13  1:06                                             ` Dan Williams
2019-03-13  1:06                                               ` Dan Williams
2019-03-12 21:52                                           ` Andrew Morton
2019-03-13  0:10                                             ` Jerome Glisse
2019-03-13  0:46                                               ` Dan Williams
2019-03-13  0:46                                                 ` Dan Williams
2019-03-13  1:00                                                 ` Jerome Glisse
2019-03-13 16:06                                               ` Andrew Morton
2019-03-13 18:39                                                 ` Jerome Glisse
2019-03-06 15:49                           ` Jerome Glisse
2019-03-06 22:18                             ` Andrew Morton
2019-03-07  0:36                               ` Jerome Glisse
2019-01-29 16:54 ` [PATCH 10/10] mm/hmm: add helpers for driver to safely take the mmap_sem jglisse
2019-02-20 21:59   ` John Hubbard
2019-02-20 22:19     ` Jerome Glisse
2019-02-20 22:40       ` John Hubbard
2019-02-20 23:09         ` Jerome Glisse
2019-02-20 23:17 ` [PATCH 00/10] HMM updates for 5.1 John Hubbard
2019-02-20 23:36   ` Jerome Glisse
2019-02-22 23:31 ` Ralph Campbell
2019-03-13  1:27 ` Jerome Glisse
2019-03-13 16:10   ` Andrew Morton
2019-03-13 18:01     ` Jason Gunthorpe
2019-03-13 18:33     ` Jerome Glisse
2019-03-18 17:00     ` Kuehling, Felix
2019-03-18 17:04     ` Jerome Glisse
2019-03-18 18:30       ` Dan Williams
2019-03-18 18:54         ` Jerome Glisse
2019-03-18 19:18           ` Dan Williams
2019-03-18 19:28             ` Jerome Glisse
2019-03-18 19:36               ` Dan Williams
2019-03-19 16:40       ` Andrew Morton
2019-03-19 16:58         ` Jerome Glisse
2019-03-19 17:12           ` Andrew Morton
2019-03-19 17:18             ` Jerome Glisse
2019-03-19 17:33               ` Dan Williams
2019-03-19 17:45                 ` Jerome Glisse
2019-03-19 18:42                   ` Dan Williams
2019-03-19 19:05                     ` Jerome Glisse
2019-03-19 19:13                       ` Dan Williams
2019-03-19 14:18                         ` Ira Weiny
2019-03-19 22:24                           ` Jerome Glisse
2019-03-19 19:18                         ` Jerome Glisse
2019-03-19 20:25                           ` Jerome Glisse
2019-03-19 21:51             ` Stephen Rothwell
2019-03-19 18:51           ` Deucher, Alexander

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190129165428.3931-10-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rcampbell@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.