netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Add gup fast + longterm and use it in HFI1
@ 2019-02-11 20:16 ira.weiny
  2019-02-11 20:16 ` [PATCH 1/3] mm/gup: Change "write" parameter to flags ira.weiny
                   ` (5 more replies)
  0 siblings, 6 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-11 20:16 UTC (permalink / raw)
  To: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev
  Cc: Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Jason Gunthorpe, Andrew Morton, Kirill A. Shutemov, Dan Williams,
	Ira Weiny

From: Ira Weiny <ira.weiny@intel.com>

NOTE: This series depends on my clean up patch to remove the write parameter
from gup_fast_permitted()[1]

HFI1 uses get_user_pages_fast() due to its performance advantages.  Like RDMA,
HFI1 pages can be held for a significant time.  But get_user_pages_fast() does
not protect against mapping of FS DAX pages.

Introduce a get_user_pages_fast_longterm() which retains the performance while
also adding the FS DAX checks.  XDP has also shown interest in using this
functionality.[2]

[1] https://lkml.org/lkml/2019/2/11/237
[2] https://lkml.org/lkml/2019/2/11/1789

Ira Weiny (3):
  mm/gup: Change "write" parameter to flags
  mm/gup: Introduce get_user_pages_fast_longterm()
  IB/HFI1: Use new get_user_pages_fast_longterm()

 drivers/infiniband/hw/hfi1/user_pages.c |   2 +-
 include/linux/mm.h                      |   8 ++
 mm/gup.c                                | 152 ++++++++++++++++--------
 3 files changed, 114 insertions(+), 48 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 1/3] mm/gup: Change "write" parameter to flags
  2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
@ 2019-02-11 20:16 ` ira.weiny
  2019-02-11 20:16 ` [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm() ira.weiny
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-11 20:16 UTC (permalink / raw)
  To: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev
  Cc: Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Jason Gunthorpe, Andrew Morton, Kirill A. Shutemov, Dan Williams,
	Ira Weiny

From: Ira Weiny <ira.weiny@intel.com>

In order to support more options in the GUP fast walk, change the
write parameter to flags throughout the call stack.

This patch does not change functionality and passes FOLL_WRITE
where write was previously used.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 mm/gup.c | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b63e88eca31b..894ab014bd1e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1395,7 +1395,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
 
 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	struct dev_pagemap *pgmap = NULL;
 	int nr_start = *nr, ret = 0;
@@ -1413,7 +1413,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 		if (pte_protnone(pte))
 			goto pte_unmap;
 
-		if (!pte_access_permitted(pte, write))
+		if (!pte_access_permitted(pte, flags & FOLL_WRITE))
 			goto pte_unmap;
 
 		if (pte_devmap(pte)) {
@@ -1465,7 +1465,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
  * useful to have gup_huge_pmd even if we can't operate on ptes.
  */
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	return 0;
 }
@@ -1548,12 +1548,12 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
 #endif
 
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
+		unsigned long end, unsigned int flags, struct page **pages, int *nr)
 {
 	struct page *head, *page;
 	int refs;
 
-	if (!pmd_access_permitted(orig, write))
+	if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
 	if (pmd_devmap(orig))
@@ -1586,12 +1586,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 }
 
 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
+		unsigned long end, unsigned int flags, struct page **pages, int *nr)
 {
 	struct page *head, *page;
 	int refs;
 
-	if (!pud_access_permitted(orig, write))
+	if (!pud_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
 	if (pud_devmap(orig))
@@ -1624,13 +1624,13 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 }
 
 static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
-			unsigned long end, int write,
+			unsigned long end, unsigned int flags,
 			struct page **pages, int *nr)
 {
 	int refs;
 	struct page *head, *page;
 
-	if (!pgd_access_permitted(orig, write))
+	if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
 	BUILD_BUG_ON(pgd_devmap(orig));
@@ -1661,7 +1661,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 }
 
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
+		unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pmd_t *pmdp;
@@ -1683,7 +1683,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			if (pmd_protnone(pmd))
 				return 0;
 
-			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+			if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
 				pages, nr))
 				return 0;
 
@@ -1693,9 +1693,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * pmd format and THP pmd format
 			 */
 			if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
-					 PMD_SHIFT, next, write, pages, nr))
+					 PMD_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+		} else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
 			return 0;
 	} while (pmdp++, addr = next, addr != end);
 
@@ -1703,7 +1703,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 }
 
 static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pud_t *pudp;
@@ -1716,14 +1716,14 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 		if (pud_none(pud))
 			return 0;
 		if (unlikely(pud_huge(pud))) {
-			if (!gup_huge_pud(pud, pudp, addr, next, write,
+			if (!gup_huge_pud(pud, pudp, addr, next, flags,
 					  pages, nr))
 				return 0;
 		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
 			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
-					 PUD_SHIFT, next, write, pages, nr))
+					 PUD_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+		} else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
 			return 0;
 	} while (pudp++, addr = next, addr != end);
 
@@ -1731,7 +1731,7 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 }
 
 static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	p4d_t *p4dp;
@@ -1746,9 +1746,9 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 		BUILD_BUG_ON(p4d_huge(p4d));
 		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
 			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
-					 P4D_SHIFT, next, write, pages, nr))
+					 P4D_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+		} else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
 			return 0;
 	} while (p4dp++, addr = next, addr != end);
 
@@ -1756,7 +1756,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 }
 
 static void gup_pgd_range(unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
+		unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pgd_t *pgdp;
@@ -1769,14 +1769,14 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
 		if (pgd_none(pgd))
 			return;
 		if (unlikely(pgd_huge(pgd))) {
-			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+			if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
 					  pages, nr))
 				return;
 		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
 			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-					 PGDIR_SHIFT, next, write, pages, nr))
+					 PGDIR_SHIFT, next, flags, pages, nr))
 				return;
-		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+		} else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
 			return;
 	} while (pgdp++, addr = next, addr != end);
 }
@@ -1830,7 +1830,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (gup_fast_permitted(start, nr_pages)) {
 		local_irq_save(flags);
-		gup_pgd_range(start, end, write, pages, &nr);
+		gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
 		local_irq_restore(flags);
 	}
 
@@ -1872,7 +1872,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (gup_fast_permitted(start, nr_pages)) {
 		local_irq_disable();
-		gup_pgd_range(addr, end, write, pages, &nr);
+		gup_pgd_range(addr, end, write ? FOLL_WRITE : 0, pages, &nr);
 		local_irq_enable();
 		ret = nr;
 	}
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
  2019-02-11 20:16 ` [PATCH 1/3] mm/gup: Change "write" parameter to flags ira.weiny
@ 2019-02-11 20:16 ` ira.weiny
  2019-02-11 20:39   ` Jason Gunthorpe
  2019-02-11 20:16 ` [PATCH 3/3] IB/HFI1: Use new get_user_pages_fast_longterm() ira.weiny
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 34+ messages in thread
From: ira.weiny @ 2019-02-11 20:16 UTC (permalink / raw)
  To: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev
  Cc: Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Jason Gunthorpe, Andrew Morton, Kirill A. Shutemov, Dan Williams,
	Ira Weiny

From: Ira Weiny <ira.weiny@intel.com>

Users of get_user_pages_fast are not protected against mapping
pages within FS DAX.  Introduce a call which protects them.

We do this by checking for DEVMAP pages during the fast walk and
falling back to the longterm gup call to check for FS DAX if needed.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 include/linux/mm.h |   8 ++++
 mm/gup.c           | 102 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 88 insertions(+), 22 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..8f831c823630 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1540,6 +1540,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas);
+int get_user_pages_fast_longterm(unsigned long start, int nr_pages, bool write,
+				 struct page **pages);
 #else
 static inline long get_user_pages_longterm(unsigned long start,
 		unsigned long nr_pages, unsigned int gup_flags,
@@ -1547,6 +1549,11 @@ static inline long get_user_pages_longterm(unsigned long start,
 {
 	return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
 }
+static inline int get_user_pages_fast_longterm(unsigned long start, int nr_pages,
+					       bool write, struct page **pages)
+{
+	return get_user_pages_fast(start, nr_pages, write, pages);
+}
 #endif /* CONFIG_FS_DAX */
 
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
@@ -2615,6 +2622,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
+#define FOLL_LONGTERM	0x10000	/* mapping is intended for a long term pin */
 
 static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
 {
diff --git a/mm/gup.c b/mm/gup.c
index 894ab014bd1e..f7d86a304405 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1190,6 +1190,21 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
 EXPORT_SYMBOL(get_user_pages_longterm);
 #endif /* CONFIG_FS_DAX */
 
+static long get_user_pages_longterm_unlocked(unsigned long start,
+					     unsigned long nr_pages,
+					     struct page **pages,
+					     unsigned int gup_flags)
+{
+	struct mm_struct *mm = current->mm;
+	long ret;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages_longterm(start, nr_pages, gup_flags, pages, NULL);
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+
 /**
  * populate_vma_page_range() -  populate a range of pages in the vma.
  * @vma:   target vma
@@ -1417,6 +1432,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 			goto pte_unmap;
 
 		if (pte_devmap(pte)) {
+			if (flags & FOLL_LONGTERM)
+				goto pte_unmap;
+
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
 				undo_dev_pagemap(nr, nr_start, pages);
@@ -1556,8 +1574,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 	if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
-	if (pmd_devmap(orig))
+	if (pmd_devmap(orig)) {
+		if (flags & FOLL_LONGTERM)
+			return 0;
+
 		return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
+	}
 
 	refs = 0;
 	page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1837,24 +1859,9 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	return nr;
 }
 
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start:	starting user address
- * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
- * @pages:	array that receives pointers to the pages pinned.
- *		Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
+static int __get_user_pages_fast_flags(unsigned long start, int nr_pages,
+				       unsigned int gup_flags,
+				       struct page **pages)
 {
 	unsigned long addr, len, end;
 	int nr = 0, ret = 0;
@@ -1872,7 +1879,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (gup_fast_permitted(start, nr_pages)) {
 		local_irq_disable();
-		gup_pgd_range(addr, end, write ? FOLL_WRITE : 0, pages, &nr);
+		gup_pgd_range(addr, end, gup_flags, pages, &nr);
 		local_irq_enable();
 		ret = nr;
 	}
@@ -1882,8 +1889,14 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		start += nr << PAGE_SHIFT;
 		pages += nr;
 
-		ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
-				write ? FOLL_WRITE : 0);
+		if (gup_flags & FOLL_LONGTERM)
+			ret = get_user_pages_longterm_unlocked(start,
+							       nr_pages - nr,
+							       pages,
+							       gup_flags);
+		else
+			ret = get_user_pages_unlocked(start, nr_pages - nr,
+						      pages, gup_flags);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
@@ -1897,4 +1910,49 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	return ret;
 }
 
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	return __get_user_pages_fast_flags(start, nr_pages,
+					   write ? FOLL_WRITE : 0,
+					   pages);
+}
+
+#ifdef CONFIG_FS_DAX
+/**
+ * get_user_pages_fast_longterm() - pin user pages in memory
+ *
+ * Exactly the same semantics as get_user_pages_fast() except that the fast
+ * walk refuses device mapped pages (such as DAX pages), which then fall back
+ * to checking for FS DAX pages with get_user_pages_longterm().
+ */
+int get_user_pages_fast_longterm(unsigned long start, int nr_pages, bool write,
+				 struct page **pages)
+{
+	unsigned int gup_flags = FOLL_LONGTERM;
+
+	if (write)
+		gup_flags |= FOLL_WRITE;
+
+	return __get_user_pages_fast_flags(start, nr_pages, gup_flags, pages);
+}
+EXPORT_SYMBOL(get_user_pages_fast_longterm);
+#endif /* CONFIG_FS_DAX */
+
 #endif /* CONFIG_HAVE_GENERIC_GUP */
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 3/3] IB/HFI1: Use new get_user_pages_fast_longterm()
  2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
  2019-02-11 20:16 ` [PATCH 1/3] mm/gup: Change "write" parameter to flags ira.weiny
  2019-02-11 20:16 ` [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm() ira.weiny
@ 2019-02-11 20:16 ` ira.weiny
  2019-02-11 20:34 ` [PATCH 0/3] Add gup fast + longterm and use it in HFI1 Davidlohr Bueso
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-11 20:16 UTC (permalink / raw)
  To: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev
  Cc: Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Jason Gunthorpe, Andrew Morton, Kirill A. Shutemov, Dan Williams,
	Ira Weiny

From: Ira Weiny <ira.weiny@intel.com>

Use the new get_user_pages_fast_longterm() call to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/hw/hfi1/user_pages.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 24b592c6522e..b94ab5385a09 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -105,7 +105,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 {
 	int ret;
 
-	ret = get_user_pages_fast(vaddr, npages, writable, pages);
+	ret = get_user_pages_fast_longterm(vaddr, npages, writable, pages);
 	if (ret < 0)
 		return ret;
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
                   ` (2 preceding siblings ...)
  2019-02-11 20:16 ` [PATCH 3/3] IB/HFI1: Use new get_user_pages_fast_longterm() ira.weiny
@ 2019-02-11 20:34 ` Davidlohr Bueso
  2019-02-11 20:47   ` Jason Gunthorpe
  2019-02-11 21:29   ` Ira Weiny
  2019-02-11 20:40 ` Jason Gunthorpe
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
  5 siblings, 2 replies; 34+ messages in thread
From: Davidlohr Bueso @ 2019-02-11 20:34 UTC (permalink / raw)
  To: ira.weiny
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann, netdev,
	Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Jason Gunthorpe, Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, 11 Feb 2019, ira.weiny@intel.com wrote:
>Ira Weiny (3):
>  mm/gup: Change "write" parameter to flags
>  mm/gup: Introduce get_user_pages_fast_longterm()
>  IB/HFI1: Use new get_user_pages_fast_longterm()

Out of curiosity, are you planning on having all rdma drivers
use get_user_pages_fast_longterm()? Ie:

hw/mthca/mthca_memfree.c:       ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
hw/qib/qib_user_sdma.c:         ret = get_user_pages_fast(addr, j, 0, pages);

Thanks,
Davidlohr

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 20:16 ` [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm() ira.weiny
@ 2019-02-11 20:39   ` Jason Gunthorpe
  2019-02-11 21:13     ` John Hubbard
  0 siblings, 1 reply; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 20:39 UTC (permalink / raw)
  To: ira.weiny
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev, Mike Marciniszyn, Dennis Dalessandro,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> Users of get_user_pages_fast are not protected against mapping
> pages within FS DAX.  Introduce a call which protects them.
> 
> We do this by checking for DEVMAP pages during the fast walk and
> falling back to the longterm gup call to check for FS DAX if needed.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>  include/linux/mm.h |   8 ++++
>  mm/gup.c           | 102 +++++++++++++++++++++++++++++++++++----------
>  2 files changed, 88 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 80bb6408fe73..8f831c823630 100644
> +++ b/include/linux/mm.h
> @@ -1540,6 +1540,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>  long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
>  			    unsigned int gup_flags, struct page **pages,
>  			    struct vm_area_struct **vmas);
> +int get_user_pages_fast_longterm(unsigned long start, int nr_pages, bool write,
> +				 struct page **pages);
>  #else
>  static inline long get_user_pages_longterm(unsigned long start,
>  		unsigned long nr_pages, unsigned int gup_flags,
> @@ -1547,6 +1549,11 @@ static inline long get_user_pages_longterm(unsigned long start,
>  {
>  	return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
>  }
> +static inline int get_user_pages_fast_longterm(unsigned long start, int nr_pages,
> +					       bool write, struct page **pages)
> +{
> +	return get_user_pages_fast(start, nr_pages, write, pages);
> +}
>  #endif /* CONFIG_FS_DAX */
>  
>  int get_user_pages_fast(unsigned long start, int nr_pages, int write,
> @@ -2615,6 +2622,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>  #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
>  #define FOLL_COW	0x4000	/* internal GUP flag */
>  #define FOLL_ANON	0x8000	/* don't do file mappings */
> +#define FOLL_LONGTERM	0x10000	/* mapping is intended for a long term pin */

If we are adding a new flag, maybe we should get rid of the 'longterm'
entry points and just rely on the callers to pass the flag?

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
                   ` (3 preceding siblings ...)
  2019-02-11 20:34 ` [PATCH 0/3] Add gup fast + longterm and use it in HFI1 Davidlohr Bueso
@ 2019-02-11 20:40 ` Jason Gunthorpe
  2019-02-11 21:14   ` Weiny, Ira
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
  5 siblings, 1 reply; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 20:40 UTC (permalink / raw)
  To: ira.weiny
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev, Mike Marciniszyn, Dennis Dalessandro,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 12:16:40PM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> NOTE: This series depends on my clean up patch to remove the write parameter
> from gup_fast_permitted()[1]
> 
> HFI1 uses get_user_pages_fast() due to its performance advantages.  Like RDMA,
> HFI1 pages can be held for a significant time.  But get_user_pages_fast() does
> not protect against mapping of FS DAX pages.

If HFI1 can use the _fast variant, can't all the general RDMA stuff
use it too? 

What is the guidance on when fast vs not fast should be used?

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 20:34 ` [PATCH 0/3] Add gup fast + longterm and use it in HFI1 Davidlohr Bueso
@ 2019-02-11 20:47   ` Jason Gunthorpe
  2019-02-11 21:42     ` Ira Weiny
  2019-02-11 21:29   ` Ira Weiny
  1 sibling, 1 reply; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 20:47 UTC (permalink / raw)
  To: ira.weiny, linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	netdev, Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 12:34:17PM -0800, Davidlohr Bueso wrote:
> On Mon, 11 Feb 2019, ira.weiny@intel.com wrote:
> > Ira Weiny (3):
> >  mm/gup: Change "write" parameter to flags
> >  mm/gup: Introduce get_user_pages_fast_longterm()
> >  IB/HFI1: Use new get_user_pages_fast_longterm()
> 
> Out of curiosity, are you planning on having all rdma drivers
> use get_user_pages_fast_longterm()? Ie:
> 
> hw/mthca/mthca_memfree.c:       ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);

This one is certainly a mistake - this should be done with a umem.

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 20:39   ` Jason Gunthorpe
@ 2019-02-11 21:13     ` John Hubbard
  2019-02-11 21:26       ` Ira Weiny
  0 siblings, 1 reply; 34+ messages in thread
From: John Hubbard @ 2019-02-11 21:13 UTC (permalink / raw)
  To: Jason Gunthorpe, ira.weiny
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev, Mike Marciniszyn, Dennis Dalessandro,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov, Dan Williams

On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
>> From: Ira Weiny <ira.weiny@intel.com>
[...]
>> +static inline int get_user_pages_fast_longterm(unsigned long start, int nr_pages,
>> +					       bool write, struct page **pages)
>> +{
>> +	return get_user_pages_fast(start, nr_pages, write, pages);
>> +}
>>  #endif /* CONFIG_FS_DAX */
>>  
>>  int get_user_pages_fast(unsigned long start, int nr_pages, int write,
>> @@ -2615,6 +2622,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>>  #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
>>  #define FOLL_COW	0x4000	/* internal GUP flag */
>>  #define FOLL_ANON	0x8000	/* don't do file mappings */
>> +#define FOLL_LONGTERM	0x10000	/* mapping is intended for a long term pin */
> 
> If we are adding a new flag, maybe we should get rid of the 'longterm'
> entry points and just rely on the callers to pass the flag?
> 
> Jason
> 

+1, I agree that the overall get_user_pages*() API family will be cleaner
*without* get_user_pages_longterm*() calls. And this new flag makes that possible.
So I'd like to see the "longterm" call replaced with just passing this flag. Maybe
even as part of this patchset, but either way.

Taking a moment to reflect on where I think this might go eventually (the notes
below do not need to affect your patchset here, but this seems like a good place
to mention this):

It seems to me that the longterm vs. short-term distinction is of questionable value.
It's actually better to just call get_user_pages(), and then if it really is
long-term enough to matter internally, we'll see the pages marked as gup-pinned.
If the gup pages are released before anyone (filesystem, that is) notices, then
it must have been short term.

Doing it that way is self-maintaining. Of course, this assumes that we end up with
a design that doesn't require being told, by the call sites, that a given gup
call is intended for "long term" use. So I could be wrong about this direction, but
let's please consider the possibility.

thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 20:40 ` Jason Gunthorpe
@ 2019-02-11 21:14   ` Weiny, Ira
  2019-02-11 22:23     ` Jason Gunthorpe
  0 siblings, 1 reply; 34+ messages in thread
From: Weiny, Ira @ 2019-02-11 21:14 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev, Marciniszyn, Mike, Dalessandro, Dennis,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov, Williams, Dan J

> 
> On Mon, Feb 11, 2019 at 12:16:40PM -0800, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> >
> > NOTE: This series depends on my clean up patch to remove the write
> > parameter from gup_fast_permitted()[1]
> >
> > HFI1 uses get_user_pages_fast() due to its performance advantages.
> > Like RDMA,
> > HFI1 pages can be held for a significant time.  But
> > get_user_pages_fast() does not protect against mapping of FS DAX pages.
> 
> If HFI1 can use the _fast variant, can't all the general RDMA stuff use it too?
> 
> What is the guidance on when fast vs not fast should be used?

Right now it can't because it holds mmap_sem across the call.  Once Shiraz's patches removing the umem->hugetlb flag are accepted, I think we can change umem.c.

Also, it specifies FOLL_FORCE which can't currently be specified with gup fast.  One idea I had was to change get_user_pages_fast() to use gup_flags instead of a single write flag.  But that proved to be a very big cosmetic change across a lot of callers so I went this way.

Ira

> 
> Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 21:13     ` John Hubbard
@ 2019-02-11 21:26       ` Ira Weiny
  2019-02-11 21:39         ` John Hubbard
  0 siblings, 1 reply; 34+ messages in thread
From: Ira Weiny @ 2019-02-11 21:26 UTC (permalink / raw)
  To: John Hubbard
  Cc: Jason Gunthorpe, linux-rdma, linux-kernel, linux-mm,
	Daniel Borkmann, Davidlohr Bueso, netdev, Mike Marciniszyn,
	Dennis Dalessandro, Doug Ledford, Andrew Morton,
	Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> > On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
> >> From: Ira Weiny <ira.weiny@intel.com>
> [...]
> >> +static inline int get_user_pages_fast_longterm(unsigned long start, int nr_pages,
> >> +					       bool write, struct page **pages)
> >> +{
> >> +	return get_user_pages_fast(start, nr_pages, write, pages);
> >> +}
> >>  #endif /* CONFIG_FS_DAX */
> >>  
> >>  int get_user_pages_fast(unsigned long start, int nr_pages, int write,
> >> @@ -2615,6 +2622,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
> >>  #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
> >>  #define FOLL_COW	0x4000	/* internal GUP flag */
> >>  #define FOLL_ANON	0x8000	/* don't do file mappings */
> >> +#define FOLL_LONGTERM	0x10000	/* mapping is intended for a long term pin */
> > 
> > If we are adding a new flag, maybe we should get rid of the 'longterm'
> > entry points and just rely on the callers to pass the flag?
> > 
> > Jason
> > 
> 
> +1, I agree that the overall get_user_pages*() API family will be cleaner
> *without* get_user_pages_longterm*() calls. And this new flag makes that possible.
> So I'd like to see the "longterm" call replaced with just passing this flag. Maybe
> even as part of this patchset, but either way.

Yes I've thought about this as well.  I have a couple of different versions of
this series which I've been mulling over and this was one of the other
variations.  But see below...

> 
> Taking a moment to reflect on where I think this might go eventually (the notes
> below do not need to affect your patchset here, but this seems like a good place
> to mention this):
> 
> It seems to me that the longterm vs. short-term is of questionable value.

This is exactly why I did not post this before.  I've been waiting for our other
discussions on how GUP pins are going to be handled to play out.  But with the
netdev thread today[1] it seems like we need to make sure we have a "safe" fast
variant for a while.  Introducing FOLL_LONGTERM seemed like the cleanest way to
do that even if we will not need the distinction in the future...  :-(

> It's actually better to just call get_user_pages(), and then if it really is
> long-term enough to matter internally, we'll see the pages marked as gup-pinned.
> If the gup pages are released before anyone (filesystem, that is) notices, then
> it must have been short term.
> 
> Doing it that way is self-maintaining. Of course, this assumes that we end up with
> a design that doesn't require being told, by the call sites, that a given gup
> call is intended for "long term" use. So I could be wrong about this direction, but
> let's please consider the possibility.

This is why I've been holding these patches.  I'm also not 100% sure if we will
need the longterm flag in the future.

This is also why I did not change the get_user_pages_longterm because we could
be ripping this all out by the end of the year...  (I hope. :-)

So while this does "pollute" the GUP family of calls I'm hoping it is not
forever.

Ira

[1] https://lkml.org/lkml/2019/2/11/1789

> 
> thanks,
> -- 
> John Hubbard
> NVIDIA

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 20:34 ` [PATCH 0/3] Add gup fast + longterm and use it in HFI1 Davidlohr Bueso
  2019-02-11 20:47   ` Jason Gunthorpe
@ 2019-02-11 21:29   ` Ira Weiny
  1 sibling, 0 replies; 34+ messages in thread
From: Ira Weiny @ 2019-02-11 21:29 UTC (permalink / raw)
  To: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann, netdev,
	Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Jason Gunthorpe, Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 12:34:17PM -0800, Davidlohr Bueso wrote:
> On Mon, 11 Feb 2019, ira.weiny@intel.com wrote:
> > Ira Weiny (3):
> >  mm/gup: Change "write" parameter to flags
> >  mm/gup: Introduce get_user_pages_fast_longterm()
> >  IB/HFI1: Use new get_user_pages_fast_longterm()
> 
> Out of curiosity, are you planning on having all rdma drivers
> use get_user_pages_fast_longterm()? Ie:
> 
> hw/mthca/mthca_memfree.c:       ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
> hw/qib/qib_user_sdma.c:         ret = get_user_pages_fast(addr, j, 0, pages);

I missed that when I changed the other qib call to longterm...  :-(

Yes, both of these should be changed.  Although I need to look into Jason's
comment WRT the mthca call.

Ira

> 
> Thanks,
> Davidlohr

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 21:26       ` Ira Weiny
@ 2019-02-11 21:39         ` John Hubbard
  2019-02-11 21:45           ` Dan Williams
  2019-02-11 21:52           ` Ira Weiny
  0 siblings, 2 replies; 34+ messages in thread
From: John Hubbard @ 2019-02-11 21:39 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Jason Gunthorpe, linux-rdma, linux-kernel, linux-mm,
	Daniel Borkmann, Davidlohr Bueso, netdev, Mike Marciniszyn,
	Dennis Dalessandro, Doug Ledford, Andrew Morton,
	Kirill A. Shutemov, Dan Williams

On 2/11/19 1:26 PM, Ira Weiny wrote:
> On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
>> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
>>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
>>>> From: Ira Weiny <ira.weiny@intel.com>
>> [...]
>> It seems to me that the longterm vs. short-term is of questionable value.
> 
> This is exactly why I did not post this before.  I've been waiting our other
> discussions on how GUP pins are going to be handled to play out.  But with the
> netdev thread today[1] it seems like we need to make sure we have a "safe" fast
> variant for a while.  Introducing FOLL_LONGTERM seemed like the cleanest way to
> do that even if we will not need the distinction in the future...  :-(

Yes, I agree. Below...

> [...]
> This is also why I did not change the get_user_pages_longterm because we could
> be ripping this all out by the end of the year...  (I hope. :-)
> 
> So while this does "pollute" the GUP family of calls I'm hoping it is not
> forever.
> 
> Ira
> 
> [1] https://lkml.org/lkml/2019/2/11/1789
> 

Yes, and to be clear, I think your patchset here is fine. It is easy to find
the FOLL_LONGTERM callers if and when we want to change anything. I just think
also it's appropriate to go a bit further, and use FOLL_LONGTERM all by itself.

That's because in either design outcome, it's better that way:

-- If we keep the concept of "I'm a long-term gup call site", then FOLL_LONGTERM
is just right. The gup API already has _fast and non-fast variants, and once
you get past a couple, you end up with a multiplication of names that really
work better as flags. We're there.

-- If we drop the concept, then you've already done part of the work, by removing
the _longterm API variants.



thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 20:47   ` Jason Gunthorpe
@ 2019-02-11 21:42     ` Ira Weiny
  2019-02-11 22:22       ` Jason Gunthorpe
  0 siblings, 1 reply; 34+ messages in thread
From: Ira Weiny @ 2019-02-11 21:42 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann, netdev,
	Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 01:47:10PM -0700, Jason Gunthorpe wrote:
> On Mon, Feb 11, 2019 at 12:34:17PM -0800, Davidlohr Bueso wrote:
> > On Mon, 11 Feb 2019, ira.weiny@intel.com wrote:
> > > Ira Weiny (3):
> > >  mm/gup: Change "write" parameter to flags
> > >  mm/gup: Introduce get_user_pages_fast_longterm()
> > >  IB/HFI1: Use new get_user_pages_fast_longterm()
> > 
> > Out of curiosity, are you planning on having all rdma drivers
> > use get_user_pages_fast_longterm()? Ie:
> > 
> > hw/mthca/mthca_memfree.c:       ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
> 
> This one is certainly a mistake - this should be done with a umem.

It looks like this is mapping a page allocated by user space for a doorbell?!?!
And that this is supporting the old memory free cards.  I remember that these
cards used system memory instead of memory on the cards but why it expects user
space to allocate that memory and how it all works is way too old for me to
even try to remember.

This does not seem to be allocating memory regions.  Jason, do you want a patch
to just convert these calls and consider it legacy code?

Ira

> 
> Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 21:39         ` John Hubbard
@ 2019-02-11 21:45           ` Dan Williams
  2019-02-11 21:52           ` Ira Weiny
  1 sibling, 0 replies; 34+ messages in thread
From: Dan Williams @ 2019-02-11 21:45 UTC (permalink / raw)
  To: John Hubbard
  Cc: Ira Weiny, Jason Gunthorpe, linux-rdma,
	Linux Kernel Mailing List, Linux MM, Daniel Borkmann,
	Davidlohr Bueso, Netdev, Mike Marciniszyn, Dennis Dalessandro,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov

On Mon, Feb 11, 2019 at 1:39 PM John Hubbard <jhubbard@nvidia.com> wrote:
>
> On 2/11/19 1:26 PM, Ira Weiny wrote:
> > On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
> >> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> >>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
> >>>> From: Ira Weiny <ira.weiny@intel.com>
> >> [...]
> >> It seems to me that the longterm vs. short-term is of questionable value.
> >
> > This is exactly why I did not post this before.  I've been waiting our other
> > discussions on how GUP pins are going to be handled to play out.  But with the
> > netdev thread today[1] it seems like we need to make sure we have a "safe" fast
> > variant for a while.  Introducing FOLL_LONGTERM seemed like the cleanest way to
> > do that even if we will not need the distinction in the future...  :-(
>
> Yes, I agree. Below...
>
> > [...]
> > This is also why I did not change the get_user_pages_longterm because we could
> > be ripping this all out by the end of the year...  (I hope. :-)
> >
> > So while this does "pollute" the GUP family of calls I'm hoping it is not
> > forever.
> >
> > Ira
> >
> > [1] https://lkml.org/lkml/2019/2/11/1789
> >
>
> Yes, and to be clear, I think your patchset here is fine. It is easy to find
> the FOLL_LONGTERM callers if and when we want to change anything. I just think
> also it's appopriate to go a bit further, and use FOLL_LONGTERM all by itself.
>
> That's because in either design outcome, it's better that way:
>
> -- If we keep the concept of "I'm a long-term gup call site", then FOLL_LONGTERM
> is just right. The gup API already has _fast and non-fast variants, and once
> you get past a couple, you end up with a multiplication of names that really
> work better as flags. We're there.
>
> -- If we drop the concept, then you've already done part of the work, by removing
> the _longterm API variants.
>

A problem I now see with the _longterm name is that it hides its true
intent. It's really a "dax can't use page cache tricks to make it seem
like this page is ok to access indefinitely, if the system needs this
page back your pin would prevent the forward progress of the system
state.". If the discussion results in a need to have an explicit file
state (immutable or lease) then we'll continue to need a gup pin type
distinction. If the discussion resolves to one of the silent options
(fail truncate, lie about truncate) then FOLL_LONGTERM might be able
to die at that point.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 21:39         ` John Hubbard
  2019-02-11 21:45           ` Dan Williams
@ 2019-02-11 21:52           ` Ira Weiny
  2019-02-11 22:01             ` John Hubbard
  2019-02-11 22:06             ` Jason Gunthorpe
  1 sibling, 2 replies; 34+ messages in thread
From: Ira Weiny @ 2019-02-11 21:52 UTC (permalink / raw)
  To: John Hubbard
  Cc: Jason Gunthorpe, linux-rdma, linux-kernel, linux-mm,
	Daniel Borkmann, Davidlohr Bueso, netdev, Mike Marciniszyn,
	Dennis Dalessandro, Doug Ledford, Andrew Morton,
	Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 01:39:12PM -0800, John Hubbard wrote:
> On 2/11/19 1:26 PM, Ira Weiny wrote:
> > On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
> >> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> >>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
> >>>> From: Ira Weiny <ira.weiny@intel.com>
> >> [...]
> >> It seems to me that the longterm vs. short-term is of questionable value.
> > 
> > This is exactly why I did not post this before.  I've been waiting our other
> > discussions on how GUP pins are going to be handled to play out.  But with the
> > netdev thread today[1] it seems like we need to make sure we have a "safe" fast
> > variant for a while.  Introducing FOLL_LONGTERM seemed like the cleanest way to
> > do that even if we will not need the distinction in the future...  :-(
> 
> Yes, I agree. Below...
> 
> > [...]
> > This is also why I did not change the get_user_pages_longterm because we could
> > be ripping this all out by the end of the year...  (I hope. :-)
> > 
> > So while this does "pollute" the GUP family of calls I'm hoping it is not
> > forever.
> > 
> > Ira
> > 
> > [1] https://lkml.org/lkml/2019/2/11/1789
> > 
> 
> Yes, and to be clear, I think your patchset here is fine. It is easy to find
> the FOLL_LONGTERM callers if and when we want to change anything. I just think
> also it's appopriate to go a bit further, and use FOLL_LONGTERM all by itself.
> 
> That's because in either design outcome, it's better that way:
> 
> -- If we keep the concept of "I'm a long-term gup call site", then FOLL_LONGTERM
> is just right. The gup API already has _fast and non-fast variants, and once
> you get past a couple, you end up with a multiplication of names that really
> work better as flags. We're there.
> 
> -- If we drop the concept, then you've already done part of the work, by removing
> the _longterm API variants.

Fair enough.   But to do that correctly I think we will need to convert
get_user_pages_fast() to use flags as well.  I have a version of this series
which includes a patch that does this, but the patch touched a lot of subsystems and
a couple of different architectures...[1]

I can't test them all.  If we want to go that way I'm up for submitting the
patch...  But if we remove longterm in the future we may be left with a
get_user_pages_fast() which really only needs 1 flag.  But perhaps overall we
would be better off?

Ira


[1] mm/gup.c: Change GUP fast to use flags rather than write bool

To facilitate additional options to get_user_pages_fast change the
singular write parameter to be the more generic gup_flags.

This patch currently does not change any functionality.  New
functionality will follow in subsequent patches.

Many of the get_user_pages_fast call sites were unchanged because they
already used FOLL_WRITE or 0 as appropriate.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 arch/mips/mm/gup.c                         | 11 ++++++-----
 arch/powerpc/kvm/book3s_64_mmu_hv.c        |  4 ++--
 arch/powerpc/kvm/e500_mmu.c                |  2 +-
 arch/powerpc/mm/mmu_context_iommu.c        |  4 ++--
 arch/s390/kvm/interrupt.c                  |  2 +-
 arch/s390/mm/gup.c                         | 12 ++++++------
 arch/sh/mm/gup.c                           | 11 ++++++-----
 arch/sparc/mm/gup.c                        |  9 +++++----
 arch/x86/kvm/paging_tmpl.h                 |  2 +-
 arch/x86/kvm/svm.c                         |  2 +-
 drivers/fpga/dfl-afu-dma-region.c          |  2 +-
 drivers/gpu/drm/via/via_dmablit.c          |  3 ++-
 drivers/infiniband/hw/hfi1/user_pages.c    |  3 ++-
 drivers/misc/genwqe/card_utils.c           |  2 +-
 drivers/misc/vmw_vmci/vmci_host.c          |  2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c    |  6 ++++--
 drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
 drivers/rapidio/devices/rio_mport_cdev.c   |  4 +++-
 drivers/sbus/char/oradax.c                 |  2 +-
 drivers/scsi/st.c                          |  3 ++-
 drivers/staging/gasket/gasket_page_table.c |  4 ++--
 drivers/tee/tee_shm.c                      |  2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c        |  3 ++-
 drivers/vhost/vhost.c                      |  2 +-
 drivers/video/fbdev/pvr2fb.c               |  2 +-
 drivers/virt/fsl_hypervisor.c              |  2 +-
 drivers/xen/gntdev.c                       |  2 +-
 fs/orangefs/orangefs-bufmap.c              |  2 +-
 include/linux/mm.h                         |  4 ++--
 kernel/futex.c                             |  2 +-
 lib/iov_iter.c                             |  7 +++++--
 mm/gup.c                                   | 10 +++++-----
 mm/util.c                                  |  8 ++++----
 net/ceph/pagevec.c                         |  2 +-
 net/rds/info.c                             |  2 +-
 net/rds/rdma.c                             |  3 ++-
 36 files changed, 81 insertions(+), 65 deletions(-)



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 21:52           ` Ira Weiny
@ 2019-02-11 22:01             ` John Hubbard
  2019-02-11 22:06             ` Jason Gunthorpe
  1 sibling, 0 replies; 34+ messages in thread
From: John Hubbard @ 2019-02-11 22:01 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Jason Gunthorpe, linux-rdma, linux-kernel, linux-mm,
	Daniel Borkmann, Davidlohr Bueso, netdev, Mike Marciniszyn,
	Dennis Dalessandro, Doug Ledford, Andrew Morton,
	Kirill A. Shutemov, Dan Williams

On 2/11/19 1:52 PM, Ira Weiny wrote:
> On Mon, Feb 11, 2019 at 01:39:12PM -0800, John Hubbard wrote:
>> On 2/11/19 1:26 PM, Ira Weiny wrote:
>>> On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
>>>> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
>>>>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
>>>>>> From: Ira Weiny <ira.weiny@intel.com>
>>>> [...]
> Fair enough.   But to do that correctly I think we will need to convert
> get_user_pages_fast() to use flags as well.  I have a version of this series
> which includes a patch does this, but the patch touched a lot of subsystems and
> a couple of different architectures...[1]
> 
> I can't test them all.  If we want to go that way I'm up for submitting the

I have a similar problem, and a similar list of call sites, for the
put_user_pages() conversion, so that file list looks familiar. And the
arch-specific gup implementations are about to complicate my life too. :)

> patch...  But if we remove longterm in the future we may be left with a
> get_user_pages_fast() which really only needs 1 flag.  But perhaps overall we
> would be better off?
> 
> Ira

I certainly think so, yes.


thanks,
-- 
John Hubbard
NVIDIA
> 
> 
> [1] mm/gup.c: Change GUP fast to use flags rather than write bool
> 
> To facilitate additional options to get_user_pages_fast change the
> singular write parameter to be the more generic gup_flags.
> 
> This patch currently does not change any functionality.  New
> functionality will follow in subsequent patches.
> 
> Many of the get_user_pages_fast call sites were unchanged because they
> already used FOLL_WRITE or 0 as appropriate.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> ---
>  arch/mips/mm/gup.c                         | 11 ++++++-----
>  arch/powerpc/kvm/book3s_64_mmu_hv.c        |  4 ++--
>  arch/powerpc/kvm/e500_mmu.c                |  2 +-
>  arch/powerpc/mm/mmu_context_iommu.c        |  4 ++--
>  arch/s390/kvm/interrupt.c                  |  2 +-
>  arch/s390/mm/gup.c                         | 12 ++++++------
>  arch/sh/mm/gup.c                           | 11 ++++++-----
>  arch/sparc/mm/gup.c                        |  9 +++++----
>  arch/x86/kvm/paging_tmpl.h                 |  2 +-
>  arch/x86/kvm/svm.c                         |  2 +-
>  drivers/fpga/dfl-afu-dma-region.c          |  2 +-
>  drivers/gpu/drm/via/via_dmablit.c          |  3 ++-
>  drivers/infiniband/hw/hfi1/user_pages.c    |  3 ++-
>  drivers/misc/genwqe/card_utils.c           |  2 +-
>  drivers/misc/vmw_vmci/vmci_host.c          |  2 +-
>  drivers/misc/vmw_vmci/vmci_queue_pair.c    |  6 ++++--
>  drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
>  drivers/rapidio/devices/rio_mport_cdev.c   |  4 +++-
>  drivers/sbus/char/oradax.c                 |  2 +-
>  drivers/scsi/st.c                          |  3 ++-
>  drivers/staging/gasket/gasket_page_table.c |  4 ++--
>  drivers/tee/tee_shm.c                      |  2 +-
>  drivers/vfio/vfio_iommu_spapr_tce.c        |  3 ++-
>  drivers/vhost/vhost.c                      |  2 +-
>  drivers/video/fbdev/pvr2fb.c               |  2 +-
>  drivers/virt/fsl_hypervisor.c              |  2 +-
>  drivers/xen/gntdev.c                       |  2 +-
>  fs/orangefs/orangefs-bufmap.c              |  2 +-
>  include/linux/mm.h                         |  4 ++--
>  kernel/futex.c                             |  2 +-
>  lib/iov_iter.c                             |  7 +++++--
>  mm/gup.c                                   | 10 +++++-----
>  mm/util.c                                  |  8 ++++----
>  net/ceph/pagevec.c                         |  2 +-
>  net/rds/info.c                             |  2 +-
>  net/rds/rdma.c                             |  3 ++-
>  36 files changed, 81 insertions(+), 65 deletions(-)
> 
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 21:52           ` Ira Weiny
  2019-02-11 22:01             ` John Hubbard
@ 2019-02-11 22:06             ` Jason Gunthorpe
  2019-02-11 22:55               ` Dan Williams
  1 sibling, 1 reply; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 22:06 UTC (permalink / raw)
  To: Ira Weiny
  Cc: John Hubbard, linux-rdma, linux-kernel, linux-mm,
	Daniel Borkmann, Davidlohr Bueso, netdev, Mike Marciniszyn,
	Dennis Dalessandro, Doug Ledford, Andrew Morton,
	Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 01:52:38PM -0800, Ira Weiny wrote:
> On Mon, Feb 11, 2019 at 01:39:12PM -0800, John Hubbard wrote:
> > On 2/11/19 1:26 PM, Ira Weiny wrote:
> > > On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
> > >> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> > >>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
> > >>>> From: Ira Weiny <ira.weiny@intel.com>
> > >> [...]
> > >> It seems to me that the longterm vs. short-term is of questionable value.
> > > 
> > > This is exactly why I did not post this before.  I've been waiting our other
> > > discussions on how GUP pins are going to be handled to play out.  But with the
> > > netdev thread today[1] it seems like we need to make sure we have a "safe" fast
> > > variant for a while.  Introducing FOLL_LONGTERM seemed like the cleanest way to
> > > do that even if we will not need the distinction in the future...  :-(
> > 
> > Yes, I agree. Below...
> > 
> > > [...]
> > > This is also why I did not change the get_user_pages_longterm because we could
> > > be ripping this all out by the end of the year...  (I hope. :-)
> > > 
> > > So while this does "pollute" the GUP family of calls I'm hoping it is not
> > > forever.
> > > 
> > > Ira
> > > 
> > > [1] https://lkml.org/lkml/2019/2/11/1789
> > > 
> > 
> > Yes, and to be clear, I think your patchset here is fine. It is easy to find
> > the FOLL_LONGTERM callers if and when we want to change anything. I just think
> > also it's appopriate to go a bit further, and use FOLL_LONGTERM all by itself.
> > 
> > That's because in either design outcome, it's better that way:
> > 
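> > > -- If we keep the concept of "I'm a long-term gup call site", then FOLL_LONGTERM
> > > is just right. The gup API already has _fast and non-fast variants, and once
> > > you get past a couple, you end up with a multiplication of names that really
> > > work better as flags. We're there.
> > > 
> > > -- If we drop the concept, then you've already done part of the work, by removing
> > > the _longterm API variants.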
> 
> Fair enough.   But to do that correctly I think we will need to convert
> get_user_pages_fast() to use flags as well.  I have a version of this series
> which includes a patch does this, but the patch touched a lot of subsystems and
> a couple of different architectures...[1]

I think this should be done anyhow, it is trouble that the two basically
identical interfaces have different signatures. This already caused a
bug in vfio..

I also wonder if someone should think about making fast into a flag
too..

But I'm not sure when fast should be used vs when it shouldn't :(

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 21:42     ` Ira Weiny
@ 2019-02-11 22:22       ` Jason Gunthorpe
  2019-02-11 22:40         ` Weiny, Ira
  0 siblings, 1 reply; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 22:22 UTC (permalink / raw)
  To: Ira Weiny
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann, netdev,
	Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov, Dan Williams

On Mon, Feb 11, 2019 at 01:42:57PM -0800, Ira Weiny wrote:
> On Mon, Feb 11, 2019 at 01:47:10PM -0700, Jason Gunthorpe wrote:
> > On Mon, Feb 11, 2019 at 12:34:17PM -0800, Davidlohr Bueso wrote:
> > > On Mon, 11 Feb 2019, ira.weiny@intel.com wrote:
> > > > Ira Weiny (3):
> > > >  mm/gup: Change "write" parameter to flags
> > > >  mm/gup: Introduce get_user_pages_fast_longterm()
> > > >  IB/HFI1: Use new get_user_pages_fast_longterm()
> > > 
> > > Out of curiosity, are you planning on having all rdma drivers
> > > use get_user_pages_fast_longterm()? Ie:
> > > 
> > > hw/mthca/mthca_memfree.c:       ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
> > 
> > This one is certainly a mistake - this should be done with a umem.
> 
> It looks like this is mapping a page allocated by user space for a
> doorbell?!?!

Many drivers do this, the 'doorbell' is a PCI -> CPU thing of some sort

> This does not seem to be allocating memory regions.  Jason, do you
> want a patch to just convert these calls and consider it legacy
> code?

It needs to use umem like all the other drivers on this path.
Otherwise it doesn't get the page pinning logic right

There is also something else rotten with these longterm callsites,
they seem to have very different ideas how to handle RLIMIT_MEMLOCK.

ie vfio doesn't even touch pinned_vm.. and rdma is applying
RLIMIT_MEMLOCK to mm->pinned_vm, while vfio is using locked_vm.. No
idea which is right, but they should be the same, and this pattern
should probably be in core code someplace.

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 21:14   ` Weiny, Ira
@ 2019-02-11 22:23     ` Jason Gunthorpe
  0 siblings, 0 replies; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 22:23 UTC (permalink / raw)
  To: Weiny, Ira
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann,
	Davidlohr Bueso, netdev, Marciniszyn, Mike, Dalessandro, Dennis,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov, Williams, Dan J

On Mon, Feb 11, 2019 at 09:14:56PM +0000, Weiny, Ira wrote:
> > 
> > On Mon, Feb 11, 2019 at 12:16:40PM -0800, ira.weiny@intel.com wrote:
> > > From: Ira Weiny <ira.weiny@intel.com>
> > >
> > > NOTE: This series depends on my clean up patch to remove the write
> > > parameter from gup_fast_permitted()[1]
> > >
> > > HFI1 uses get_user_pages_fast() due to it performance advantages.
> > > Like RDMA,
> > > HFI1 pages can be held for a significant time.  But
> > > get_user_pages_fast() does not protect against mapping of FS DAX pages.
> > 
> > If HFI1 can use the _fast varient, can't all the general RDMA stuff use it too?
> > 
> > What is the guidance on when fast vs not fast should be use?
> 
> Right now it can't because it holds mmap_sem across the call.  Once
> Shiraz's patches are accepted removing the umem->hugetlb flag I
> think we can change umem.c.

Okay, that makes sense, we should change it when Shiraz's patches are
merged
 
> Also, it specifies FOLL_FORCE which can't currently be specified
> with gup fast.  One idea I had was to change get_user_pages_fast()
> to use gup_flags instead of a single write flag.  But that proved to
> be a very big cosmetic change across a lot of callers so I went this
> way.

I think you should do it.. :)

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 22:22       ` Jason Gunthorpe
@ 2019-02-11 22:40         ` Weiny, Ira
  2019-02-11 22:50           ` Jason Gunthorpe
  0 siblings, 1 reply; 34+ messages in thread
From: Weiny, Ira @ 2019-02-11 22:40 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann, netdev,
	Marciniszyn, Mike, Dalessandro, Dennis, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov, Williams, Dan J

> On Mon, Feb 11, 2019 at 01:42:57PM -0800, Ira Weiny wrote:
> > On Mon, Feb 11, 2019 at 01:47:10PM -0700, Jason Gunthorpe wrote:
> > > On Mon, Feb 11, 2019 at 12:34:17PM -0800, Davidlohr Bueso wrote:
> > > > On Mon, 11 Feb 2019, ira.weiny@intel.com wrote:
> > > > > Ira Weiny (3):
> > > > >  mm/gup: Change "write" parameter to flags
> > > > >  mm/gup: Introduce get_user_pages_fast_longterm()
> > > > >  IB/HFI1: Use new get_user_pages_fast_longterm()
> > > >
> > > > Out of curiosity, are you planning on having all rdma drivers use
> > > > get_user_pages_fast_longterm()? Ie:
> > > >
> > > > hw/mthca/mthca_memfree.c:       ret = get_user_pages_fast(uaddr &
> PAGE_MASK, 1, FOLL_WRITE, pages);
> > >
> > > This one is certainly a mistake - this should be done with a umem.
> >
> > It looks like this is mapping a page allocated by user space for a
> > doorbell?!?!
> 
> Many drivers do this, the 'doorbell' is a PCI -> CPU thing of some sort

My surprise is why does _userspace_ allocate this memory?

> 
> > This does not seem to be allocating memory regions.  Jason, do you
> > want a patch to just convert these calls and consider it legacy code?
> 
> It needs to use umem like all the other drivers on this path.
> Otherwise it doesn't get the page pinning logic right

Not sure what you mean regarding the pinning logic?

> 
> There is also something else rotten with these longterm callsites, they seem
> to have very different ideas how to handle RLIMIT_MEMLOCK.
> 
> ie vfio doesn't even touch pinned_vm.. and rdma is applying
> RLIMIT_MEMLOCK to mm->pinned_vm, while vfio is using locked_vm.. No
> idea which is right, but they should be the same, and this pattern should
> probably be in core code someplace.

Neither do I.  But AFAIK pinned_vm is a subset of locked_vm.

So should we be accounting both of the counters?

Ira


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 0/3] Add gup fast + longterm and use it in HFI1
  2019-02-11 22:40         ` Weiny, Ira
@ 2019-02-11 22:50           ` Jason Gunthorpe
  0 siblings, 0 replies; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 22:50 UTC (permalink / raw)
  To: Weiny, Ira
  Cc: linux-rdma, linux-kernel, linux-mm, Daniel Borkmann, netdev,
	Marciniszyn, Mike, Dalessandro, Dennis, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov, Williams, Dan J

On Mon, Feb 11, 2019 at 10:40:02PM +0000, Weiny, Ira wrote:

> > Many drivers do this, the 'doorbell' is a PCI -> CPU thing of some sort
> 
> My surprise is why does _userspace_ allocate this memory?

Well, userspace needs to read the memory, so either userspace allocates
it and the kernel GUP's it, or userspace mmap's a kernel page which
was DMA mapped.

The GUP version lets the doorbells have lower alignment than a PAGE,
and these RDMA drivers hard require GUP->DMA to function..

So why not use a umem here? It already has to work.

> > > This does not seem to be allocating memory regions.  Jason, do you
> > > want a patch to just convert these calls and consider it legacy code?
> > 
> > It needs to use umem like all the other drivers on this path.
> > Otherwise it doesn't get the page pinning logic right
>
> Not sure what you mean regarding the pinning logic?

The RLIMIT_MEMLOCK stuff and so on.

> > There is also something else rotten with these longterm callsites,
> > they seem to have very different ideas how to handle
> > RLIMIT_MEMLOCK.
> > 
> > ie vfio doesn't even touch pinned_vm.. and rdma is applying
> > RLIMIT_MEMLOCK to mm->pinned_vm, while vfio is using locked_vm.. No
> > idea which is right, but they should be the same, and this pattern should
> > probably be in core code someplace.
> 
> Neither do I.  But AFAIK pinned_vm is a subset of locked_vm.

I thought so..

> So should we be accounting both of the counters?

Someone should check :)

Since we don't increment locked_vm when we increment pinned_vm and
vfio only checks RLIMIT_MEMLOCK against locked_vm one can certainly
exceed the limit by mixing and matching RDMA and VFIO pins in the same
process. Sure seems like there is a bug somewhere here.

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 22:06             ` Jason Gunthorpe
@ 2019-02-11 22:55               ` Dan Williams
  2019-02-11 23:04                 ` Weiny, Ira
  2019-02-11 23:25                 ` Jason Gunthorpe
  0 siblings, 2 replies; 34+ messages in thread
From: Dan Williams @ 2019-02-11 22:55 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Ira Weiny, John Hubbard, linux-rdma, Linux Kernel Mailing List,
	Linux MM, Daniel Borkmann, Davidlohr Bueso, Netdev,
	Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov

On Mon, Feb 11, 2019 at 2:07 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Mon, Feb 11, 2019 at 01:52:38PM -0800, Ira Weiny wrote:
> > On Mon, Feb 11, 2019 at 01:39:12PM -0800, John Hubbard wrote:
> > > On 2/11/19 1:26 PM, Ira Weiny wrote:
> > > > On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
> > > >> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> > > >>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com wrote:
> > > >>>> From: Ira Weiny <ira.weiny@intel.com>
> > > >> [...]
> > > >> It seems to me that the longterm vs. short-term is of questionable value.
> > > >
> > > > This is exactly why I did not post this before.  I've been waiting our other
> > > > discussions on how GUP pins are going to be handled to play out.  But with the
> > > > netdev thread today[1] it seems like we need to make sure we have a "safe" fast
> > > > variant for a while.  Introducing FOLL_LONGTERM seemed like the cleanest way to
> > > > do that even if we will not need the distinction in the future...  :-(
> > >
> > > Yes, I agree. Below...
> > >
> > > > [...]
> > > > This is also why I did not change the get_user_pages_longterm because we could
> > > > be ripping this all out by the end of the year...  (I hope. :-)
> > > >
> > > > So while this does "pollute" the GUP family of calls I'm hoping it is not
> > > > forever.
> > > >
> > > > Ira
> > > >
> > > > [1] https://lkml.org/lkml/2019/2/11/1789
> > > >
> > >
> > > Yes, and to be clear, I think your patchset here is fine. It is easy to find
> > > the FOLL_LONGTERM callers if and when we want to change anything. I just think
> > > also it's appopriate to go a bit further, and use FOLL_LONGTERM all by itself.
> > >
> > > That's because in either design outcome, it's better that way:
> > >
> > > is just right. The gup API already has _fast and non-fast variants, and once
> > > you get past a couple, you end up with a multiplication of names that really
> > > work better as flags. We're there.
> > >
> > > the _longterm API variants.
> >
> > Fair enough.   But to do that correctly I think we will need to convert
> > get_user_pages_fast() to use flags as well.  I have a version of this series
> > which includes a patch does this, but the patch touched a lot of subsystems and
> > a couple of different architectures...[1]
>
> I think this should be done anyhow, it is trouble the two basically
> identical interfaces have different signatures. This already caused a
> bug in vfio..
>
> I also wonder if someone should think about making fast into a flag
> too..
>
> But I'm not sure when fast should be used vs when it shouldn't :(

Effectively fast should always be used just in case the user cares
about performance. It's just that it may fail and need to fall back to
requiring the vma.

Personally I thought RDMA memory registration is a one-time / upfront
slow path so that non-fast-GUP is tolerable.

The workloads that *need* it are O_DIRECT users that can't tolerate a
vma lookup on every I/O.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 22:55               ` Dan Williams
@ 2019-02-11 23:04                 ` Weiny, Ira
  2019-02-11 23:25                 ` Jason Gunthorpe
  1 sibling, 0 replies; 34+ messages in thread
From: Weiny, Ira @ 2019-02-11 23:04 UTC (permalink / raw)
  To: Williams, Dan J, Jason Gunthorpe
  Cc: John Hubbard, linux-rdma, Linux Kernel Mailing List, Linux MM,
	Daniel Borkmann, Davidlohr Bueso, Netdev, Marciniszyn, Mike,
	Dalessandro, Dennis, Doug Ledford, Andrew Morton,
	Kirill A. Shutemov

> 
> On Mon, Feb 11, 2019 at 2:07 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> >
> > On Mon, Feb 11, 2019 at 01:52:38PM -0800, Ira Weiny wrote:
> > > On Mon, Feb 11, 2019 at 01:39:12PM -0800, John Hubbard wrote:
> > > > On 2/11/19 1:26 PM, Ira Weiny wrote:
> > > > > On Mon, Feb 11, 2019 at 01:13:56PM -0800, John Hubbard wrote:
> > > > >> On 2/11/19 12:39 PM, Jason Gunthorpe wrote:
> > > > >>> On Mon, Feb 11, 2019 at 12:16:42PM -0800, ira.weiny@intel.com
> wrote:
> > > > >>>> From: Ira Weiny <ira.weiny@intel.com>
> > > > >> [...]
> > > > >> It seems to me that the longterm vs. short-term is of questionable
> value.
> > > > >
> > > > > This is exactly why I did not post this before.  I've been
> > > > > waiting our other discussions on how GUP pins are going to be
> > > > > handled to play out.  But with the netdev thread today[1] it
> > > > > seems like we need to make sure we have a "safe" fast variant
> > > > > for a while.  Introducing FOLL_LONGTERM seemed like the cleanest
> > > > > way to do that even if we will not need the distinction in the
> > > > > future...  :-(
> > > >
> > > > Yes, I agree. Below...
> > > >
> > > > > [...]
> > > > > This is also why I did not change the get_user_pages_longterm
> > > > > because we could be ripping this all out by the end of the
> > > > > year...  (I hope. :-)
> > > > >
> > > > > So while this does "pollute" the GUP family of calls I'm hoping
> > > > > it is not forever.
> > > > >
> > > > > Ira
> > > > >
> > > > > [1] https://lkml.org/lkml/2019/2/11/1789
> > > > >
> > > >
> > > > Yes, and to be clear, I think your patchset here is fine. It is
> > > > easy to find the FOLL_LONGTERM callers if and when we want to
> > > > change anything. I just think also it's appopriate to go a bit further, and
> use FOLL_LONGTERM all by itself.
> > > >
> > > > That's because in either design outcome, it's better that way:
> > > >
> > > > is just right. The gup API already has _fast and non-fast
> > > > variants, and once you get past a couple, you end up with a
> > > > multiplication of names that really work better as flags. We're there.
> > > >
> > > > the _longterm API variants.
> > >
> > > Fair enough.   But to do that correctly I think we will need to convert
> > > get_user_pages_fast() to use flags as well.  I have a version of
> > > this series which includes a patch does this, but the patch touched
> > > a lot of subsystems and a couple of different architectures...[1]
> >
> > I think this should be done anyhow, it is trouble the two basically
> > identical interfaces have different signatures. This already caused a
> > bug in vfio..
> >
> > I also wonder if someone should think about making fast into a flag
> > too..
> >
> > But I'm not sure when fast should be used vs when it shouldn't :(
> 
> Effectively fast should always be used just in case the user cares about
> performance. It's just that it may fail and need to fall back to requiring the
> vma.
> 
> Personally I thought RDMA memory registration is a one-time / upfront slow
> path so that non-fast-GUP is tolerable.
> 
> The workloads that *need* it are O_DIRECT users that can't tolerate a vma
> lookup on every I/O.

There are some users who need to [un]register memory more often.  While not in the strict fast path these users would like the registrations to occur as fast as possible.  I don't personally have the results but our OPA team did do performance tests on the GUP vs GUP fast and for the hfi1 case fast was better.  I don't have any reason to believe that regular RDMA users would not also benefit.

Ira


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 22:55               ` Dan Williams
  2019-02-11 23:04                 ` Weiny, Ira
@ 2019-02-11 23:25                 ` Jason Gunthorpe
  2019-02-12  0:08                   ` Ira Weiny
  1 sibling, 1 reply; 34+ messages in thread
From: Jason Gunthorpe @ 2019-02-11 23:25 UTC (permalink / raw)
  To: Dan Williams
  Cc: Ira Weiny, John Hubbard, linux-rdma, Linux Kernel Mailing List,
	Linux MM, Daniel Borkmann, Davidlohr Bueso, Netdev,
	Mike Marciniszyn, Dennis Dalessandro, Doug Ledford,
	Andrew Morton, Kirill A. Shutemov

On Mon, Feb 11, 2019 at 02:55:10PM -0800, Dan Williams wrote:

> > I also wonder if someone should think about making fast into a flag
> > too..
> >
> > But I'm not sure when fast should be used vs when it shouldn't :(
> 
> Effectively fast should always be used just in case the user cares
> about performance. It's just that it may fail and need to fall back to
> requiring the vma.

But the fall back / slow path is hidden inside the API, so when should
the caller care? 

ie when should the caller care to use gup_fast vs gup_unlocked? (the
comments say they are the same, but this seems to be a mistake)

Based on some of the comments in the code it looks like this API is
trying to convert itself into:

long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
                           unsigned long start, unsigned long nr_pages,
			   unsigned int gup_flags, struct page **pages,
			   struct vm_area_struct **vmas, bool *locked)

long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
                             unsigned long start, unsigned long nr_pages,
			     unsigned int gup_flags, struct page **pages)

(and maybe a FOLL_FAST if there is some reason we have _fast and
_unlocked)

The reason I ask, is that if there is no reason for fast vs unlocked
then maybe Ira should convert HFI to use gup_unlocked and move the
'fast' code into unlocked?

ie move incrementally closer to the desired end-state here.

Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm()
  2019-02-11 23:25                 ` Jason Gunthorpe
@ 2019-02-12  0:08                   ` Ira Weiny
  0 siblings, 0 replies; 34+ messages in thread
From: Ira Weiny @ 2019-02-12  0:08 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Dan Williams, John Hubbard, linux-rdma,
	Linux Kernel Mailing List, Linux MM, Daniel Borkmann,
	Davidlohr Bueso, Netdev, Mike Marciniszyn, Dennis Dalessandro,
	Doug Ledford, Andrew Morton, Kirill A. Shutemov

On Mon, Feb 11, 2019 at 04:25:10PM -0700, Jason Gunthorpe wrote:
> On Mon, Feb 11, 2019 at 02:55:10PM -0800, Dan Williams wrote:
> 
> > > I also wonder if someone should think about making fast into a flag
> > > too..
> > >
> > > But I'm not sure when fast should be used vs when it shouldn't :(
> > 
> > Effectively fast should always be used just in case the user cares
> > about performance. It's just that it may fail and need to fall back to
> > requiring the vma.
> 
> But the fall back / slow path is hidden inside the API, so when should
> the caller care? 
> 
> ie when should the caller care to use gup_fast vs gup_unlocked? (the
> comments say they are the same, but this seems to be a mistake)
> 
> Based on some of the comments in the code it looks like this API is
> trying to convert itself into:
> 
> long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
>                            unsigned long start, unsigned long nr_pages,
> 			   unsigned int gup_flags, struct page **pages,
> 			   struct vm_area_struct **vmas, bool *locked)
> 
> long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
>                              unsigned long start, unsigned long nr_pages,
> 			     unsigned int gup_flags, struct page **pages)
> 
> (and maybe a FOLL_FAST if there is some reason we have _fast and
> _unlocked)
> 
> The reason I ask, is that if there is no reason for fast vs unlocked
> then maybe Ira should convert HFI to use gup_unlocked and move the
> 'fast' code into unlocked?
> 
> ie move incrementally closer to the desired end-state here.

If the pages are not in the page tables then fast is probably going to be
slightly slower because it will have to fall back after walking the tables and
finding something missing.

For PSM2 (MPI) applications our performance improvement was probably because
the memory in question was in the page tables and very much in use.

Ira

> 
> Jason

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it
  2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
                   ` (4 preceding siblings ...)
  2019-02-11 20:40 ` Jason Gunthorpe
@ 2019-02-13 23:04 ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM ira.weiny
                     ` (6 more replies)
  5 siblings, 7 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

NOTE: This series depends on my clean up patch to remove the write parameter
from gup_fast_permitted()[1]

HFI1, qib, and mthca use get_user_pages_fast() due to its performance
advantages.  These pages can be held for a significant time.  But
get_user_pages_fast() does not protect against mapping of FS DAX pages.

Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast() which
retains the performance while also adding the FS DAX checks.  XDP has also
shown interest in using this functionality.[2]

In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and
remove the specialized get_user_pages_longterm call.

[1] https://lkml.org/lkml/2019/2/11/237
[2] https://lkml.org/lkml/2019/2/11/1789

Ira Weiny (7):
  mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
  mm/gup: Change write parameter to flags in fast walk
  mm/gup: Change GUP fast to use flags rather than a write 'bool'
  mm/gup: Add FOLL_LONGTERM capability to GUP fast
  IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()

 arch/mips/mm/gup.c                          |  11 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c         |   4 +-
 arch/powerpc/kvm/e500_mmu.c                 |   2 +-
 arch/powerpc/mm/mmu_context_iommu.c         |   4 +-
 arch/s390/kvm/interrupt.c                   |   2 +-
 arch/s390/mm/gup.c                          |  12 +-
 arch/sh/mm/gup.c                            |  11 +-
 arch/sparc/mm/gup.c                         |   9 +-
 arch/x86/kvm/paging_tmpl.h                  |   2 +-
 arch/x86/kvm/svm.c                          |   2 +-
 drivers/fpga/dfl-afu-dma-region.c           |   2 +-
 drivers/gpu/drm/via/via_dmablit.c           |   3 +-
 drivers/infiniband/core/umem.c              |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c     |   5 +-
 drivers/infiniband/hw/mthca/mthca_memfree.c |   3 +-
 drivers/infiniband/hw/qib/qib_user_pages.c  |   8 +-
 drivers/infiniband/hw/qib/qib_user_sdma.c   |   2 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c    |   9 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c   |   6 +-
 drivers/misc/genwqe/card_utils.c            |   2 +-
 drivers/misc/vmw_vmci/vmci_host.c           |   2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c     |   6 +-
 drivers/platform/goldfish/goldfish_pipe.c   |   3 +-
 drivers/rapidio/devices/rio_mport_cdev.c    |   4 +-
 drivers/sbus/char/oradax.c                  |   2 +-
 drivers/scsi/st.c                           |   3 +-
 drivers/staging/gasket/gasket_page_table.c  |   4 +-
 drivers/tee/tee_shm.c                       |   2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c         |   3 +-
 drivers/vfio/vfio_iommu_type1.c             |   3 +-
 drivers/vhost/vhost.c                       |   2 +-
 drivers/video/fbdev/pvr2fb.c                |   2 +-
 drivers/virt/fsl_hypervisor.c               |   2 +-
 drivers/xen/gntdev.c                        |   2 +-
 fs/orangefs/orangefs-bufmap.c               |   2 +-
 include/linux/mm.h                          |  17 +-
 kernel/futex.c                              |   2 +-
 lib/iov_iter.c                              |   7 +-
 mm/gup.c                                    | 220 ++++++++++++--------
 mm/gup_benchmark.c                          |   5 +-
 mm/util.c                                   |   8 +-
 net/ceph/pagevec.c                          |   2 +-
 net/rds/info.c                              |   2 +-
 net/rds/rdma.c                              |   3 +-
 44 files changed, 232 insertions(+), 180 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 2/7] mm/gup: Change write parameter to flags in fast walk ira.weiny
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

Rather than have a separate get_user_pages_longterm() call,
introduce FOLL_LONGTERM and change the longterm callers to use
it.

This patch does not change any functionality.

FOLL_LONGTERM can only be supported with get_user_pages() as it
requires vmas to determine if DAX is in use.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/core/umem.c             |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |   8 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   9 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c  |   6 +-
 drivers/vfio/vfio_iommu_type1.c            |   3 +-
 include/linux/mm.h                         |  13 +-
 mm/gup.c                                   | 138 ++++++++++++---------
 mm/gup_benchmark.c                         |   5 +-
 8 files changed, 101 insertions(+), 86 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b69d3efa8712..120a40df91b4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -185,10 +185,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 
 	while (npages) {
 		down_read(&mm->mmap_sem);
-		ret = get_user_pages_longterm(cur_base,
+		ret = get_user_pages(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
-				     gup_flags, page_list, vma_list);
+				     gup_flags | FOLL_LONGTERM,
+				     page_list, vma_list);
 		if (ret < 0) {
 			up_read(&mm->mmap_sem);
 			goto umem_release;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index ef8bcf366ddc..1b9368261035 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -114,10 +114,10 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 
 	down_read(&current->mm->mmap_sem);
 	for (got = 0; got < num_pages; got += ret) {
-		ret = get_user_pages_longterm(start_page + got * PAGE_SIZE,
-					      num_pages - got,
-					      FOLL_WRITE | FOLL_FORCE,
-					      p + got, NULL);
+		ret = get_user_pages(start_page + got * PAGE_SIZE,
+				     num_pages - got,
+				     FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
+				     p + got, NULL);
 		if (ret < 0) {
 			up_read(&current->mm->mmap_sem);
 			goto bail_release;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 06862a6af185..1d9a182ac163 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -143,10 +143,11 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	ret = 0;
 
 	while (npages) {
-		ret = get_user_pages_longterm(cur_base,
-					min_t(unsigned long, npages,
-					PAGE_SIZE / sizeof(struct page *)),
-					gup_flags, page_list, NULL);
+		ret = get_user_pages(cur_base,
+				     min_t(unsigned long, npages,
+				     PAGE_SIZE / sizeof(struct page *)),
+				     gup_flags | FOLL_LONGTERM,
+				     page_list, NULL);
 
 		if (ret < 0)
 			goto out;
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index 08929c087e27..870a2a526e0b 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -186,12 +186,12 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 	dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
 		data, size, dma->nr_pages);
 
-	err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
-			     flags, dma->pages, NULL);
+	err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
+			     flags | FOLL_LONGTERM, dma->pages, NULL);
 
 	if (err != dma->nr_pages) {
 		dma->nr_pages = (err >= 0) ? err : 0;
-		dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
+		dprintk(1, "get_user_pages: err=%d [%d]\n", err,
 			dma->nr_pages);
 		return err < 0 ? err : -EINVAL;
 	}
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 73652e21efec..1500bd0bb6da 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -351,7 +351,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
 	down_read(&mm->mmap_sem);
 	if (mm == current->mm) {
-		ret = get_user_pages_longterm(vaddr, 1, flags, page, vmas);
+		ret = get_user_pages(vaddr, 1, flags | FOLL_LONGTERM, page,
+				     vmas);
 	} else {
 		ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
 					    vmas, NULL);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..05a105d9d4c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1536,18 +1536,6 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
-#ifdef CONFIG_FS_DAX
-long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-			    unsigned int gup_flags, struct page **pages,
-			    struct vm_area_struct **vmas);
-#else
-static inline long get_user_pages_longterm(unsigned long start,
-		unsigned long nr_pages, unsigned int gup_flags,
-		struct page **pages, struct vm_area_struct **vmas)
-{
-	return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
-}
-#endif /* CONFIG_FS_DAX */
 
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
@@ -2615,6 +2603,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
+#define FOLL_LONGTERM	0x10000	/* mapping is intended for a long term pin */
 
 static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
 {
diff --git a/mm/gup.c b/mm/gup.c
index b63e88eca31b..ee96eaff118c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1109,87 +1109,109 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
-/*
- * This is the same as get_user_pages_remote(), just with a
- * less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's and don't allow
- * passing of a locked parameter.  We also obviously don't pass
- * FOLL_REMOTE in here.
- */
-long get_user_pages(unsigned long start, unsigned long nr_pages,
-		unsigned int gup_flags, struct page **pages,
-		struct vm_area_struct **vmas)
-{
-	return __get_user_pages_locked(current, current->mm, start, nr_pages,
-				       pages, vmas, NULL,
-				       gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(get_user_pages);
-
 #ifdef CONFIG_FS_DAX
 /*
- * This is the same as get_user_pages() in that it assumes we are
- * operating on the current task's mm, but it goes further to validate
- * that the vmas associated with the address range are suitable for
- * longterm elevated page reference counts. For example, filesystem-dax
- * mappings are subject to the lifetime enforced by the filesystem and
- * we need guarantees that longterm users like RDMA and V4L2 only
- * establish mappings that have a kernel enforced revocation mechanism.
+ * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
+ * allows us to process the FOLL_LONGTERM flag if present.
+ *
+ * __gup_longterm_locked() validates that the vmas associated with the address
+ * range are suitable for longterm elevated page reference counts. For example,
+ * filesystem-dax mappings are subject to the lifetime enforced by the
+ * filesystem and we need guarantees that longterm users like RDMA and V4L2
+ * only establish mappings that have a kernel enforced revocation mechanism.
  *
  * "longterm" == userspace controlled elevated page count lifetime.
  * Contrast this to iov_iter_get_pages() usages which are transient.
  */
-long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-		unsigned int gup_flags, struct page **pages,
-		struct vm_area_struct **vmas_arg)
+static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long nr_pages,
+						  struct page **pages,
+						  struct vm_area_struct **vmas,
+						  unsigned int flags)
 {
-	struct vm_area_struct **vmas = vmas_arg;
+	struct vm_area_struct **vmas_tmp = vmas;
 	struct vm_area_struct *vma_prev = NULL;
 	long rc, i;
 
-	if (!pages)
-		return -EINVAL;
-
-	if (!vmas) {
-		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
-			       GFP_KERNEL);
-		if (!vmas)
-			return -ENOMEM;
+	if (flags & FOLL_LONGTERM) {
+		if (!pages)
+			return -EINVAL;
+
+		if (!vmas_tmp) {
+			vmas_tmp = kcalloc(nr_pages,
+					   sizeof(struct vm_area_struct *),
+					   GFP_KERNEL);
+			if (!vmas_tmp)
+				return -ENOMEM;
+		}
 	}
 
-	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+	rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
+				     vmas_tmp, NULL, flags);
 
-	for (i = 0; i < rc; i++) {
-		struct vm_area_struct *vma = vmas[i];
+	if (flags & FOLL_LONGTERM) {
+		for (i = 0; i < rc; i++) {
+			struct vm_area_struct *vma = vmas_tmp[i];
 
-		if (vma == vma_prev)
-			continue;
+			if (vma == vma_prev)
+				continue;
 
-		vma_prev = vma;
+			vma_prev = vma;
 
-		if (vma_is_fsdax(vma))
-			break;
-	}
+			if (vma_is_fsdax(vma))
+				break;
+		}
 
-	/*
-	 * Either get_user_pages() failed, or the vma validation
-	 * succeeded, in either case we don't need to put_page() before
-	 * returning.
-	 */
-	if (i >= rc)
-		goto out;
+		/*
+		 * Either get_user_pages() failed, or the vma validation
+		 * succeeded, in either case we don't need to put_page() before
+		 * returning.
+		 */
+		if (i >= rc)
+			goto out;
 
-	for (i = 0; i < rc; i++)
-		put_page(pages[i]);
-	rc = -EOPNOTSUPP;
+		for (i = 0; i < rc; i++)
+			put_page(pages[i]);
+		rc = -EOPNOTSUPP;
 out:
-	if (vmas != vmas_arg)
-		kfree(vmas);
+		if (vmas_tmp != vmas)
+			kfree(vmas_tmp);
+	}
+
 	return rc;
 }
-EXPORT_SYMBOL(get_user_pages_longterm);
+#else /* !CONFIG_FS_DAX */
+static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long nr_pages,
+						  struct page **pages,
+						  struct vm_area_struct **vmas,
+						  unsigned int flags)
+{
+	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+				       NULL, flags);
+}
 #endif /* CONFIG_FS_DAX */
 
+/*
+ * This is the same as get_user_pages_remote(), just with a
+ * less-flexible calling convention where we assume that the task
+ * and mm being operated on are the current task's and don't allow
+ * passing of a locked parameter.  We also obviously don't pass
+ * FOLL_REMOTE in here.
+ */
+long get_user_pages(unsigned long start, unsigned long nr_pages,
+		unsigned int gup_flags, struct page **pages,
+		struct vm_area_struct **vmas)
+{
+	return __gup_longterm_locked(current, current->mm, start, nr_pages,
+				     pages, vmas, gup_flags | FOLL_TOUCH);
+}
+EXPORT_SYMBOL(get_user_pages);
+
 /**
  * populate_vma_page_range() -  populate a range of pages in the vma.
  * @vma:   target vma
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 5b42d3d4b60a..c898e2e0d1e4 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -54,8 +54,9 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
 						 pages + i);
 			break;
 		case GUP_LONGTERM_BENCHMARK:
-			nr = get_user_pages_longterm(addr, nr, gup->flags & 1,
-						     pages + i, NULL);
+			nr = get_user_pages(addr, nr,
+					    (gup->flags & 1) | FOLL_LONGTERM,
+					    pages + i, NULL);
 			break;
 		case GUP_BENCHMARK:
 			nr = get_user_pages(addr, nr, gup->flags & 1, pages + i,
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH V2 2/7] mm/gup: Change write parameter to flags in fast walk
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
  2019-02-13 23:04   ` [PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool' ira.weiny
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

In order to support more options in the GUP fast walk, change
the write parameter to flags throughout the call stack.

This patch does not change functionality and passes FOLL_WRITE
where write was previously used.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 mm/gup.c | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index ee96eaff118c..681388236106 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1417,7 +1417,7 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
 
 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	struct dev_pagemap *pgmap = NULL;
 	int nr_start = *nr, ret = 0;
@@ -1435,7 +1435,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 		if (pte_protnone(pte))
 			goto pte_unmap;
 
-		if (!pte_access_permitted(pte, write))
+		if (!pte_access_permitted(pte, flags & FOLL_WRITE))
 			goto pte_unmap;
 
 		if (pte_devmap(pte)) {
@@ -1487,7 +1487,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
  * useful to have gup_huge_pmd even if we can't operate on ptes.
  */
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	return 0;
 }
@@ -1570,12 +1570,12 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
 #endif
 
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
+		unsigned long end, unsigned int flags, struct page **pages, int *nr)
 {
 	struct page *head, *page;
 	int refs;
 
-	if (!pmd_access_permitted(orig, write))
+	if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
 	if (pmd_devmap(orig))
@@ -1608,12 +1608,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 }
 
 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
+		unsigned long end, unsigned int flags, struct page **pages, int *nr)
 {
 	struct page *head, *page;
 	int refs;
 
-	if (!pud_access_permitted(orig, write))
+	if (!pud_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
 	if (pud_devmap(orig))
@@ -1646,13 +1646,13 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 }
 
 static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
-			unsigned long end, int write,
+			unsigned long end, unsigned int flags,
 			struct page **pages, int *nr)
 {
 	int refs;
 	struct page *head, *page;
 
-	if (!pgd_access_permitted(orig, write))
+	if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
 	BUILD_BUG_ON(pgd_devmap(orig));
@@ -1683,7 +1683,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 }
 
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
+		unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pmd_t *pmdp;
@@ -1705,7 +1705,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			if (pmd_protnone(pmd))
 				return 0;
 
-			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+			if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
 				pages, nr))
 				return 0;
 
@@ -1715,9 +1715,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * pmd format and THP pmd format
 			 */
 			if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
-					 PMD_SHIFT, next, write, pages, nr))
+					 PMD_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+		} else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
 			return 0;
 	} while (pmdp++, addr = next, addr != end);
 
@@ -1725,7 +1725,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 }
 
 static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pud_t *pudp;
@@ -1738,14 +1738,14 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 		if (pud_none(pud))
 			return 0;
 		if (unlikely(pud_huge(pud))) {
-			if (!gup_huge_pud(pud, pudp, addr, next, write,
+			if (!gup_huge_pud(pud, pudp, addr, next, flags,
 					  pages, nr))
 				return 0;
 		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
 			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
-					 PUD_SHIFT, next, write, pages, nr))
+					 PUD_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+		} else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
 			return 0;
 	} while (pudp++, addr = next, addr != end);
 
@@ -1753,7 +1753,7 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 }
 
 static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
-			 int write, struct page **pages, int *nr)
+			 unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	p4d_t *p4dp;
@@ -1768,9 +1768,9 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 		BUILD_BUG_ON(p4d_huge(p4d));
 		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
 			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
-					 P4D_SHIFT, next, write, pages, nr))
+					 P4D_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+		} else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
 			return 0;
 	} while (p4dp++, addr = next, addr != end);
 
@@ -1778,7 +1778,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 }
 
 static void gup_pgd_range(unsigned long addr, unsigned long end,
-		int write, struct page **pages, int *nr)
+		unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pgd_t *pgdp;
@@ -1791,14 +1791,14 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
 		if (pgd_none(pgd))
 			return;
 		if (unlikely(pgd_huge(pgd))) {
-			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+			if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
 					  pages, nr))
 				return;
 		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
 			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-					 PGDIR_SHIFT, next, write, pages, nr))
+					 PGDIR_SHIFT, next, flags, pages, nr))
 				return;
-		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+		} else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
 			return;
 	} while (pgdp++, addr = next, addr != end);
 }
@@ -1852,7 +1852,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (gup_fast_permitted(start, nr_pages)) {
 		local_irq_save(flags);
-		gup_pgd_range(start, end, write, pages, &nr);
+		gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
 		local_irq_restore(flags);
 	}
 
@@ -1894,7 +1894,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (gup_fast_permitted(start, nr_pages)) {
 		local_irq_disable();
-		gup_pgd_range(addr, end, write, pages, &nr);
+		gup_pgd_range(addr, end, write ? FOLL_WRITE : 0, pages, &nr);
 		local_irq_enable();
 		ret = nr;
 	}
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool'
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
  2019-02-13 23:04   ` [PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM ira.weiny
  2019-02-13 23:04   ` [PATCH V2 2/7] mm/gup: Change write parameter to flags in fast walk ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast ira.weiny
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

To facilitate additional options to get_user_pages_fast(), change the
single write parameter to gup_flags.

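This patch is not intended to change any functionality (note, however,
that the sev_pin_memory() hunk in arch/x86/kvm/svm.c replaces
"write ? FOLL_WRITE : 0" with an unconditional FOLL_WRITE).  New
functionality will follow in subsequent patches.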

Some of the get_user_pages_fast() call sites were unchanged because they
already passed FOLL_WRITE or 0 for the write parameter.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 arch/mips/mm/gup.c                         | 11 ++++++-----
 arch/powerpc/kvm/book3s_64_mmu_hv.c        |  4 ++--
 arch/powerpc/kvm/e500_mmu.c                |  2 +-
 arch/powerpc/mm/mmu_context_iommu.c        |  4 ++--
 arch/s390/kvm/interrupt.c                  |  2 +-
 arch/s390/mm/gup.c                         | 12 ++++++------
 arch/sh/mm/gup.c                           | 11 ++++++-----
 arch/sparc/mm/gup.c                        |  9 +++++----
 arch/x86/kvm/paging_tmpl.h                 |  2 +-
 arch/x86/kvm/svm.c                         |  2 +-
 drivers/fpga/dfl-afu-dma-region.c          |  2 +-
 drivers/gpu/drm/via/via_dmablit.c          |  3 ++-
 drivers/infiniband/hw/hfi1/user_pages.c    |  3 ++-
 drivers/misc/genwqe/card_utils.c           |  2 +-
 drivers/misc/vmw_vmci/vmci_host.c          |  2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c    |  6 ++++--
 drivers/platform/goldfish/goldfish_pipe.c  |  3 ++-
 drivers/rapidio/devices/rio_mport_cdev.c   |  4 +++-
 drivers/sbus/char/oradax.c                 |  2 +-
 drivers/scsi/st.c                          |  3 ++-
 drivers/staging/gasket/gasket_page_table.c |  4 ++--
 drivers/tee/tee_shm.c                      |  2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c        |  3 ++-
 drivers/vhost/vhost.c                      |  2 +-
 drivers/video/fbdev/pvr2fb.c               |  2 +-
 drivers/virt/fsl_hypervisor.c              |  2 +-
 drivers/xen/gntdev.c                       |  2 +-
 fs/orangefs/orangefs-bufmap.c              |  2 +-
 include/linux/mm.h                         |  4 ++--
 kernel/futex.c                             |  2 +-
 lib/iov_iter.c                             |  7 +++++--
 mm/gup.c                                   | 10 +++++-----
 mm/util.c                                  |  8 ++++----
 net/ceph/pagevec.c                         |  2 +-
 net/rds/info.c                             |  2 +-
 net/rds/rdma.c                             |  3 ++-
 36 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 0d14e0d8eacf..4c2b4483683c 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
  * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
+ * @gup_flags:	flags modifying pin behaviour
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
@@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
  */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
@@ -273,7 +273,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none(pgd))
 			goto slow;
-		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+		if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
+				   pages, &nr))
 			goto slow;
 	} while (pgdp++, addr = next, addr != end);
 	local_irq_enable();
@@ -289,7 +290,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	pages += nr;
 
 	ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
-				      pages, write ? FOLL_WRITE : 0);
+				      pages, gup_flags);
 
 	/* Have to be a bit careful with return values */
 	if (nr > 0) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bd2dcfbf00cd..8fcb0a921e46 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -582,7 +582,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* If writing != 0, then the HPTE must allow writing, if we get here */
 	write_ok = writing;
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	npages = get_user_pages_fast(hva, 1, writing, pages);
+	npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
 	if (npages < 1) {
 		/* Check if it's an I/O mapping */
 		down_read(&current->mm->mmap_sem);
@@ -1175,7 +1175,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		goto err;
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	npages = get_user_pages_fast(hva, 1, 1, pages);
+	npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages);
 	if (npages < 1)
 		goto err;
 	page = pages[0];
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 24296f4cadc6..e0af53fd78c5 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -783,7 +783,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 	if (!pages)
 		return -ENOMEM;
 
-	ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+	ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages);
 	if (ret < 0)
 		goto free_pages;
 
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index a712a650a8b6..acb0990c8364 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -190,7 +190,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 	for (i = 0; i < entries; ++i) {
 		cur_ua = ua + (i << PAGE_SHIFT);
 		if (1 != get_user_pages_fast(cur_ua,
-					1/* pages */, 1/* iswrite */, &page)) {
+					1/* pages */, FOLL_WRITE, &page)) {
 			ret = -EFAULT;
 			for (j = 0; j < i; ++j)
 				put_page(pfn_to_page(mem->hpas[j] >>
@@ -209,7 +209,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 			if (mm_iommu_move_page_from_cma(page))
 				goto populate;
 			if (1 != get_user_pages_fast(cur_ua,
-						1/* pages */, 1/* iswrite */,
+						1/* pages */, FOLL_WRITE,
 						&page)) {
 				ret = -EFAULT;
 				for (j = 0; j < i; ++j)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fcb55b02990e..69d9366b966c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2278,7 +2278,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
 		ret = -EFAULT;
 		goto out;
 	}
-	ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+	ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
 	if (ret < 0)
 		goto out;
 	BUG_ON(ret != 1);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 2809d11c7a28..0a6faf3d9960 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -265,7 +265,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
  * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
+ * @gup_flags:	flags modifying pin behaviour
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
@@ -277,22 +277,22 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
  */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages)
 {
 	int nr, ret;
 
 	might_sleep();
 	start &= PAGE_MASK;
-	nr = __get_user_pages_fast(start, nr_pages, write, pages);
+	nr = __get_user_pages_fast(start, nr_pages, gup_flags & FOLL_WRITE,
+				   pages);
 	if (nr == nr_pages)
 		return nr;
 
 	/* Try to get the remaining pages with get_user_pages */
 	start += nr << PAGE_SHIFT;
 	pages += nr;
-	ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
-				      write ? FOLL_WRITE : 0);
+	ret = get_user_pages_unlocked(start, nr_pages - nr, pages, gup_flags);
 	/* Have to be a bit careful with return values */
 	if (nr > 0)
 		ret = (ret < 0) ? nr : ret + nr;
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index 3e27f6d1f1ec..277c882f7489 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -204,7 +204,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
  * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
+ * @gup_flags:	flags modifying pin behaviour
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
@@ -216,8 +216,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
  */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
@@ -241,7 +241,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none(pgd))
 			goto slow;
-		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+		if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
+				   pages, &nr))
 			goto slow;
 	} while (pgdp++, addr = next, addr != end);
 	local_irq_enable();
@@ -261,7 +262,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 		ret = get_user_pages_unlocked(start,
 			(end - start) >> PAGE_SHIFT, pages,
-			write ? FOLL_WRITE : 0);
+			gup_flags);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index aee6dba83d0e..1e770a517d4a 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -245,8 +245,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	return nr;
 }
 
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
@@ -303,7 +303,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none(pgd))
 			goto slow;
-		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+		if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
+				   pages, &nr))
 			goto slow;
 	} while (pgdp++, addr = next, addr != end);
 
@@ -324,7 +325,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 		ret = get_user_pages_unlocked(start,
 			(end - start) >> PAGE_SHIFT, pages,
-			write ? FOLL_WRITE : 0);
+			gup_flags);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bdca39829bc..08715034e315 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -140,7 +140,7 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	pt_element_t *table;
 	struct page *page;
 
-	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+	npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
 	/* Check if the user is doing something meaningless. */
 	if (unlikely(npages != 1))
 		return -EFAULT;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f13a3a24d360..173596a020cb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1803,7 +1803,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
 		return NULL;
 
 	/* Pin the user virtual address. */
-	npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+	npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
 	if (npinned != npages) {
 		pr_err("SEV: Failure locking %lu pages.\n", npages);
 		goto err;
diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c
index e18a786fc943..c438722bf4e1 100644
--- a/drivers/fpga/dfl-afu-dma-region.c
+++ b/drivers/fpga/dfl-afu-dma-region.c
@@ -102,7 +102,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
 		goto unlock_vm;
 	}
 
-	pinned = get_user_pages_fast(region->user_addr, npages, 1,
+	pinned = get_user_pages_fast(region->user_addr, npages, FOLL_WRITE,
 				     region->pages);
 	if (pinned < 0) {
 		ret = pinned;
diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index 345bda4494e1..0c8b09602910 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -239,7 +239,8 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg,  drm_via_dmablit_t *xfer)
 	if (NULL == vsg->pages)
 		return -ENOMEM;
 	ret = get_user_pages_fast((unsigned long)xfer->mem_addr,
-			vsg->num_pages, vsg->direction == DMA_FROM_DEVICE,
+			vsg->num_pages,
+			vsg->direction == DMA_FROM_DEVICE ? FOLL_WRITE : 0,
 			vsg->pages);
 	if (ret != vsg->num_pages) {
 		if (ret < 0)
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 24b592c6522e..78ccacaf97d0 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -105,7 +105,8 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 {
 	int ret;
 
-	ret = get_user_pages_fast(vaddr, npages, writable, pages);
+	ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0,
+				  pages);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 25265fd0fd6e..89cff9d1012b 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -603,7 +603,7 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
 	/* pin user pages in memory */
 	rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
 				 m->nr_pages,
-				 m->write,		/* readable/writable */
+				 m->write ? FOLL_WRITE : 0,	/* readable/writable */
 				 m->page_list);	/* ptrs to pages */
 	if (rc < 0)
 		goto fail_get_user_pages;
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 997f92543dd4..422d08da3244 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -242,7 +242,7 @@ static int vmci_host_setup_notify(struct vmci_ctx *context,
 	/*
 	 * Lock physical page backing a given user VA.
 	 */
-	retval = get_user_pages_fast(uva, 1, 1, &context->notify_page);
+	retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &context->notify_page);
 	if (retval != 1) {
 		context->notify_page = NULL;
 		return VMCI_ERROR_GENERIC;
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 264f4ed8eef2..c5396ee32e51 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -666,7 +666,8 @@ static int qp_host_get_user_memory(u64 produce_uva,
 	int err = VMCI_SUCCESS;
 
 	retval = get_user_pages_fast((uintptr_t) produce_uva,
-				     produce_q->kernel_if->num_pages, 1,
+				     produce_q->kernel_if->num_pages,
+				     FOLL_WRITE,
 				     produce_q->kernel_if->u.h.header_page);
 	if (retval < (int)produce_q->kernel_if->num_pages) {
 		pr_debug("get_user_pages_fast(produce) failed (retval=%d)",
@@ -678,7 +679,8 @@ static int qp_host_get_user_memory(u64 produce_uva,
 	}
 
 	retval = get_user_pages_fast((uintptr_t) consume_uva,
-				     consume_q->kernel_if->num_pages, 1,
+				     consume_q->kernel_if->num_pages,
+				     FOLL_WRITE,
 				     consume_q->kernel_if->u.h.header_page);
 	if (retval < (int)consume_q->kernel_if->num_pages) {
 		pr_debug("get_user_pages_fast(consume) failed (retval=%d)",
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 321bc673c417..cef0133aa47a 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -274,7 +274,8 @@ static int pin_user_pages(unsigned long first_page,
 		*iter_last_page_size = last_page_size;
 	}
 
-	ret = get_user_pages_fast(first_page, requested_pages, !is_write,
+	ret = get_user_pages_fast(first_page, requested_pages,
+				  !is_write ? FOLL_WRITE : 0,
 				  pages);
 	if (ret <= 0)
 		return -EFAULT;
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index cbe467ff1aba..f681b3e9e970 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -868,7 +868,9 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
 
 		pinned = get_user_pages_fast(
 				(unsigned long)xfer->loc_addr & PAGE_MASK,
-				nr_pages, dir == DMA_FROM_DEVICE, page_list);
+				nr_pages,
+				dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0,
+				page_list);
 
 		if (pinned != nr_pages) {
 			if (pinned < 0) {
diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c
index 6516bc3cb58b..790aa148670d 100644
--- a/drivers/sbus/char/oradax.c
+++ b/drivers/sbus/char/oradax.c
@@ -437,7 +437,7 @@ static int dax_lock_page(void *va, struct page **p)
 
 	dax_dbg("uva %p", va);
 
-	ret = get_user_pages_fast((unsigned long)va, 1, 1, p);
+	ret = get_user_pages_fast((unsigned long)va, 1, FOLL_WRITE, p);
 	if (ret == 1) {
 		dax_dbg("locked page %p, for VA %p", *p, va);
 		return 0;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 7ff22d3f03e3..871b25914c07 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4918,7 +4918,8 @@ static int sgl_map_user_pages(struct st_buffer *STbp,
 
         /* Try to fault in all of the necessary pages */
         /* rw==READ means read from drive, write into memory area */
-	res = get_user_pages_fast(uaddr, nr_pages, rw == READ, pages);
+	res = get_user_pages_fast(uaddr, nr_pages, rw == READ ? FOLL_WRITE : 0,
+				  pages);
 
 	/* Errors and no page mapped should return here */
 	if (res < nr_pages)
diff --git a/drivers/staging/gasket/gasket_page_table.c b/drivers/staging/gasket/gasket_page_table.c
index 26755d9ca41d..f67fdf1d3817 100644
--- a/drivers/staging/gasket/gasket_page_table.c
+++ b/drivers/staging/gasket/gasket_page_table.c
@@ -486,8 +486,8 @@ static int gasket_perform_mapping(struct gasket_page_table *pg_tbl,
 			ptes[i].dma_addr = pg_tbl->coherent_pages[0].paddr +
 					   off + i * PAGE_SIZE;
 		} else {
-			ret = get_user_pages_fast(page_addr - offset, 1, 1,
-						  &page);
+			ret = get_user_pages_fast(page_addr - offset, 1,
+						  FOLL_WRITE, &page);
 
 			if (ret <= 0) {
 				dev_err(pg_tbl->device,
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 0b9ab1d0dd45..49fd7312e2aa 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -273,7 +273,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
 		goto err;
 	}
 
-	rc = get_user_pages_fast(start, num_pages, 1, shm->pages);
+	rc = get_user_pages_fast(start, num_pages, FOLL_WRITE, shm->pages);
 	if (rc > 0)
 		shm->num_pages = rc;
 	if (rc != num_pages) {
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index c424913324e3..a4b10bb4086b 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -532,7 +532,8 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
 	enum dma_data_direction direction = iommu_tce_direction(tce);
 
 	if (get_user_pages_fast(tce & PAGE_MASK, 1,
-			direction != DMA_TO_DEVICE, &page) != 1)
+			direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
+			&page) != 1)
 		return -EFAULT;
 
 	*hpa = __pa((unsigned long) page_address(page));
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 24a129fcdd61..72685b1659ff 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1700,7 +1700,7 @@ static int set_bit_to_user(int nr, void __user *addr)
 	int bit = nr + (log % PAGE_SIZE) * 8;
 	int r;
 
-	r = get_user_pages_fast(log, 1, 1, &page);
+	r = get_user_pages_fast(log, 1, FOLL_WRITE, &page);
 	if (r < 0)
 		return r;
 	BUG_ON(r != 1);
diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c
index 8a53d1de611d..41390c8e0f67 100644
--- a/drivers/video/fbdev/pvr2fb.c
+++ b/drivers/video/fbdev/pvr2fb.c
@@ -686,7 +686,7 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf,
 	if (!pages)
 		return -ENOMEM;
 
-	ret = get_user_pages_fast((unsigned long)buf, nr_pages, true, pages);
+	ret = get_user_pages_fast((unsigned long)buf, nr_pages, FOLL_WRITE, pages);
 	if (ret < nr_pages) {
 		nr_pages = ret;
 		ret = -EINVAL;
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 8ba726e600e9..6446bcab4185 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -244,7 +244,7 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
 
 	/* Get the physical addresses of the source buffer */
 	num_pinned = get_user_pages_fast(param.local_vaddr - lb_offset,
-		num_pages, param.source != -1, pages);
+		num_pages, param.source != -1 ? FOLL_WRITE : 0, pages);
 
 	if (num_pinned != num_pages) {
 		/* get_user_pages() failed */
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 5efc5eee9544..7b47f1e6aab4 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -852,7 +852,7 @@ static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
 	unsigned long xen_pfn;
 	int ret;
 
-	ret = get_user_pages_fast(addr, 1, writeable, &page);
+	ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
 	if (ret < 0)
 		return ret;
 
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 443bcd8c3c19..5a7c4fda682f 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -269,7 +269,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
 
 	/* map the pages */
 	ret = get_user_pages_fast((unsigned long)user_desc->ptr,
-			     bufmap->page_count, 1, bufmap->page_array);
+			     bufmap->page_count, FOLL_WRITE, bufmap->page_array);
 
 	if (ret < 0)
 		return ret;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 05a105d9d4c3..8e1f3cd7482a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1537,8 +1537,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
 
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages);
+int get_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages);
 
 /* Container for pinned pfns / pages */
 struct frame_vector {
diff --git a/kernel/futex.c b/kernel/futex.c
index fdd312da0992..e10209946f8b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -546,7 +546,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 	if (unlikely(should_fail_futex(fshared)))
 		return -EFAULT;
 
-	err = get_user_pages_fast(address, 1, 1, &page);
+	err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
 	/*
 	 * If write access is not required (eg. FUTEX_WAIT), try
 	 * and get read-only access.
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index be4bd627caf0..6dbae0692719 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1280,7 +1280,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 			len = maxpages * PAGE_SIZE;
 		addr &= ~(PAGE_SIZE - 1);
 		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
+		res = get_user_pages_fast(addr, n,
+				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
+				pages);
 		if (unlikely(res < 0))
 			return res;
 		return (res == n ? len : res * PAGE_SIZE) - *start;
@@ -1361,7 +1363,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		p = get_pages_array(n);
 		if (!p)
 			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
+		res = get_user_pages_fast(addr, n,
+				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
 		if (unlikely(res < 0)) {
 			kvfree(p);
 			return res;
diff --git a/mm/gup.c b/mm/gup.c
index 681388236106..6f32d36b3c5b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1863,7 +1863,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
  * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
+ * @gup_flags:	flags modifying pin behaviour
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
@@ -1875,8 +1875,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
  */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages)
+int get_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages)
 {
 	unsigned long addr, len, end;
 	int nr = 0, ret = 0;
@@ -1894,7 +1894,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (gup_fast_permitted(start, nr_pages)) {
 		local_irq_disable();
-		gup_pgd_range(addr, end, write ? FOLL_WRITE : 0, pages, &nr);
+		gup_pgd_range(addr, end, gup_flags, pages, &nr);
 		local_irq_enable();
 		ret = nr;
 	}
@@ -1905,7 +1905,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		pages += nr;
 
 		ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
-				write ? FOLL_WRITE : 0);
+					      gup_flags);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/mm/util.c b/mm/util.c
index 1ea055138043..01ffe145c62b 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -306,7 +306,7 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast);
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
  * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to
+ * @gup_flags:	flags modifying pin behaviour
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
@@ -327,10 +327,10 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast);
  * get_user_pages_fast simply falls back to get_user_pages.
  */
 int __weak get_user_pages_fast(unsigned long start,
-				int nr_pages, int write, struct page **pages)
+				int nr_pages, unsigned int gup_flags,
+				struct page **pages)
 {
-	return get_user_pages_unlocked(start, nr_pages, pages,
-				       write ? FOLL_WRITE : 0);
+	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index d3736f5bffec..74cafc0142ea 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -27,7 +27,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
 	while (got < num_pages) {
 		rc = get_user_pages_fast(
 		    (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
-		    num_pages - got, write_page, pages + got);
+		    num_pages - got, write_page ? FOLL_WRITE : 0, pages + got);
 		if (rc < 0)
 			break;
 		BUG_ON(rc == 0);
diff --git a/net/rds/info.c b/net/rds/info.c
index e367a97a18c8..03f6fd56d237 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -193,7 +193,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
 		ret = -ENOMEM;
 		goto out;
 	}
-	ret = get_user_pages_fast(start, nr_pages, 1, pages);
+	ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
 	if (ret != nr_pages) {
 		if (ret > 0)
 			nr_pages = ret;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 182ab8430594..b340ed4fc43a 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -158,7 +158,8 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
 {
 	int ret;
 
-	ret = get_user_pages_fast(user_addr, nr_pages, write, pages);
+	ret = get_user_pages_fast(user_addr, nr_pages, write ? FOLL_WRITE : 0,
+				  pages);
 
 	if (ret >= 0 && ret < nr_pages) {
 		while (ret--)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH V2 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
                     ` (2 preceding siblings ...)
  2019-02-13 23:04   ` [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool' ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast() ira.weiny
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

DAX pages were previously unprotected from longterm pins when users
called get_user_pages_fast().

Use the new FOLL_LONGTERM flag to check for DEVMAP pages and fall
back to regular GUP processing if a DEVMAP page is encountered.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 mm/gup.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 6f32d36b3c5b..f7e759c523bb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1439,6 +1439,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 			goto pte_unmap;
 
 		if (pte_devmap(pte)) {
+			if (unlikely(flags & FOLL_LONGTERM))
+				goto pte_unmap;
+
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
 				undo_dev_pagemap(nr, nr_start, pages);
@@ -1578,8 +1581,11 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 	if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
 		return 0;
 
-	if (pmd_devmap(orig))
+	if (pmd_devmap(orig)) {
+		if (unlikely(flags & FOLL_LONGTERM))
+			return 0;
 		return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
+	}
 
 	refs = 0;
 	page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1904,8 +1910,20 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
 		start += nr << PAGE_SHIFT;
 		pages += nr;
 
-		ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
-					      gup_flags);
+		if (gup_flags & FOLL_LONGTERM) {
+			down_read(&current->mm->mmap_sem);
+			ret = __gup_longterm_locked(current, current->mm,
+						    start, nr_pages - nr,
+						    pages, NULL, gup_flags);
+			up_read(&current->mm->mmap_sem);
+		} else {
+			/*
+			 * retain FAULT_FLAG_ALLOW_RETRY optimization if
+			 * possible
+			 */
+			ret = get_user_pages_unlocked(start, nr_pages - nr,
+						      pages, gup_flags);
+		}
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH V2 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
                     ` (3 preceding siblings ...)
  2019-02-13 23:04   ` [PATCH V2 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 6/7] IB/qib: " ira.weiny
  2019-02-13 23:04   ` [PATCH V2 7/7] IB/mthca: " ira.weiny
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

Pass the new FOLL_LONGTERM flag to get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/hw/hfi1/user_pages.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 78ccacaf97d0..6a7f9cd5a94e 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -104,9 +104,11 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 			    bool writable, struct page **pages)
 {
 	int ret;
+	unsigned int gup_flags = writable ? FOLL_WRITE : 0;
 
-	ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0,
-				  pages);
+	gup_flags |= FOLL_LONGTERM;
+
+	ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
 	if (ret < 0)
 		return ret;
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH V2 6/7] IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
                     ` (4 preceding siblings ...)
  2019-02-13 23:04   ` [PATCH V2 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast() ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  2019-02-13 23:04   ` [PATCH V2 7/7] IB/mthca: " ira.weiny
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

Pass the new FOLL_LONGTERM flag to get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 31c523b2a9f5..b53cc0240e02 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -673,7 +673,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
 		else
 			j = npages;
 
-		ret = get_user_pages_fast(addr, j, 0, pages);
+		ret = get_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
 		if (ret != j) {
 			i = 0;
 			j = ret;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH V2 7/7] IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
  2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
                     ` (5 preceding siblings ...)
  2019-02-13 23:04   ` [PATCH V2 6/7] IB/qib: " ira.weiny
@ 2019-02-13 23:04   ` ira.weiny
  6 siblings, 0 replies; 34+ messages in thread
From: ira.weiny @ 2019-02-13 23:04 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel
  Cc: Ira Weiny, John Hubbard, David Hildenbrand, Cornelia Huck,
	Yoshinori Sato, Rich Felker, David S. Miller, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Joerg Roedel, Wu Hao, Alan Tull,
	Moritz Fischer, David Airlie, Daniel Vetter, Jason Gunthorpe,
	Dennis Dalessandro, Christian Benvenuti, Mauro Carvalho Chehab,
	Matt Porter, Alexandre Bounine, Kai Mäkisara,
	James E.J. Bottomley, Martin K. Petersen, Rob Springer,
	Todd Poynor, Ben Chan, Jens Wiklander, Alex Williamson,
	Michael S. Tsirkin, Jason Wang, Bartlomiej Zolnierkiewicz,
	Stefano Stabellini, Martin Brandenburg, Peter Zijlstra,
	Alexander Viro, Andrew Morton, Michal Hocko, Kirill A. Shutemov

From: Ira Weiny <ira.weiny@intel.com>

Pass the new FOLL_LONGTERM flag to get_user_pages_fast() to protect against
FS DAX pages being mapped.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/hw/mthca/mthca_memfree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 112d2f38e0de..8ff0e90d7564 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,7 +472,8 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
 		goto out;
 	}
 
-	ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
+	ret = get_user_pages_fast(uaddr & PAGE_MASK, 1,
+				  FOLL_WRITE | FOLL_LONGTERM, pages);
 	if (ret < 0)
 		goto out;
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2019-02-13 23:06 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-11 20:16 [PATCH 0/3] Add gup fast + longterm and use it in HFI1 ira.weiny
2019-02-11 20:16 ` [PATCH 1/3] mm/gup: Change "write" parameter to flags ira.weiny
2019-02-11 20:16 ` [PATCH 2/3] mm/gup: Introduce get_user_pages_fast_longterm() ira.weiny
2019-02-11 20:39   ` Jason Gunthorpe
2019-02-11 21:13     ` John Hubbard
2019-02-11 21:26       ` Ira Weiny
2019-02-11 21:39         ` John Hubbard
2019-02-11 21:45           ` Dan Williams
2019-02-11 21:52           ` Ira Weiny
2019-02-11 22:01             ` John Hubbard
2019-02-11 22:06             ` Jason Gunthorpe
2019-02-11 22:55               ` Dan Williams
2019-02-11 23:04                 ` Weiny, Ira
2019-02-11 23:25                 ` Jason Gunthorpe
2019-02-12  0:08                   ` Ira Weiny
2019-02-11 20:16 ` [PATCH 3/3] IB/HFI1: Use new get_user_pages_fast_longterm() ira.weiny
2019-02-11 20:34 ` [PATCH 0/3] Add gup fast + longterm and use it in HFI1 Davidlohr Bueso
2019-02-11 20:47   ` Jason Gunthorpe
2019-02-11 21:42     ` Ira Weiny
2019-02-11 22:22       ` Jason Gunthorpe
2019-02-11 22:40         ` Weiny, Ira
2019-02-11 22:50           ` Jason Gunthorpe
2019-02-11 21:29   ` Ira Weiny
2019-02-11 20:40 ` Jason Gunthorpe
2019-02-11 21:14   ` Weiny, Ira
2019-02-11 22:23     ` Jason Gunthorpe
2019-02-13 23:04 ` [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it ira.weiny
2019-02-13 23:04   ` [PATCH V2 1/7] mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM ira.weiny
2019-02-13 23:04   ` [PATCH V2 2/7] mm/gup: Change write parameter to flags in fast walk ira.weiny
2019-02-13 23:04   ` [PATCH V2 3/7] mm/gup: Change GUP fast to use flags rather than a write 'bool' ira.weiny
2019-02-13 23:04   ` [PATCH V2 4/7] mm/gup: Add FOLL_LONGTERM capability to GUP fast ira.weiny
2019-02-13 23:04   ` [PATCH V2 5/7] IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast() ira.weiny
2019-02-13 23:04   ` [PATCH V2 6/7] IB/qib: " ira.weiny
2019-02-13 23:04   ` [PATCH V2 7/7] IB/mthca: " ira.weiny

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).