All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] Simplify the external interface for GUP
@ 2023-01-17 15:58 Jason Gunthorpe
  2023-01-17 15:58 ` [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock() Jason Gunthorpe
                   ` (8 more replies)
  0 siblings, 9 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

It is quite a maze of EXPORTED symbols leading up to the three actual
worker functions of GUP. Simplify this by reorganizing some of the code so
the EXPORTED symbols directly call the correct internal function with
validated and consistent arguments.

Consolidate all the assertions into one place at the top of the call
chains.

Remove some dead code.

Jason Gunthorpe (8):
  mm/gup: have internal functions get the mmap_read_lock()
  mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be
    set
  mm/gup: simplify the external interface functions and consolidate
    invariants
  mm/gup: add an assertion that the mmap lock is locked
  mm/gup: add FOLL_UNLOCK
  mm/gup: make locked never NULL in the internal GUP functions
  mm/gup: remove pin_user_pages_fast_only()
  mm/gup: make get_user_pages_fast_only() return the common return value

 include/linux/mm.h |   3 +-
 mm/gup.c           | 319 +++++++++++++++++++++------------------------
 mm/huge_memory.c   |  10 --
 3 files changed, 150 insertions(+), 182 deletions(-)


base-commit: 5dc4c995db9eb45f6373a956eb1f69460e69e6d4
-- 
2.39.0



^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock()
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-19 11:16   ` Mike Rapoport
  2023-01-19 21:19   ` John Hubbard
  2023-01-17 15:58 ` [PATCH 2/8] mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be set Jason Gunthorpe
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

__get_user_pages_locked() and __gup_longterm_locked() both require the
mmap lock to be held. They have a slightly unusual locked parameter that
is used to allow these functions to unlock and relock the mmap lock and
convey that fact to the caller.

Several places wrap these functions with a simple mmap_read_lock() just
so they can follow the optimized locked protocol.

Consolidate this internally to the functions. Allow internal callers to
set locked = 0 to cause the functions to obtain and release the lock on
their own.

Reorganize __gup_longterm_locked() to use the autolocking in
__get_user_pages_locked().

Replace all the places obtaining the mmap_read_lock() just to call
__get_user_pages_locked() with the new mechanism. Replace all the internal
callers of get_user_pages_unlocked() with direct calls to
__gup_longterm_locked() using the new mechanism.

A following patch will add assertions ensuring the external interface
continues to always pass in locked = 1.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 mm/gup.c | 92 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 47 insertions(+), 45 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index f45a3a5be53a48..3a9f764165f50b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1343,13 +1343,22 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 						unsigned int flags)
 {
 	long ret, pages_done;
-	bool lock_dropped;
+	bool lock_dropped = false;
 
 	if (locked) {
 		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
 		BUG_ON(vmas);
-		/* check caller initialized locked */
-		BUG_ON(*locked != 1);
+	}
+
+	/*
+	 * The internal caller expects GUP to manage the lock internally and the
+	 * lock must be released when this returns.
+	 */
+	if (locked && !*locked) {
+		if (mmap_read_lock_killable(mm))
+			return -EAGAIN;
+		lock_dropped = true;
+		*locked = 1;
 	}
 
 	if (flags & FOLL_PIN)
@@ -1368,7 +1377,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 		flags |= FOLL_GET;
 
 	pages_done = 0;
-	lock_dropped = false;
 	for (;;) {
 		ret = __get_user_pages(mm, start, nr_pages, flags, pages,
 				       vmas, locked);
@@ -1659,9 +1667,24 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
 		unsigned int foll_flags)
 {
 	struct vm_area_struct *vma;
+	bool must_unlock = false;
 	unsigned long vm_flags;
 	long i;
 
+	if (!nr_pages)
+		return 0;
+
+	/*
+	 * The internal caller expects GUP to manage the lock internally and the
+	 * lock must be released when this returns.
+	 */
+	if (locked && !*locked) {
+		if (mmap_read_lock_killable(mm))
+			return -EAGAIN;
+		must_unlock = true;
+		*locked = 1;
+	}
+
 	/* calculate required read or write permissions.
 	 * If FOLL_FORCE is set, we only require the "MAY" flags.
 	 */
@@ -1673,12 +1696,12 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
 	for (i = 0; i < nr_pages; i++) {
 		vma = find_vma(mm, start);
 		if (!vma)
-			goto finish_or_fault;
+			break;
 
 		/* protect what we can, including chardevs */
 		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
 		    !(vm_flags & vma->vm_flags))
-			goto finish_or_fault;
+			break;
 
 		if (pages) {
 			pages[i] = virt_to_page((void *)start);
@@ -1690,9 +1713,11 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
 		start = (start + PAGE_SIZE) & PAGE_MASK;
 	}
 
-	return i;
+	if (must_unlock && *locked) {
+		mmap_read_unlock(mm);
+		*locked = 0;
+	}
 
-finish_or_fault:
 	return i ? : -EFAULT;
 }
 #endif /* !CONFIG_MMU */
@@ -1861,17 +1886,13 @@ EXPORT_SYMBOL(fault_in_readable);
 #ifdef CONFIG_ELF_CORE
 struct page *get_dump_page(unsigned long addr)
 {
-	struct mm_struct *mm = current->mm;
 	struct page *page;
-	int locked = 1;
+	int locked = 0;
 	int ret;
 
-	if (mmap_read_lock_killable(mm))
-		return NULL;
-	ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
+	ret = __get_user_pages_locked(current->mm, addr, 1, &page, NULL,
+				      &locked,
 				      FOLL_FORCE | FOLL_DUMP | FOLL_GET);
-	if (locked)
-		mmap_read_unlock(mm);
 	return (ret == 1) ? page : NULL;
 }
 #endif /* CONFIG_ELF_CORE */
@@ -2047,13 +2068,9 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 				  int *locked,
 				  unsigned int gup_flags)
 {
-	bool must_unlock = false;
 	unsigned int flags;
 	long rc, nr_pinned_pages;
 
-	if (locked && WARN_ON_ONCE(!*locked))
-		return -EINVAL;
-
 	if (!(gup_flags & FOLL_LONGTERM))
 		return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
 					       locked, gup_flags);
@@ -2070,11 +2087,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 		return -EINVAL;
 	flags = memalloc_pin_save();
 	do {
-		if (locked && !*locked) {
-			mmap_read_lock(mm);
-			must_unlock = true;
-			*locked = 1;
-		}
 		nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
 							  pages, vmas, locked,
 							  gup_flags);
@@ -2085,11 +2097,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 		rc = check_and_migrate_movable_pages(nr_pinned_pages, pages);
 	} while (rc == -EAGAIN);
 	memalloc_pin_restore(flags);
-
-	if (locked && *locked && must_unlock) {
-		mmap_read_unlock(mm);
-		*locked = 0;
-	}
 	return rc ? rc : nr_pinned_pages;
 }
 
@@ -2242,16 +2249,10 @@ EXPORT_SYMBOL(get_user_pages);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 			     struct page **pages, unsigned int gup_flags)
 {
-	struct mm_struct *mm = current->mm;
-	int locked = 1;
-	long ret;
+	int locked = 0;
 
-	mmap_read_lock(mm);
-	ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked,
-				    gup_flags | FOLL_TOUCH);
-	if (locked)
-		mmap_read_unlock(mm);
-	return ret;
+	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
+				     &locked, gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -2904,6 +2905,7 @@ static int internal_get_user_pages_fast(unsigned long start,
 {
 	unsigned long len, end;
 	unsigned long nr_pinned;
+	int locked = 0;
 	int ret;
 
 	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
@@ -2932,8 +2934,9 @@ static int internal_get_user_pages_fast(unsigned long start,
 	/* Slow path: try to get the remaining pages with get_user_pages */
 	start += nr_pinned << PAGE_SHIFT;
 	pages += nr_pinned;
-	ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages,
-				      gup_flags);
+	ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned,
+				    pages, NULL, &locked,
+				    gup_flags | FOLL_TOUCH);
 	if (ret < 0) {
 		/*
 		 * The caller has to unpin the pages we already pinned so
@@ -3180,14 +3183,13 @@ EXPORT_SYMBOL(pin_user_pages);
 long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 			     struct page **pages, unsigned int gup_flags)
 {
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-		return -EINVAL;
+	int locked = 0;
 
 	if (WARN_ON_ONCE(!pages))
 		return -EINVAL;
 
-	gup_flags |= FOLL_PIN;
-	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
+	gup_flags |= FOLL_PIN | FOLL_TOUCH;
+	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
+				     &locked, gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages_unlocked);
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 2/8] mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be set
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
  2023-01-17 15:58 ` [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock() Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-19 22:24   ` John Hubbard
  2023-01-17 15:58 ` [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants Jason Gunthorpe
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

get_user_pages_remote(), get_user_pages_unlocked() and get_user_pages()
are never called with FOLL_LONGTERM, so directly call
__get_user_pages_locked().

The next patch will add an assertion for this.

Suggested-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 mm/gup.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 3a9f764165f50b..2c833f862d0354 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2188,8 +2188,8 @@ long get_user_pages_remote(struct mm_struct *mm,
 	if (!is_valid_gup_flags(gup_flags))
 		return -EINVAL;
 
-	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
-				     gup_flags | FOLL_TOUCH | FOLL_REMOTE);
+	return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked,
+				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -2226,8 +2226,8 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 	if (!is_valid_gup_flags(gup_flags))
 		return -EINVAL;
 
-	return __gup_longterm_locked(current->mm, start, nr_pages,
-				     pages, vmas, NULL, gup_flags | FOLL_TOUCH);
+	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
+				       vmas, NULL, gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -2251,8 +2251,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 {
 	int locked = 0;
 
-	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
-				     &locked, gup_flags | FOLL_TOUCH);
+	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
+				       NULL, &locked, gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
  2023-01-17 15:58 ` [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock() Jason Gunthorpe
  2023-01-17 15:58 ` [PATCH 2/8] mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be set Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-19 11:17   ` Mike Rapoport
  2023-01-20  2:51   ` John Hubbard
  2023-01-17 15:58 ` [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked Jason Gunthorpe
                   ` (5 subsequent siblings)
  8 siblings, 2 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

The GUP family of functions have a complex, but fairly well defined, set
of invariants for their arguments. Currently these are sprinkled about,
sometimes in duplicate through many functions.

Internally we don't follow all the invariants that the external interface
has to follow, so place these checks directly at the exported
interface. This ensures the internal functions never reach a violated
invariant.

Remove the duplicated invariant checks.

The end result is to make these functions fully internal:
 __get_user_pages_locked()
 internal_get_user_pages_fast()
 __gup_longterm_locked()

And all the other functions call directly into one of these.

Suggested-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 mm/gup.c         | 150 +++++++++++++++++++++++------------------------
 mm/huge_memory.c |  10 ----
 2 files changed, 75 insertions(+), 85 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 2c833f862d0354..9e332e3f6ea8e2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -215,7 +215,6 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
 {
 	struct folio *folio = page_folio(page);
 
-	WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
 	if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
 		return -ENOMEM;
 
@@ -818,7 +817,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	if (vma_is_secretmem(vma))
 		return NULL;
 
-	if (foll_flags & FOLL_PIN)
+	if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
 		return NULL;
 
 	page = follow_page_mask(vma, address, foll_flags, &ctx);
@@ -975,9 +974,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
 	if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
 		return -EOPNOTSUPP;
 
-	if ((gup_flags & FOLL_LONGTERM) && (gup_flags & FOLL_PCI_P2PDMA))
-		return -EOPNOTSUPP;
-
 	if (vma_is_secretmem(vma))
 		return -EFAULT;
 
@@ -1345,11 +1341,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 	long ret, pages_done;
 	bool lock_dropped = false;
 
-	if (locked) {
-		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
-		BUG_ON(vmas);
-	}
-
 	/*
 	 * The internal caller expects GUP to manage the lock internally and the
 	 * lock must be released when this returns.
@@ -2075,16 +2066,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 		return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
 					       locked, gup_flags);
 
-	/*
-	 * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
-	 * implies FOLL_PIN (although the reverse is not true). Therefore it is
-	 * correct to unconditionally call check_and_migrate_movable_pages()
-	 * which assumes pages have been pinned via FOLL_PIN.
-	 *
-	 * Enforce the above reasoning by asserting that FOLL_PIN is set.
-	 */
-	if (WARN_ON(!(gup_flags & FOLL_PIN)))
-		return -EINVAL;
 	flags = memalloc_pin_save();
 	do {
 		nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
@@ -2094,28 +2075,66 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 			rc = nr_pinned_pages;
 			break;
 		}
+
+		/* FOLL_LONGTERM implies FOLL_PIN */
 		rc = check_and_migrate_movable_pages(nr_pinned_pages, pages);
 	} while (rc == -EAGAIN);
 	memalloc_pin_restore(flags);
 	return rc ? rc : nr_pinned_pages;
 }
 
-static bool is_valid_gup_flags(unsigned int gup_flags)
+/*
+ * Check that the given flags are valid for the exported gup/pup interface, and
+ * update them with the required flags that the caller must have set.
+ */
+static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
+			      int *locked, unsigned int *gup_flags_p,
+			      unsigned int to_set)
 {
+	unsigned int gup_flags = *gup_flags_p;
+
 	/*
-	 * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
-	 * never directly by the caller, so enforce that with an assertion:
+	 * These flags not allowed to be specified externally to the gup
+	 * interfaces:
+	 * - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY is internal only
+	 * - FOLL_REMOTE is internal only and used on follow_page()
 	 */
-	if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+	if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED |
+				      FOLL_REMOTE | FOLL_FAST_ONLY)))
 		return false;
+
+	gup_flags |= to_set;
+
+	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
+	if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
+			 (FOLL_PIN | FOLL_GET)))
+		return false;
+
+	/* LONGTERM can only be specified when pinning */
+	if (WARN_ON_ONCE(!(gup_flags & FOLL_PIN) && (gup_flags & FOLL_LONGTERM)))
+		return false;
+
+	/* Pages input must be given if using GET/PIN */
+	if (WARN_ON_ONCE((gup_flags & (FOLL_GET | FOLL_PIN)) && !pages))
+		return false;
+
+	/* At the external interface locked must be set */
+	if (WARN_ON_ONCE(locked && *locked != 1))
+		return false;
+
+	/* We want to allow the pgmap to be hot-unplugged at all times */
+	if (WARN_ON_ONCE((gup_flags & FOLL_LONGTERM) &&
+			 (gup_flags & FOLL_PCI_P2PDMA)))
+		return false;
+
 	/*
-	 * FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying
-	 * that is, FOLL_LONGTERM is a specific case, more restrictive case of
-	 * FOLL_PIN.
+	 * Can't use VMAs with locked, as locked allows GUP to unlock
+	 * which invalidates the vmas array
 	 */
-	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
+	if (WARN_ON_ONCE(vmas && locked))
 		return false;
 
+	*gup_flags_p = gup_flags;
 	return true;
 }
 
@@ -2185,11 +2204,12 @@ long get_user_pages_remote(struct mm_struct *mm,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas, int *locked)
 {
-	if (!is_valid_gup_flags(gup_flags))
+	if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
+			       FOLL_TOUCH | FOLL_REMOTE))
 		return -EINVAL;
 
 	return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked,
-				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
+				       gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -2223,11 +2243,11 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas)
 {
-	if (!is_valid_gup_flags(gup_flags))
+	if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_TOUCH))
 		return -EINVAL;
 
 	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
-				       vmas, NULL, gup_flags | FOLL_TOUCH);
+				       vmas, NULL, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -2251,8 +2271,11 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 {
 	int locked = 0;
 
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_TOUCH))
+		return -EINVAL;
+
 	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
-				       NULL, &locked, gup_flags | FOLL_TOUCH);
+				       NULL, &locked, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -2980,7 +3003,9 @@ int get_user_pages_fast_only(unsigned long start, int nr_pages,
 	 * FOLL_FAST_ONLY is required in order to match the API description of
 	 * this routine: no fall back to regular ("slow") GUP.
 	 */
-	gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+			       FOLL_GET | FOLL_FAST_ONLY))
+		return -EINVAL;
 
 	nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
 						 pages);
@@ -3017,16 +3042,14 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
 int get_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages)
 {
-	if (!is_valid_gup_flags(gup_flags))
-		return -EINVAL;
-
 	/*
 	 * The caller may or may not have explicitly set FOLL_GET; either way is
 	 * OK. However, internally (within mm/gup.c), gup fast variants must set
 	 * FOLL_GET, because gup fast is always a "pin with a +1 page refcount"
 	 * request.
 	 */
-	gup_flags |= FOLL_GET;
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_GET))
+		return -EINVAL;
 	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
@@ -3050,14 +3073,8 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
 int pin_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages)
 {
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-		return -EINVAL;
-
-	if (WARN_ON_ONCE(!pages))
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_PIN))
 		return -EINVAL;
-
-	gup_flags |= FOLL_PIN;
 	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
 }
 EXPORT_SYMBOL_GPL(pin_user_pages_fast);
@@ -3073,20 +3090,14 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
 {
 	int nr_pinned;
 
-	/*
-	 * FOLL_GET and FOLL_PIN are mutually exclusive. Note that the API
-	 * rules require returning 0, rather than -errno:
-	 */
-	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-		return 0;
-
-	if (WARN_ON_ONCE(!pages))
-		return 0;
 	/*
 	 * FOLL_FAST_ONLY is required in order to match the API description of
 	 * this routine: no fall back to regular ("slow") GUP.
 	 */
-	gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+			       FOLL_PIN | FOLL_FAST_ONLY))
+		return 0;
+
 	nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
 						 pages);
 	/*
@@ -3128,16 +3139,11 @@ long pin_user_pages_remote(struct mm_struct *mm,
 			   unsigned int gup_flags, struct page **pages,
 			   struct vm_area_struct **vmas, int *locked)
 {
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-		return -EINVAL;
-
-	if (WARN_ON_ONCE(!pages))
-		return -EINVAL;
-
+	if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
+			       FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE))
+		return 0;
 	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
-				     gup_flags | FOLL_PIN | FOLL_TOUCH |
-					     FOLL_REMOTE);
+				     gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages_remote);
 
@@ -3162,14 +3168,8 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages,
 		    struct vm_area_struct **vmas)
 {
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-		return -EINVAL;
-
-	if (WARN_ON_ONCE(!pages))
-		return -EINVAL;
-
-	gup_flags |= FOLL_PIN;
+	if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_PIN))
+		return 0;
 	return __gup_longterm_locked(current->mm, start, nr_pages,
 				     pages, vmas, NULL, gup_flags);
 }
@@ -3185,10 +3185,10 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 {
 	int locked = 0;
 
-	if (WARN_ON_ONCE(!pages))
-		return -EINVAL;
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+			       FOLL_PIN | FOLL_TOUCH))
+		return 0;
 
-	gup_flags |= FOLL_PIN | FOLL_TOUCH;
 	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
 				     &locked, gup_flags);
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index abe6cfd92ffa0e..eaf879c835de44 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1039,11 +1039,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-			 (FOLL_PIN | FOLL_GET)))
-		return NULL;
-
 	if (flags & FOLL_WRITE && !pmd_write(*pmd))
 		return NULL;
 
@@ -1202,11 +1197,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 	if (flags & FOLL_WRITE && !pud_write(*pud))
 		return NULL;
 
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-			 (FOLL_PIN | FOLL_GET)))
-		return NULL;
-
 	if (pud_present(*pud) && pud_devmap(*pud))
 		/* pass */;
 	else
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
                   ` (2 preceding siblings ...)
  2023-01-17 15:58 ` [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-20  3:08   ` John Hubbard
  2023-01-17 15:58 ` [PATCH 5/8] mm/gup: add FOLL_UNLOCK Jason Gunthorpe
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

This is always required, but we can't have a proper unguarded assertion
because of a shortcut in fork. So, cover as much as we can for now.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 mm/gup.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mm/gup.c b/mm/gup.c
index 9e332e3f6ea8e2..d203e268793b9c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1350,6 +1350,14 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 			return -EAGAIN;
 		lock_dropped = true;
 		*locked = 1;
+	} else if (flags & FOLL_PIN) {
+		/*
+		 * The mmap lock must be held when calling this function. This
+		 * is true even for non-pin modes, but due to a shortcut in fork
+		 * not taking the lock for the new mm we cannot check this
+		 * comprehensively.
+		 */
+		mmap_assert_locked(mm);
 	}
 
 	if (flags & FOLL_PIN)
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
                   ` (3 preceding siblings ...)
  2023-01-17 15:58 ` [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-19 11:18   ` Mike Rapoport
                     ` (2 more replies)
  2023-01-17 15:58 ` [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions Jason Gunthorpe
                   ` (3 subsequent siblings)
  8 siblings, 3 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
is a more explicit replacement for locked != NULL. This clears the way for
passing in locked = 1, without intending that the lock can be unlocked.

Set the flag in all cases where it is used, eg locked is present in the
external interface or locked is used internally with locked = 0.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/mm.h |  1 +
 mm/gup.c           | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f3f196e4d66d6f..7496a5c8acede1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
 #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
 #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
+#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */
 
 /*
  * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
diff --git a/mm/gup.c b/mm/gup.c
index d203e268793b9c..4c360fb05cf3e4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -896,7 +896,7 @@ static int faultin_page(struct vm_area_struct *vma,
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
 		fault_flags |= FAULT_FLAG_REMOTE;
-	if (locked) {
+	if (*flags & FOLL_UNLOCK) {
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 		/*
 		 * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set
@@ -1379,9 +1379,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 	for (;;) {
 		ret = __get_user_pages(mm, start, nr_pages, flags, pages,
 				       vmas, locked);
-		if (!locked)
+		if (!(flags & FOLL_UNLOCK)) {
 			/* VM_FAULT_RETRY couldn't trigger, bypass */
-			return ret;
+			pages_done = ret;
+			break;
+		}
 
 		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
 		if (!*locked) {
@@ -2106,12 +2108,20 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
 	 * interfaces:
 	 * - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY is internal only
 	 * - FOLL_REMOTE is internal only and used on follow_page()
+	 * - FOLL_UNLOCK is internal only and used if locked is !NULL
 	 */
-	if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED |
+	if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED | FOLL_UNLOCK |
 				      FOLL_REMOTE | FOLL_FAST_ONLY)))
 		return false;
 
 	gup_flags |= to_set;
+	if (locked) {
+		/* At the external interface locked must be set */
+		if (WARN_ON_ONCE(*locked != 1))
+			return false;
+
+		gup_flags |= FOLL_UNLOCK;
+	}
 
 	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
 	if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
@@ -2126,10 +2136,6 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
 	if (WARN_ON_ONCE((gup_flags & (FOLL_GET | FOLL_PIN)) && !pages))
 		return false;
 
-	/* At the external interface locked must be set */
-	if (WARN_ON_ONCE(locked && *locked != 1))
-		return false;
-
 	/* We want to allow the pgmap to be hot-unplugged at all times */
 	if (WARN_ON_ONCE((gup_flags & FOLL_LONGTERM) &&
 			 (gup_flags & FOLL_PCI_P2PDMA)))
@@ -2139,7 +2145,7 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
 	 * Can't use VMAs with locked, as locked allows GUP to unlock
 	 * which invalidates the vmas array
 	 */
-	if (WARN_ON_ONCE(vmas && locked))
+	if (WARN_ON_ONCE(vmas && (gup_flags & FOLL_UNLOCK)))
 		return false;
 
 	*gup_flags_p = gup_flags;
@@ -2279,7 +2285,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 {
 	int locked = 0;
 
-	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_TOUCH))
+	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
+			       FOLL_TOUCH | FOLL_UNLOCK))
 		return -EINVAL;
 
 	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
@@ -2967,7 +2974,7 @@ static int internal_get_user_pages_fast(unsigned long start,
 	pages += nr_pinned;
 	ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned,
 				    pages, NULL, &locked,
-				    gup_flags | FOLL_TOUCH);
+				    gup_flags | FOLL_TOUCH | FOLL_UNLOCK);
 	if (ret < 0) {
 		/*
 		 * The caller has to unpin the pages we already pinned so
@@ -3194,7 +3201,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 	int locked = 0;
 
 	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
-			       FOLL_PIN | FOLL_TOUCH))
+			       FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCK))
 		return 0;
 
 	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
                   ` (4 preceding siblings ...)
  2023-01-17 15:58 ` [PATCH 5/8] mm/gup: add FOLL_UNLOCK Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-20 19:19   ` John Hubbard
  2023-01-23 11:35   ` David Hildenbrand
  2023-01-17 15:58 ` [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only() Jason Gunthorpe
                   ` (2 subsequent siblings)
  8 siblings, 2 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

Now that NULL locked doesn't have a special meaning we can just make it
non-NULL in all cases and remove the special tests.

get_user_pages() and pin_user_pages() can safely pass in locked = 1

get_user_pages_remote() and pin_user_pages_remote() can swap in a local
variable for locked if NULL is passed.

Remove all the NULL checks.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 mm/gup.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 4c360fb05cf3e4..1ccfff759a29eb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -930,8 +930,8 @@ static int faultin_page(struct vm_area_struct *vma,
 		 * mmap lock in the page fault handler. Sanity check this.
 		 */
 		WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT);
-		if (locked)
-			*locked = 0;
+		*locked = 0;
+
 		/*
 		 * We should do the same as VM_FAULT_RETRY, but let's not
 		 * return -EBUSY since that's not reflecting the reality of
@@ -951,7 +951,7 @@ static int faultin_page(struct vm_area_struct *vma,
 	}
 
 	if (ret & VM_FAULT_RETRY) {
-		if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
+		if (!(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
 			*locked = 0;
 		return -EBUSY;
 	}
@@ -1121,7 +1121,7 @@ static long __get_user_pages(struct mm_struct *mm,
 				i = follow_hugetlb_page(mm, vma, pages, vmas,
 						&start, &nr_pages, i,
 						gup_flags, locked);
-				if (locked && *locked == 0) {
+				if (!*locked) {
 					/*
 					 * We've got a VM_FAULT_RETRY
 					 * and we've lost mmap_lock.
@@ -1345,7 +1345,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 	 * The internal caller expects GUP to manage the lock internally and the
 	 * lock must be released when this returns.
 	 */
-	if (locked && !*locked) {
+	if (!*locked) {
 		if (mmap_read_lock_killable(mm))
 			return -EAGAIN;
 		lock_dropped = true;
@@ -1679,7 +1679,7 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
 	 * The internal caller expects GUP to manage the lock internally and the
 	 * lock must be released when this returns.
 	 */
-	if (locked && !*locked) {
+	if (!*locked) {
 		if (mmap_read_lock_killable(mm))
 			return -EAGAIN;
 		must_unlock = true;
@@ -2218,11 +2218,14 @@ long get_user_pages_remote(struct mm_struct *mm,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas, int *locked)
 {
+	int local_locked = 1;
+
 	if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
 			       FOLL_TOUCH | FOLL_REMOTE))
 		return -EINVAL;
 
-	return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked,
+	return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+				       locked ? locked : &local_locked,
 				       gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
@@ -2257,11 +2260,13 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas)
 {
+	int locked = 1;
+
 	if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_TOUCH))
 		return -EINVAL;
 
 	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
-				       vmas, NULL, gup_flags);
+				       vmas, &locked, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -3154,10 +3159,13 @@ long pin_user_pages_remote(struct mm_struct *mm,
 			   unsigned int gup_flags, struct page **pages,
 			   struct vm_area_struct **vmas, int *locked)
 {
+	int local_locked = 1;
+
 	if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
 			       FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE))
 		return 0;
-	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas,
+				     locked ? locked : &local_locked,
 				     gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages_remote);
@@ -3183,10 +3191,12 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages,
 		    struct vm_area_struct **vmas)
 {
+	int locked = 1;
+
 	if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_PIN))
 		return 0;
 	return __gup_longterm_locked(current->mm, start, nr_pages,
-				     pages, vmas, NULL, gup_flags);
+				     pages, vmas, &locked, gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages);
 
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only()
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
                   ` (5 preceding siblings ...)
  2023-01-17 15:58 ` [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-20 19:23   ` John Hubbard
  2023-01-23 11:31   ` David Hildenbrand
  2023-01-17 15:58 ` [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value Jason Gunthorpe
  2023-01-19 11:18 ` [PATCH 0/8] Simplify the external interface for GUP Mike Rapoport
  8 siblings, 2 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

Commit ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry
about obj->mm.lock, v7.")  removed the only caller, remove this dead code
too.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/mm.h |  2 --
 mm/gup.c           | 33 ---------------------------------
 2 files changed, 35 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7496a5c8acede1..de300b0de58e7d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2159,8 +2159,6 @@ extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
  */
 int get_user_pages_fast_only(unsigned long start, int nr_pages,
 			     unsigned int gup_flags, struct page **pages);
-int pin_user_pages_fast_only(unsigned long start, int nr_pages,
-			     unsigned int gup_flags, struct page **pages);
 
 static inline bool get_user_page_fast_only(unsigned long addr,
 			unsigned int gup_flags, struct page **pagep)
diff --git a/mm/gup.c b/mm/gup.c
index 1ccfff759a29eb..b260c182135587 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -3099,39 +3099,6 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
 }
 EXPORT_SYMBOL_GPL(pin_user_pages_fast);
 
-/*
- * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
- * is the same, except that this one sets FOLL_PIN instead of FOLL_GET.
- *
- * The API rules are the same, too: no negative values may be returned.
- */
-int pin_user_pages_fast_only(unsigned long start, int nr_pages,
-			     unsigned int gup_flags, struct page **pages)
-{
-	int nr_pinned;
-
-	/*
-	 * FOLL_FAST_ONLY is required in order to match the API description of
-	 * this routine: no fall back to regular ("slow") GUP.
-	 */
-	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
-			       FOLL_PIN | FOLL_FAST_ONLY))
-		return 0;
-
-	nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
-						 pages);
-	/*
-	 * This routine is not allowed to return negative values. However,
-	 * internal_get_user_pages_fast() *can* return -errno. Therefore,
-	 * correct for that here:
-	 */
-	if (nr_pinned < 0)
-		nr_pinned = 0;
-
-	return nr_pinned;
-}
-EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
-
 /**
  * pin_user_pages_remote() - pin pages of a remote process
  *
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
                   ` (6 preceding siblings ...)
  2023-01-17 15:58 ` [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only() Jason Gunthorpe
@ 2023-01-17 15:58 ` Jason Gunthorpe
  2023-01-20 19:27   ` John Hubbard
  2023-01-23 11:33   ` David Hildenbrand
  2023-01-19 11:18 ` [PATCH 0/8] Simplify the external interface for GUP Mike Rapoport
  8 siblings, 2 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-17 15:58 UTC (permalink / raw)
  Cc: Alistair Popple, John Hubbard, linux-mm

There are only two callers, both can handle the common return code:

- get_user_page_fast_only() checks == 1

- gfn_to_page_many_atomic() already returns -1, and the only caller
  checks for -ve

Remove the restriction against returning negative values.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 mm/gup.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b260c182135587..20ebc4d1f0d719 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -3002,8 +3002,6 @@ static int internal_get_user_pages_fast(unsigned long start,
  *
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
  *
  * If the architecture does not support this function, simply return with no
  * pages pinned.
@@ -3015,7 +3013,6 @@ static int internal_get_user_pages_fast(unsigned long start,
 int get_user_pages_fast_only(unsigned long start, int nr_pages,
 			     unsigned int gup_flags, struct page **pages)
 {
-	int nr_pinned;
 	/*
 	 * Internally (within mm/gup.c), gup fast variants must set FOLL_GET,
 	 * because gup fast is always a "pin with a +1 page refcount" request.
@@ -3027,19 +3024,7 @@ int get_user_pages_fast_only(unsigned long start, int nr_pages,
 			       FOLL_GET | FOLL_FAST_ONLY))
 		return -EINVAL;
 
-	nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
-						 pages);
-
-	/*
-	 * As specified in the API description above, this routine is not
-	 * allowed to return negative values. However, the common core
-	 * routine internal_get_user_pages_fast() *can* return -errno.
-	 * Therefore, correct for that here:
-	 */
-	if (nr_pinned < 0)
-		nr_pinned = 0;
-
-	return nr_pinned;
+	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
 
-- 
2.39.0



^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock()
  2023-01-17 15:58 ` [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock() Jason Gunthorpe
@ 2023-01-19 11:16   ` Mike Rapoport
  2023-01-19 21:19   ` John Hubbard
  1 sibling, 0 replies; 39+ messages in thread
From: Mike Rapoport @ 2023-01-19 11:16 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On Tue, Jan 17, 2023 at 11:58:32AM -0400, Jason Gunthorpe wrote:
> __get_user_pages_locked() and __gup_longterm_locked() both require the
> mmap lock to be held. They have a slightly unusual locked parameter that
> is used to allow these functions to unlock and relock the mmap lock and
> convey that fact to the caller.
> 
> Several places wrapper these functions with a simple mmap_read_lock() just

Nit:             ^wrap

> so they can follow the optimized locked protocol.
> 
> Consolidate this internally to the functions. Allow internal callers to
> set locked = 0 to cause the functions to obtain and release the lock on

I'd s/obtain/acquire/g

> their own.
> 
> Reorganize __gup_longterm_locked() to use the autolocking in
> __get_user_pages_locked().
> 
> Replace all the places obtaining the mmap_read_lock() just to call
> __get_user_pages_locked() with the new mechanism. Replace all the internal
> callers of get_user_pages_unlocked() with direct calls to
> __gup_longterm_locked() using the new mechanism.
> 
> A following patch will add assertions ensuring the external interface
> continues to always pass in locked = 1.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  mm/gup.c | 92 +++++++++++++++++++++++++++++---------------------------
>  1 file changed, 47 insertions(+), 45 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index f45a3a5be53a48..3a9f764165f50b 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1343,13 +1343,22 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  						unsigned int flags)
>  {
>  	long ret, pages_done;
> -	bool lock_dropped;
> +	bool lock_dropped = false;
>  
>  	if (locked) {
>  		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
>  		BUG_ON(vmas);
> -		/* check caller initialized locked */
> -		BUG_ON(*locked != 1);
> +	}
> +
> +	/*
> +	 * The internal caller expects GUP to manage the lock internally and the
> +	 * lock must be released when this returns.
> +	 */
> +	if (locked && !*locked) {
> +		if (mmap_read_lock_killable(mm))
> +			return -EAGAIN;
> +		lock_dropped = true;
> +		*locked = 1;
>  	}
>  
>  	if (flags & FOLL_PIN)
> @@ -1368,7 +1377,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  		flags |= FOLL_GET;
>  
>  	pages_done = 0;
> -	lock_dropped = false;
>  	for (;;) {
>  		ret = __get_user_pages(mm, start, nr_pages, flags, pages,
>  				       vmas, locked);
> @@ -1659,9 +1667,24 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
>  		unsigned int foll_flags)
>  {
>  	struct vm_area_struct *vma;
> +	bool must_unlock = false;
>  	unsigned long vm_flags;
>  	long i;
>  
> +	if (!nr_pages)
> +		return 0;
> +
> +	/*
> +	 * The internal caller expects GUP to manage the lock internally and the
> +	 * lock must be released when this returns.
> +	 */
> +	if (locked && !*locked) {
> +		if (mmap_read_lock_killable(mm))
> +			return -EAGAIN;
> +		must_unlock = true;
> +		*locked = 1;
> +	}
> +
>  	/* calculate required read or write permissions.
>  	 * If FOLL_FORCE is set, we only require the "MAY" flags.
>  	 */
> @@ -1673,12 +1696,12 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
>  	for (i = 0; i < nr_pages; i++) {
>  		vma = find_vma(mm, start);
>  		if (!vma)
> -			goto finish_or_fault;
> +			break;
>  
>  		/* protect what we can, including chardevs */
>  		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
>  		    !(vm_flags & vma->vm_flags))
> -			goto finish_or_fault;
> +			break;
>  
>  		if (pages) {
>  			pages[i] = virt_to_page((void *)start);
> @@ -1690,9 +1713,11 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
>  		start = (start + PAGE_SIZE) & PAGE_MASK;
>  	}
>  
> -	return i;
> +	if (must_unlock && *locked) {
> +		mmap_read_unlock(mm);
> +		*locked = 0;
> +	}
>  
> -finish_or_fault:
>  	return i ? : -EFAULT;
>  }
>  #endif /* !CONFIG_MMU */
> @@ -1861,17 +1886,13 @@ EXPORT_SYMBOL(fault_in_readable);
>  #ifdef CONFIG_ELF_CORE
>  struct page *get_dump_page(unsigned long addr)
>  {
> -	struct mm_struct *mm = current->mm;
>  	struct page *page;
> -	int locked = 1;
> +	int locked = 0;
>  	int ret;
>  
> -	if (mmap_read_lock_killable(mm))
> -		return NULL;
> -	ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
> +	ret = __get_user_pages_locked(current->mm, addr, 1, &page, NULL,
> +				      &locked,
>  				      FOLL_FORCE | FOLL_DUMP | FOLL_GET);
> -	if (locked)
> -		mmap_read_unlock(mm);
>  	return (ret == 1) ? page : NULL;
>  }
>  #endif /* CONFIG_ELF_CORE */
> @@ -2047,13 +2068,9 @@ static long __gup_longterm_locked(struct mm_struct *mm,
>  				  int *locked,
>  				  unsigned int gup_flags)
>  {
> -	bool must_unlock = false;
>  	unsigned int flags;
>  	long rc, nr_pinned_pages;
>  
> -	if (locked && WARN_ON_ONCE(!*locked))
> -		return -EINVAL;
> -
>  	if (!(gup_flags & FOLL_LONGTERM))
>  		return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
>  					       locked, gup_flags);
> @@ -2070,11 +2087,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
>  		return -EINVAL;
>  	flags = memalloc_pin_save();
>  	do {
> -		if (locked && !*locked) {
> -			mmap_read_lock(mm);
> -			must_unlock = true;
> -			*locked = 1;
> -		}
>  		nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
>  							  pages, vmas, locked,
>  							  gup_flags);
> @@ -2085,11 +2097,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
>  		rc = check_and_migrate_movable_pages(nr_pinned_pages, pages);
>  	} while (rc == -EAGAIN);
>  	memalloc_pin_restore(flags);
> -
> -	if (locked && *locked && must_unlock) {
> -		mmap_read_unlock(mm);
> -		*locked = 0;
> -	}
>  	return rc ? rc : nr_pinned_pages;
>  }
>  
> @@ -2242,16 +2249,10 @@ EXPORT_SYMBOL(get_user_pages);
>  long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>  			     struct page **pages, unsigned int gup_flags)
>  {
> -	struct mm_struct *mm = current->mm;
> -	int locked = 1;
> -	long ret;
> +	int locked = 0;
>  
> -	mmap_read_lock(mm);
> -	ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked,
> -				    gup_flags | FOLL_TOUCH);
> -	if (locked)
> -		mmap_read_unlock(mm);
> -	return ret;
> +	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
> +				     &locked, gup_flags | FOLL_TOUCH);
>  }
>  EXPORT_SYMBOL(get_user_pages_unlocked);
>  
> @@ -2904,6 +2905,7 @@ static int internal_get_user_pages_fast(unsigned long start,
>  {
>  	unsigned long len, end;
>  	unsigned long nr_pinned;
> +	int locked = 0;
>  	int ret;
>  
>  	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
> @@ -2932,8 +2934,9 @@ static int internal_get_user_pages_fast(unsigned long start,
>  	/* Slow path: try to get the remaining pages with get_user_pages */
>  	start += nr_pinned << PAGE_SHIFT;
>  	pages += nr_pinned;
> -	ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages,
> -				      gup_flags);
> +	ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned,
> +				    pages, NULL, &locked,
> +				    gup_flags | FOLL_TOUCH);
>  	if (ret < 0) {
>  		/*
>  		 * The caller has to unpin the pages we already pinned so
> @@ -3180,14 +3183,13 @@ EXPORT_SYMBOL(pin_user_pages);
>  long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>  			     struct page **pages, unsigned int gup_flags)
>  {
> -	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> -	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> -		return -EINVAL;
> +	int locked = 0;
>  
>  	if (WARN_ON_ONCE(!pages))
>  		return -EINVAL;
>  
> -	gup_flags |= FOLL_PIN;
> -	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
> +	gup_flags |= FOLL_PIN | FOLL_TOUCH;
> +	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
> +				     &locked, gup_flags);
>  }
>  EXPORT_SYMBOL(pin_user_pages_unlocked);
> -- 
> 2.39.0
> 
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants
  2023-01-17 15:58 ` [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants Jason Gunthorpe
@ 2023-01-19 11:17   ` Mike Rapoport
  2023-01-20  2:51   ` John Hubbard
  1 sibling, 0 replies; 39+ messages in thread
From: Mike Rapoport @ 2023-01-19 11:17 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On Tue, Jan 17, 2023 at 11:58:34AM -0400, Jason Gunthorpe wrote:
> The GUP family of functions have a complex, but fairly well defined, set
> of invariants for their arguments. Currently these are sprinkled about,
> sometimes in duplicate through many functions.
> 
> Internally we don't follow all the invariants that the external interface
> has to follow, so place these checks directly at the exported
> interface. This ensures the internal functions never reach a violated
> invariant.
> 
> Remove the duplicated invariant checks.
> 
> The end result is to make these functions fully internal:
>  __get_user_pages_locked()
>  internal_get_user_pages_fast()
>  __gup_longterm_locked()
> 
> And all the other functions call directly into one of these.
> 
> Suggested-by: John Hubbard <jhubbard@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  mm/gup.c         | 150 +++++++++++++++++++++++------------------------
>  mm/huge_memory.c |  10 ----
>  2 files changed, 75 insertions(+), 85 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 2c833f862d0354..9e332e3f6ea8e2 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c

...

> -static bool is_valid_gup_flags(unsigned int gup_flags)
> +/*
> + * Check that the given flags are valid for the exported gup/pup interface, and
> + * update them with the required flags that the caller must have set.
> + */
> +static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
> +			      int *locked, unsigned int *gup_flags_p,
> +			      unsigned int to_set)
>  {
> +	unsigned int gup_flags = *gup_flags_p;
> +
>  	/*
> -	 * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
> -	 * never directly by the caller, so enforce that with an assertion:
> +	 * These flags not allowed to be specified externally to the gup
> +	 * interfaces:
> +	 * - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY is internal only

                                              ^ are?

> +	 * - FOLL_REMOTE is internal only and used on follow_page()
>  	 */
> -	if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
> +	if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED |
> +				      FOLL_REMOTE | FOLL_FAST_ONLY)))
>  		return false;
> +
> +	gup_flags |= to_set;
> +
> +	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> +	if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
> +			 (FOLL_PIN | FOLL_GET)))
> +		return false;
> +

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-17 15:58 ` [PATCH 5/8] mm/gup: add FOLL_UNLOCK Jason Gunthorpe
@ 2023-01-19 11:18   ` Mike Rapoport
  2023-01-20 13:45     ` Jason Gunthorpe
  2023-01-20 19:02   ` John Hubbard
  2023-01-23 11:37   ` David Hildenbrand
  2 siblings, 1 reply; 39+ messages in thread
From: Mike Rapoport @ 2023-01-19 11:18 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On Tue, Jan 17, 2023 at 11:58:36AM -0400, Jason Gunthorpe wrote:
> Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
> is a more explicit replacement for locked != NULL. This clears the way for
> passing in locked = 1, without intending that the lock can be unlocked.
> 
> Set the flag in all cases where it is used, eg locked is present in the
> external interface or locked is used internally with locked = 0.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  include/linux/mm.h |  1 +
>  mm/gup.c           | 31 +++++++++++++++++++------------
>  2 files changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index f3f196e4d66d6f..7496a5c8acede1 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>  #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
>  #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
>  #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
> +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */

Please add a comment that this is an internal flag.
  
>  /*
>   * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 0/8] Simplify the external interface for GUP
  2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
                   ` (7 preceding siblings ...)
  2023-01-17 15:58 ` [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value Jason Gunthorpe
@ 2023-01-19 11:18 ` Mike Rapoport
  8 siblings, 0 replies; 39+ messages in thread
From: Mike Rapoport @ 2023-01-19 11:18 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On Tue, Jan 17, 2023 at 11:58:31AM -0400, Jason Gunthorpe wrote:
> It is quite a maze of EXPORTED symbols leading up to the three actual
> worker functions of GUP. Simplify this by reorganizing some of the code so
> the EXPORTED symbols directly call the correct internal function with
> validated and consistent arguments.
> 
> Consolidate all the assertions into one place at the top of the call
> chains.
> 
> Remove some dead code.

> Jason Gunthorpe (8):
>   mm/gup: have internal functions get the mmap_read_lock()
>   mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be
>     set
>   mm/gup: simplify the external interface functions and consolidate
>     invariants
>   mm/gup: add an assertion that the mmap lock is locked
>   mm/gup: add FOLL_UNLOCK
>   mm/gup: make locked never NULL in the internal GUP functions
>   mm/gup: remove pin_user_pages_fast_only()
>   mm/gup: make get_user_pages_fast_only() return the common return value
> 
>  include/linux/mm.h |   3 +-
>  mm/gup.c           | 319 +++++++++++++++++++++------------------------
>  mm/huge_memory.c   |  10 --
>  3 files changed, 150 insertions(+), 182 deletions(-)

Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
 
> base-commit: 5dc4c995db9eb45f6373a956eb1f69460e69e6d4
> -- 
> 2.39.0
> 
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock()
  2023-01-17 15:58 ` [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock() Jason Gunthorpe
  2023-01-19 11:16   ` Mike Rapoport
@ 2023-01-19 21:19   ` John Hubbard
  2023-01-19 22:40     ` John Hubbard
  2023-01-20 14:47     ` Jason Gunthorpe
  1 sibling, 2 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-19 21:19 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> __get_user_pages_locked() and __gup_longterm_locked() both require the
> mmap lock to be held. They have a slightly unusual locked parameter that
> is used to allow these functions to unlock and relock the mmap lock and
> convey that fact to the caller.
> 
> Several places wrapper these functions with a simple mmap_read_lock() just
> so they can follow the optimized locked protocol.
> 
> Consolidate this internally to the functions. Allow internal callers to
> set locked = 0 to cause the functions to obtain and release the lock on
> their own.
> 
> Reorganize __gup_longterm_locked() to use the autolocking in
> __get_user_pages_locked().
> 
> Replace all the places obtaining the mmap_read_lock() just to call
> __get_user_pages_locked() with the new mechanism. Replace all the internal
> callers of get_user_pages_unlocked() with direct calls to
> __gup_longterm_locked() using the new mechanism.
> 
> A following patch will add assertions ensuring the external interface
> continues to always pass in locked = 1.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   mm/gup.c | 92 +++++++++++++++++++++++++++++---------------------------
>   1 file changed, 47 insertions(+), 45 deletions(-)

Nice cleanup already in just this first patch...

> 
> diff --git a/mm/gup.c b/mm/gup.c
> index f45a3a5be53a48..3a9f764165f50b 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1343,13 +1343,22 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>   						unsigned int flags)
>   {
>   	long ret, pages_done;
> -	bool lock_dropped;
> +	bool lock_dropped = false;

A small thing, but the naming and the associated comments later are now
inaccurate. Let's add function-level documentation to explain the new
calling convention for __get_user_pages_locked(), change the name of
lock_dropped to must_unlock, add a comment mid-function, and correct the
comment at the end of the function. Like this:

diff --git a/mm/gup.c b/mm/gup.c
index c0f86ff7848a..5b7c09be7442 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1331,8 +1331,17 @@ bool gup_signal_pending(unsigned int flags)
  }

  /*
- * Please note that this function, unlike __get_user_pages will not
- * return 0 for nr_pages > 0 without FOLL_NOWAIT
+ * Locking: (*locked == 1) means that the mmap_lock has already been acquired by
+ * the caller. This function may drop the mmap_lock. If it does so, then it will
+ * set (*locked = 0).
+ *
+ * (*locked == 0) means that the caller expects this function to acquire and
+ * drop the mmap_lock. Therefore, the value of *locked will still be zero when
+ * the function returns, even though it may have changed temporarily during
+ * function execution.
+ *
+ * Please note that this function, unlike __get_user_pages(), will not
+ * return 0 for nr_pages > 0, unless FOLL_NOWAIT is used.
   */
  long __get_user_pages_locked(struct mm_struct *mm,
  						unsigned long start,
@@ -1343,7 +1352,7 @@ long __get_user_pages_locked(struct mm_struct *mm,
  						unsigned int flags)
  {
  	long ret, pages_done;
-	bool lock_dropped = false;
+	bool must_unlock = false;

  	if (locked) {
  		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
@@ -1357,7 +1366,7 @@ long __get_user_pages_locked(struct mm_struct *mm,
  	if (locked && !*locked) {
  		if (mmap_read_lock_killable(mm))
  			return -EAGAIN;
-		lock_dropped = true;
+		must_unlock = true;
  		*locked = 1;
  	}

@@ -1412,7 +1421,9 @@ long __get_user_pages_locked(struct mm_struct *mm,
  		if (likely(pages))
  			pages += ret;
  		start += ret << PAGE_SHIFT;
-		lock_dropped = true;
+
+		/* The lock was temporarily dropped, so we must unlock later: */
+		must_unlock = true;

  retry:
  		/*
@@ -1459,10 +1470,11 @@ long __get_user_pages_locked(struct mm_struct *mm,
  			pages++;
  		start += PAGE_SIZE;
  	}
-	if (lock_dropped && *locked) {
+	if (must_unlock && *locked) {
  		/*
-		 * We must let the caller know we temporarily dropped the lock
-		 * and so the critical section protected by it was lost.
+		 * We either temporarily dropped the lock, or the caller
+		 * requested that we both acquire and drop the lock. Either way,
+		 * we must now unlock, and notify the caller of that state.
  		 */
  		mmap_read_unlock(mm);
  		*locked = 0;


...

> @@ -3180,14 +3183,13 @@ EXPORT_SYMBOL(pin_user_pages);
>   long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>   			     struct page **pages, unsigned int gup_flags)
>   {
> -	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> -	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> -		return -EINVAL;

Why are we dropping the assertion check for FOLL_GET?


thanks,
-- 
John Hubbard
NVIDIA


^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH 2/8] mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be set
  2023-01-17 15:58 ` [PATCH 2/8] mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be set Jason Gunthorpe
@ 2023-01-19 22:24   ` John Hubbard
  0 siblings, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-19 22:24 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> get_user_pages_remote(), get_user_pages_unlocked() and get_user_pages()
> are never called with FOLL_LONGTERM, so directly call
> __get_user_pages_locked()
> 
> The next patch will add an assertion for this.
> 
> Suggested-by: John Hubbard <jhubbard@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   mm/gup.c | 12 ++++++------
>   1 file changed, 6 insertions(+), 6 deletions(-)

Looks good. I appreciate the patch granularity, it's easy to
review as a result.

Reviewed-by: John Hubbard <jhubbard@nvidia.com>

thanks,
-- 
John Hubbard
NVIDIA

> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 3a9f764165f50b..2c833f862d0354 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2188,8 +2188,8 @@ long get_user_pages_remote(struct mm_struct *mm,
>   	if (!is_valid_gup_flags(gup_flags))
>   		return -EINVAL;
>   
> -	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
> -				     gup_flags | FOLL_TOUCH | FOLL_REMOTE);
> +	return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked,
> +				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
>   }
>   EXPORT_SYMBOL(get_user_pages_remote);
>   
> @@ -2226,8 +2226,8 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
>   	if (!is_valid_gup_flags(gup_flags))
>   		return -EINVAL;
>   
> -	return __gup_longterm_locked(current->mm, start, nr_pages,
> -				     pages, vmas, NULL, gup_flags | FOLL_TOUCH);
> +	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
> +				       vmas, NULL, gup_flags | FOLL_TOUCH);
>   }
>   EXPORT_SYMBOL(get_user_pages);
>   
> @@ -2251,8 +2251,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>   {
>   	int locked = 0;
>   
> -	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
> -				     &locked, gup_flags | FOLL_TOUCH);
> +	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
> +				       NULL, &locked, gup_flags | FOLL_TOUCH);
>   }
>   EXPORT_SYMBOL(get_user_pages_unlocked);
>   




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock()
  2023-01-19 21:19   ` John Hubbard
@ 2023-01-19 22:40     ` John Hubbard
  2023-01-20 14:47     ` Jason Gunthorpe
  1 sibling, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-19 22:40 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/19/23 13:19, John Hubbard wrote:
...
>   long __get_user_pages_locked(struct mm_struct *mm,
>                           unsigned long start,
> @@ -1343,7 +1352,7 @@ long __get_user_pages_locked(struct mm_struct *mm,
>                           unsigned int flags)

Shoot, this diff won't apply without a fix-up, because the above is
actually supposed to be:

static __always_inline long __get_user_pages_locked(struct mm_struct *mm,

...but I had a local patch applied on top, to remove all the static
keywords for functions in gup.c, thus in turn allowing my function
graphing tool to work. It draws helpful pictures of the evolving call
graph, but "static" hides things from it, due to using gcc's output for
this. And since most gup.c functions are static, it was awkward...

Anyway, apologies for letting the tooling noise leak into my response.
"It will never happen again." I hope. :)


thanks,
-- 
John Hubbard
NVIDIA


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants
  2023-01-17 15:58 ` [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants Jason Gunthorpe
  2023-01-19 11:17   ` Mike Rapoport
@ 2023-01-20  2:51   ` John Hubbard
  2023-01-20 14:58     ` Jason Gunthorpe
  1 sibling, 1 reply; 39+ messages in thread
From: John Hubbard @ 2023-01-20  2:51 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> The GUP family of functions have a complex, but fairly well defined, set
> of invariants for their arguments. Currently these are sprinkled about,
> sometimes in duplicate through many functions.
> 
> Internally we don't follow all the invariants that the external interface
> has to follow, so place these checks directly at the exported
> interface. This ensures the internal functions never reach a violated
> invariant.
> 
> Remove the duplicated invariant checks.
> 
> The end result is to make these functions fully internal:
>   __get_user_pages_locked()
>   internal_get_user_pages_fast()
>   __gup_longterm_locked()
> 
> And all the other functions call directly into one of these.
> 
> Suggested-by: John Hubbard <jhubbard@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   mm/gup.c         | 150 +++++++++++++++++++++++------------------------
>   mm/huge_memory.c |  10 ----
>   2 files changed, 75 insertions(+), 85 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 2c833f862d0354..9e332e3f6ea8e2 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -215,7 +215,6 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
>   {
>   	struct folio *folio = page_folio(page);
>   
> -	WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));

try_grab_page() is declared in mm.h and is therefore potentially
something that other subsystems could call--although they really
shouldn't! And here, we are simply assuming that that is enough. But in
order to be really comfortable removing this check on the basis of
"try_grab_page() is internal to mm", I think it would help to move its
declaration from mm.h, to mm/internal.h. Perhaps as a separate patch.


>   	if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
>   		return -ENOMEM;
>   
> @@ -818,7 +817,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>   	if (vma_is_secretmem(vma))
>   		return NULL;
>   
> -	if (foll_flags & FOLL_PIN)
> +	if (WARN_ON_ONCE(foll_flags & FOLL_PIN))

OK, so we're slightly fortifying follow_page() checking, but
not at the level of is_valid_gup_args(). Should this be mentioned
in the commit description? And should the checks be more extensive?


...

> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index abe6cfd92ffa0e..eaf879c835de44 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1039,11 +1039,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
>   
>   	assert_spin_locked(pmd_lockptr(mm, pmd));
>   
> -	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> -	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
> -			 (FOLL_PIN | FOLL_GET)))
> -		return NULL;
> -

For both follow_devmap_pmd() and follow_devmap_pud(), below, it looks like
the following external API path is left exposed (with respect to checking
gup flags):

do_mlock()
   __mm_populate()
     populate_vma_page_range()
       __get_user_pages()
          follow_page_mask()
             ...
               follow_devmap_pmd()


So I'm not sure that it's good to delete these checks without covering
that path.

>   	if (flags & FOLL_WRITE && !pmd_write(*pmd))
>   		return NULL;
>   
> @@ -1202,11 +1197,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
>   	if (flags & FOLL_WRITE && !pud_write(*pud))
>   		return NULL;
>   
> -	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> -	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
> -			 (FOLL_PIN | FOLL_GET)))
> -		return NULL;
> -
>   	if (pud_present(*pud) && pud_devmap(*pud))
>   		/* pass */;
>   	else

thanks,
-- 
John Hubbard
NVIDIA


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked
  2023-01-17 15:58 ` [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked Jason Gunthorpe
@ 2023-01-20  3:08   ` John Hubbard
  2023-01-20 15:44     ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: John Hubbard @ 2023-01-20  3:08 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> This is always required, but we can't have a proper unguarded assertion
> because of a shortcut in fork. So, cover as much as we can for now.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   mm/gup.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 9e332e3f6ea8e2..d203e268793b9c 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1350,6 +1350,14 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>   			return -EAGAIN;
>   		lock_dropped = true;
>   		*locked = 1;
> +	} else if (flags & FOLL_PIN) {
> +		/*
> +		 * The mmap lock must be held when calling this function. This
> +		 * is true even for non-pin modes, but due to a shortcut in fork
> +		 * not taking the lock for the new mm we cannot check this
> +		 * comprehensively.

I get that fork doesn't lock the newly created mm. But I'm having
trouble finding the calling path from fork to __get_user_pages_locked()
that leads to this comment, can you provide a hint? Both the comment
and the commit log are rather coy about where this happens.

> +		 */
> +		mmap_assert_locked(mm);
>   	}
>   
>   	if (flags & FOLL_PIN)

thanks,
-- 
John Hubbard
NVIDIA


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-19 11:18   ` Mike Rapoport
@ 2023-01-20 13:45     ` Jason Gunthorpe
  2023-01-20 14:58       ` Mike Rapoport
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-20 13:45 UTC (permalink / raw)
  To: Mike Rapoport; +Cc: Alistair Popple, John Hubbard, linux-mm

On Thu, Jan 19, 2023 at 01:18:08PM +0200, Mike Rapoport wrote:
> On Tue, Jan 17, 2023 at 11:58:36AM -0400, Jason Gunthorpe wrote:
> > Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
> > is a more explicit replacement for locked != NULL. This clears the way for
> > passing in locked = 1, without intending that the lock can be unlocked.
> > 
> > Set the flag in all cases where it is used, eg locked is present in the
> > external interface or locked is used internally with locked = 0.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >  include/linux/mm.h |  1 +
> >  mm/gup.c           | 31 +++++++++++++++++++------------
> >  2 files changed, 20 insertions(+), 12 deletions(-)
> > 
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index f3f196e4d66d6f..7496a5c8acede1 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
> >  #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
> >  #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
> >  #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
> > +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */
> 
> Please add a comment that this is an internal flag.

#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock (internal only) */

OK?

Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock()
  2023-01-19 21:19   ` John Hubbard
  2023-01-19 22:40     ` John Hubbard
@ 2023-01-20 14:47     ` Jason Gunthorpe
  1 sibling, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-20 14:47 UTC (permalink / raw)
  To: John Hubbard; +Cc: Alistair Popple, linux-mm

On Thu, Jan 19, 2023 at 01:19:06PM -0800, John Hubbard wrote:
> > diff --git a/mm/gup.c b/mm/gup.c
> > index f45a3a5be53a48..3a9f764165f50b 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1343,13 +1343,22 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
> >   						unsigned int flags)
> >   {
> >   	long ret, pages_done;
> > -	bool lock_dropped;
> > +	bool lock_dropped = false;
> 
> A small thing, but the naming and the associated comments later are now
> inaccurate. Let's add function-level documentation to explain the new
> calling convention for __get_user_pages_locked(), change the name of
> lock_dropped to must_unlock, add a comment mid-function, and correct the
> comment at the end of the function. Like this:

Done

> > @@ -3180,14 +3183,13 @@ EXPORT_SYMBOL(pin_user_pages);
> >   long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
> >   			     struct page **pages, unsigned int gup_flags)
> >   {
> > -	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> > -	if (WARN_ON_ONCE(gup_flags & FOLL_GET))
> > -		return -EINVAL;
> 
> Why are we dropping the assertion check for FOLL_GET?

Hmm, I think this is a rebase error, it should be in the "consolidate
invariants" patch

Thanks,
Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants
  2023-01-20  2:51   ` John Hubbard
@ 2023-01-20 14:58     ` Jason Gunthorpe
  2023-01-20 18:45       ` John Hubbard
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-20 14:58 UTC (permalink / raw)
  To: John Hubbard; +Cc: Alistair Popple, linux-mm

On Thu, Jan 19, 2023 at 06:51:18PM -0800, John Hubbard wrote:
> On 1/17/23 07:58, Jason Gunthorpe wrote:
> > The GUP family of functions have a complex, but fairly well defined, set
> > of invariants for their arguments. Currently these are sprinkled about,
> > sometimes in duplicate through many functions.
> > 
> > Internally we don't follow all the invariants that the external interface
> > has to follow, so place these checks directly at the exported
> > interface. This ensures the internal functions never reach a violated
> > invariant.
> > 
> > Remove the duplicated invariant checks.
> > 
> > The end result is to make these functions fully internal:
> >   __get_user_pages_locked()
> >   internal_get_user_pages_fast()
> >   __gup_longterm_locked()
> > 
> > And all the other functions call directly into one of these.
> > 
> > Suggested-by: John Hubbard <jhubbard@nvidia.com>
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >   mm/gup.c         | 150 +++++++++++++++++++++++------------------------
> >   mm/huge_memory.c |  10 ----
> >   2 files changed, 75 insertions(+), 85 deletions(-)
> > 
> > diff --git a/mm/gup.c b/mm/gup.c
> > index 2c833f862d0354..9e332e3f6ea8e2 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -215,7 +215,6 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
> >   {
> >   	struct folio *folio = page_folio(page);
> > -	WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
> 
> try_grab_page() is declared in mm.h and is therefore potentially
> something that other subsystems could call--although they really
> shouldn't! And here, we are simply assuming that that is enough. But in
> order to be really comfortable removing this check on the basis of
> "try_grab_page() is internal to mm", I think it would help to move its
> declaration from mm.h, to mm/internal.h. Perhaps as a separate
> patch.

Yes, lets do that

> > @@ -818,7 +817,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
> >   	if (vma_is_secretmem(vma))
> >   		return NULL;
> > -	if (foll_flags & FOLL_PIN)
> > +	if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
> 
> OK, so we're slightly fortifying follow_page() checking, but
> not at the level of is_valid_gup_args(). Should this be mentioned
> in the commit description? And should the checks be more extensive?

I'd leave it, there is no reason to be too nannyish - follow_page()
isn't an exported symbol.

> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index abe6cfd92ffa0e..eaf879c835de44 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -1039,11 +1039,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
> >   	assert_spin_locked(pmd_lockptr(mm, pmd));
> > -	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
> > -	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
> > -			 (FOLL_PIN | FOLL_GET)))
> > -		return NULL;
> > -
> 
> For both follow_devmap_pmd() and follow_devmap_pud(), below, it looks like
> the following external API path is left exposed (with respect to checking
> gup flags):
> 
> do_mlock()
>   __mm_populate()
>     populate_vma_page_range()

This is in gup.c and sets the flags directly, so it is not part of the
"external interface" we should just leave it.

IMHO, the point of the checks is primarily prevent bad gup_flags from
entering gup.c, primarily from creative driver authors, not to prevent
bugs in gup.c.

Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-20 13:45     ` Jason Gunthorpe
@ 2023-01-20 14:58       ` Mike Rapoport
  0 siblings, 0 replies; 39+ messages in thread
From: Mike Rapoport @ 2023-01-20 14:58 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On Fri, Jan 20, 2023 at 09:45:02AM -0400, Jason Gunthorpe wrote:
> On Thu, Jan 19, 2023 at 01:18:08PM +0200, Mike Rapoport wrote:
> > On Tue, Jan 17, 2023 at 11:58:36AM -0400, Jason Gunthorpe wrote:
> > > Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
> > > is a more explicit replacement for locked != NULL. This clears the way for
> > > passing in locked = 1, without intending that the lock can be unlocked.
> > > 
> > > Set the flag in all cases where it is used, eg locked is present in the
> > > external interface or locked is used internally with locked = 0.
> > > 
> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > > ---
> > >  include/linux/mm.h |  1 +
> > >  mm/gup.c           | 31 +++++++++++++++++++------------
> > >  2 files changed, 20 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index f3f196e4d66d6f..7496a5c8acede1 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
> > >  #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
> > >  #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
> > >  #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
> > > +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */
> > 
> > Please add a comment that this is an internal flag.
> 
> #define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock (internal only) */
> 
> OK?

Yep.

> Jason

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked
  2023-01-20  3:08   ` John Hubbard
@ 2023-01-20 15:44     ` Jason Gunthorpe
  0 siblings, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-20 15:44 UTC (permalink / raw)
  To: John Hubbard; +Cc: Alistair Popple, linux-mm

On Thu, Jan 19, 2023 at 07:08:08PM -0800, John Hubbard wrote:
> On 1/17/23 07:58, Jason Gunthorpe wrote:
> > This is always required, but we can't have a proper unguarded assertion
> > because of a shortcut in fork. So, cover as much as we can for now.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >   mm/gup.c | 8 ++++++++
> >   1 file changed, 8 insertions(+)
> > 
> > diff --git a/mm/gup.c b/mm/gup.c
> > index 9e332e3f6ea8e2..d203e268793b9c 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1350,6 +1350,14 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
> >   			return -EAGAIN;
> >   		lock_dropped = true;
> >   		*locked = 1;
> > +	} else if (flags & FOLL_PIN) {
> > +		/*
> > +		 * The mmap lock must be held when calling this function. This
> > +		 * is true even for non-pin modes, but due to a shortcut in fork
> > +		 * not taking the lock for the new mm we cannot check this
> > +		 * comprehensively.
> 
> I get that fork doesn't lock the newly created mm. But I'm having
> trouble finding the calling path from fork to __get_user_pages_locked()
> that leads to this comment, can you provide a hint? Both the comment
> and the commit log are rather coy about where this happens.

Hurm, so I see this did get fixed but nobody added the assertion to
gup.c :(

Jann Horn was working on this here:

https://lore.kernel.org/linux-mm/CAG48ez1-PBCdv3y8pn-Ty-b+FmBSLwDuVKFSt8h7wARLy0dF-Q@mail.gmail.com/

However, there was never any note on the mailing list about what happened
to the series.

But Andrew did take:
  b2767d97f5ff ("binfmt_elf: take the mmap lock around find_extend_vma()")
  f3964599c22f ("mm/gup_benchmark: take the mmap lock around GUP")

Then we had this other work:
  5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()")

Which actually added the assertion to find_vma(), which is called
unconditionally by gup.c:

 __get_user_pages_locked()
  __get_user_pages()
  find_extend_vma()
   find_vma()

So we've already had the assertion for a while now.

I'll update this commit to make it unconditional and note that it
already exists.

Thanks,
Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants
  2023-01-20 14:58     ` Jason Gunthorpe
@ 2023-01-20 18:45       ` John Hubbard
  0 siblings, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-20 18:45 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/20/23 06:58, Jason Gunthorpe wrote:
>> OK, so we're slightly fortifying follow_page() checking, but
>> not at the level of is_valid_gup_args(). Should this be mentioned
>> in the commit description? And should the checks be more extensive?
> 
> I'd leave it, there is no reason to be too nannyish - follow_page()
> isn't an exported symbol.

OK, agreed.

...
>> do_mlock()
>>   __mm_populate()
>>     populate_vma_page_range()
> 
> This is in gup.c and sets the flags directly, so it is not part of the
> "external interface" we should just leave it.
> 
> IMHO, the point of the checks is primarily prevent bad gup_flags from
> entering gup.c, primarily from creative driver authors, not to prevent
> bugs in gup.c.

Yes, also sounds like the right dividing line for how far to go with
these checks. OK.


thanks,
-- 
John Hubbard
NVIDIA



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-17 15:58 ` [PATCH 5/8] mm/gup: add FOLL_UNLOCK Jason Gunthorpe
  2023-01-19 11:18   ` Mike Rapoport
@ 2023-01-20 19:02   ` John Hubbard
  2023-01-23 11:37   ` David Hildenbrand
  2 siblings, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-20 19:02 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
> is a more explicit replacement for locked != NULL. This clears the way for
> passing in locked = 1, without intending that the lock can be unlocked.
> 
> Set the flag in all cases where it is used, eg locked is present in the
> external interface or locked is used internally with locked = 0.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  include/linux/mm.h |  1 +
>  mm/gup.c           | 31 +++++++++++++++++++------------
>  2 files changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index f3f196e4d66d6f..7496a5c8acede1 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>  #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
>  #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
>  #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
> +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */

Looks good. Admin note: that this will conflict with commit b5054174ac7c
("mm: move FOLL_* defs to mm_types.h), which is in mm-stable. Because
the FOLL_* items were moved.

Reviewed-by: John Hubbard <jhubbard@nvidia.com>

thanks,
-- 
John Hubbard
NVIDIA

>  
>  /*
>   * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
> diff --git a/mm/gup.c b/mm/gup.c
> index d203e268793b9c..4c360fb05cf3e4 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -896,7 +896,7 @@ static int faultin_page(struct vm_area_struct *vma,
>  		fault_flags |= FAULT_FLAG_WRITE;
>  	if (*flags & FOLL_REMOTE)
>  		fault_flags |= FAULT_FLAG_REMOTE;
> -	if (locked) {
> +	if (*flags & FOLL_UNLOCK) {
>  		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
>  		/*
>  		 * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set
> @@ -1379,9 +1379,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  	for (;;) {
>  		ret = __get_user_pages(mm, start, nr_pages, flags, pages,
>  				       vmas, locked);
> -		if (!locked)
> +		if (!(flags & FOLL_UNLOCK)) {
>  			/* VM_FAULT_RETRY couldn't trigger, bypass */
> -			return ret;
> +			pages_done = ret;
> +			break;
> +		}
>  
>  		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
>  		if (!*locked) {
> @@ -2106,12 +2108,20 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
>  	 * interfaces:
>  	 * - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY is internal only
>  	 * - FOLL_REMOTE is internal only and used on follow_page()
> +	 * - FOLL_UNLOCK is internal only and used if locked is !NULL
>  	 */
> -	if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED |
> +	if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED | FOLL_UNLOCK |
>  				      FOLL_REMOTE | FOLL_FAST_ONLY)))
>  		return false;
>  
>  	gup_flags |= to_set;
> +	if (locked) {
> +		/* At the external interface locked must be set */
> +		if (WARN_ON_ONCE(*locked != 1))
> +			return false;
> +
> +		gup_flags |= FOLL_UNLOCK;
> +	}
>  
>  	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
>  	if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
> @@ -2126,10 +2136,6 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
>  	if (WARN_ON_ONCE((gup_flags & (FOLL_GET | FOLL_PIN)) && !pages))
>  		return false;
>  
> -	/* At the external interface locked must be set */
> -	if (WARN_ON_ONCE(locked && *locked != 1))
> -		return false;
> -
>  	/* We want to allow the pgmap to be hot-unplugged at all times */
>  	if (WARN_ON_ONCE((gup_flags & FOLL_LONGTERM) &&
>  			 (gup_flags & FOLL_PCI_P2PDMA)))
> @@ -2139,7 +2145,7 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
>  	 * Can't use VMAs with locked, as locked allows GUP to unlock
>  	 * which invalidates the vmas array
>  	 */
> -	if (WARN_ON_ONCE(vmas && locked))
> +	if (WARN_ON_ONCE(vmas && (gup_flags & FOLL_UNLOCK)))
>  		return false;
>  
>  	*gup_flags_p = gup_flags;
> @@ -2279,7 +2285,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>  {
>  	int locked = 0;
>  
> -	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_TOUCH))
> +	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
> +			       FOLL_TOUCH | FOLL_UNLOCK))
>  		return -EINVAL;
>  
>  	return __get_user_pages_locked(current->mm, start, nr_pages, pages,
> @@ -2967,7 +2974,7 @@ static int internal_get_user_pages_fast(unsigned long start,
>  	pages += nr_pinned;
>  	ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned,
>  				    pages, NULL, &locked,
> -				    gup_flags | FOLL_TOUCH);
> +				    gup_flags | FOLL_TOUCH | FOLL_UNLOCK);
>  	if (ret < 0) {
>  		/*
>  		 * The caller has to unpin the pages we already pinned so
> @@ -3194,7 +3201,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>  	int locked = 0;
>  
>  	if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
> -			       FOLL_PIN | FOLL_TOUCH))
> +			       FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCK))
>  		return 0;
>  
>  	return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-17 15:58 ` [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions Jason Gunthorpe
@ 2023-01-20 19:19   ` John Hubbard
  2023-01-21  0:21     ` Jason Gunthorpe
  2023-01-23 11:35   ` David Hildenbrand
  1 sibling, 1 reply; 39+ messages in thread
From: John Hubbard @ 2023-01-20 19:19 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> Now that NULL locked doesn't have a special meaning we can just make it
> non-NULL in all cases and remove the special tests.
> 
> get_user_pages() and pin_user_pages() can safely pass in a locked = 1
> 
> get_user_pages_remote() and pin_user_pages_remote() can swap in a local
> variable for locked if NULL is passed.
> 
> Remove all the NULL checks.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  mm/gup.c | 30 ++++++++++++++++++++----------
>  1 file changed, 20 insertions(+), 10 deletions(-)
> 
...

Looks correct, just a few remaining comments that could be 
removed or fixed up:

> @@ -1121,7 +1121,7 @@ static long __get_user_pages(struct mm_struct *mm,
>  				i = follow_hugetlb_page(mm, vma, pages, vmas,
>  						&start, &nr_pages, i,
>  						gup_flags, locked);
> -				if (locked && *locked == 0) {
> +				if (!*locked) {


Just above this function, there is a comment that can
be adjusted to remove the reference to possibly NULL locked 
arg. This one:

 * If @locked != NULL, *@locked will be set to 0 when mmap_lock is
 * released by an up_read().  That can happen if @gup_flags does not
 * have FOLL_NOWAIT.

There's another one above populate_vma_page_range():

 * If @locked is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @locked is non-NULL, it must held for read only and may be
 * released.  If it's released, *@locked will be set to 0.

...and another set, above faultin_vma_page_range():

 * If @locked is NULL, it may be held for read or write and will be unperturbed.
 *
 * If @locked is non-NULL, it must held for read only and may be released.  If
 * it's released, *@locked will be set to 0.


thanks,
-- 
John Hubbard
NVIDIA



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only()
  2023-01-17 15:58 ` [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only() Jason Gunthorpe
@ 2023-01-20 19:23   ` John Hubbard
  2023-01-23 11:31   ` David Hildenbrand
  1 sibling, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-20 19:23 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> Commit ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry
> about obj->mm.lock, v7.")  removed the only caller, remove this dead code
> too.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  include/linux/mm.h |  2 --
>  mm/gup.c           | 33 ---------------------------------
>  2 files changed, 35 deletions(-)


Reviewed-by: John Hubbard <jhubbard@nvidia.com>


thanks,
-- 
John Hubbard
NVIDIA



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value
  2023-01-17 15:58 ` [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value Jason Gunthorpe
@ 2023-01-20 19:27   ` John Hubbard
  2023-01-23 11:33   ` David Hildenbrand
  1 sibling, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-20 19:27 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, linux-mm

On 1/17/23 07:58, Jason Gunthorpe wrote:
> There are only two callers, both can handle the common return code:
> 
> - get_user_page_fast_only() checks == 1
> 
> - gfn_to_page_many_atomic() already returns -1, and the only caller
>   checks for -ve

Maybe "checks for negative return values" ?

> 
> Remove the restriction against returning negative values.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  mm/gup.c | 17 +----------------
>  1 file changed, 1 insertion(+), 16 deletions(-)
> 

Reviewed-by: John Hubbard <jhubbard@nvidia.com>


thanks,
-- 
John Hubbard
NVIDIA



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-20 19:19   ` John Hubbard
@ 2023-01-21  0:21     ` Jason Gunthorpe
  0 siblings, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-21  0:21 UTC (permalink / raw)
  To: John Hubbard; +Cc: Alistair Popple, linux-mm

On Fri, Jan 20, 2023 at 11:19:58AM -0800, John Hubbard wrote:
> On 1/17/23 07:58, Jason Gunthorpe wrote:
> > Now that NULL locked doesn't have a special meaning we can just make it
> > non-NULL in all cases and remove the special tests.
> > 
> > get_user_pages() and pin_user_pages() can safely pass in a locked = 1
> > 
> > get_user_pages_remote) and pin_user_pages_remote() can swap in a local
> > variable for locked if NULL is passed.
> > 
> > Remove all the NULL checks.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >  mm/gup.c | 30 ++++++++++++++++++++----------
> >  1 file changed, 20 insertions(+), 10 deletions(-)
> > 
> ...
> 
> Looks correct, just a few remaining comments that could be 
> removed or fixed up:
> 
> > @@ -1121,7 +1121,7 @@ static long __get_user_pages(struct mm_struct *mm,
> >  				i = follow_hugetlb_page(mm, vma, pages, vmas,
> >  						&start, &nr_pages, i,
> >  						gup_flags, locked);
> > -				if (locked && *locked == 0) {
> > +				if (!*locked) {
> 
> 
> Just above this function, there is a comment that can
> be adjusted to remove the reference to possibly NULL locked 
> arg. This one:
> 
>  * If @locked != NULL, *@locked will be set to 0 when mmap_lock is
>  * released by an up_read().  That can happen if @gup_flags does not
>  * have FOLL_NOWAIT.

OK

> There's another one above populate_vma_page_range():
> 
>  * If @locked is NULL, it may be held for read or write and will
>  * be unperturbed.
>  *
>  * If @locked is non-NULL, it must held for read only and may be
>  * released.  If it's released, *@locked will be set to 0.
> 
> ...and another set, above faultin_vma_page_range():
> 
>  * If @locked is NULL, it may be held for read or write and will be unperturbed.
>  *
>  * If @locked is non-NULL, it must held for read only and may be released.  If
>  * it's released, *@locked will be set to 0.

ahh, I missed these two functions, they need to set FOLL_UNLOCK and
de-null locked too. I didn't check things that were calling
__get_user_pages() :\

Thanks,
Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only()
  2023-01-17 15:58 ` [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only() Jason Gunthorpe
  2023-01-20 19:23   ` John Hubbard
@ 2023-01-23 11:31   ` David Hildenbrand
  1 sibling, 0 replies; 39+ messages in thread
From: David Hildenbrand @ 2023-01-23 11:31 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On 17.01.23 16:58, Jason Gunthorpe wrote:
> Commit ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry
> about obj->mm.lock, v7.")  removed the only caller, remove this dead code
> too.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value
  2023-01-17 15:58 ` [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value Jason Gunthorpe
  2023-01-20 19:27   ` John Hubbard
@ 2023-01-23 11:33   ` David Hildenbrand
  1 sibling, 0 replies; 39+ messages in thread
From: David Hildenbrand @ 2023-01-23 11:33 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On 17.01.23 16:58, Jason Gunthorpe wrote:
> There are only two callers, both can handle the common return code:
> 
> - get_user_page_fast_only() checks == 1
> 
> - gfn_to_page_many_atomic() already returns -1, and the only caller
>    checks for -ve
> 
> Remove the restriction against returning negative values.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-17 15:58 ` [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions Jason Gunthorpe
  2023-01-20 19:19   ` John Hubbard
@ 2023-01-23 11:35   ` David Hildenbrand
  2023-01-23 12:44     ` Jason Gunthorpe
  1 sibling, 1 reply; 39+ messages in thread
From: David Hildenbrand @ 2023-01-23 11:35 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On 17.01.23 16:58, Jason Gunthorpe wrote:
> Now that NULL locked doesn't have a special meaning we can just make it
> non-NULL in all cases and remove the special tests.
> 
> get_user_pages() and pin_user_pages() can safely pass in a locked = 1
> 
> get_user_pages_remote) and pin_user_pages_remote() can swap in a local
> variable for locked if NULL is passed.
> 
> Remove all the NULL checks.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   mm/gup.c | 30 ++++++++++++++++++++----------
>   1 file changed, 20 insertions(+), 10 deletions(-)

... doesn't really look like a real cleanup. Especially with the

  2 files changed, 20 insertions(+), 12 deletions(-)

on the previous patch and a new internal flag ....

What's the benefit?

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-17 15:58 ` [PATCH 5/8] mm/gup: add FOLL_UNLOCK Jason Gunthorpe
  2023-01-19 11:18   ` Mike Rapoport
  2023-01-20 19:02   ` John Hubbard
@ 2023-01-23 11:37   ` David Hildenbrand
  2023-01-23 17:58     ` Jason Gunthorpe
  2 siblings, 1 reply; 39+ messages in thread
From: David Hildenbrand @ 2023-01-23 11:37 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On 17.01.23 16:58, Jason Gunthorpe wrote:
> Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
> is a more explicit replacement for locked != NULL. This clears the way for
> passing in locked = 1, without intending that the lock can be unlocked.
> 
> Set the flag in all cases where it is used, eg locked is present in the
> external interface or locked is used internally with locked = 0.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   include/linux/mm.h |  1 +
>   mm/gup.c           | 31 +++++++++++++++++++------------
>   2 files changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index f3f196e4d66d6f..7496a5c8acede1 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>   #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
>   #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
>   #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
> +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */

So it's more like "FOLL_UNLOCKABLE" ? FOLL_UNLOCK sounds like an 
instruction instead.

Not that I particularly like adding new internal FOLL flags where 
avoidable :P


-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-23 11:35   ` David Hildenbrand
@ 2023-01-23 12:44     ` Jason Gunthorpe
  2023-01-23 12:59       ` David Hildenbrand
  0 siblings, 1 reply; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-23 12:44 UTC (permalink / raw)
  To: David Hildenbrand; +Cc: Alistair Popple, John Hubbard, linux-mm

On Mon, Jan 23, 2023 at 12:35:28PM +0100, David Hildenbrand wrote:
> On 17.01.23 16:58, Jason Gunthorpe wrote:
> > Now that NULL locked doesn't have a special meaning we can just make it
> > non-NULL in all cases and remove the special tests.
> > 
> > get_user_pages() and pin_user_pages() can safely pass in a locked = 1
> > 
> > get_user_pages_remote) and pin_user_pages_remote() can swap in a local
> > variable for locked if NULL is passed.
> > 
> > Remove all the NULL checks.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >   mm/gup.c | 30 ++++++++++++++++++++----------
> >   1 file changed, 20 insertions(+), 10 deletions(-)
> 
> ... doesn't really look like a real cleanup. Especially with the
> 
>  2 files changed, 20 insertions(+), 12 deletions(-)
> 
> on the previous patch and a new internal flag ....
> 
> What's the benefit?

There are all kinds of unnecessary branches on the faster paths,
inside loops, etc to check for NULL when all we really needed was a
single bit flag.

It is also much clearer to understand that a FOLL flag changes the
behavior of how GUP works rather than some weirdo NULL argument.

Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-23 12:44     ` Jason Gunthorpe
@ 2023-01-23 12:59       ` David Hildenbrand
  2023-01-23 18:07         ` Jason Gunthorpe
  0 siblings, 1 reply; 39+ messages in thread
From: David Hildenbrand @ 2023-01-23 12:59 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On 23.01.23 13:44, Jason Gunthorpe wrote:
> On Mon, Jan 23, 2023 at 12:35:28PM +0100, David Hildenbrand wrote:
>> On 17.01.23 16:58, Jason Gunthorpe wrote:
>>> Now that NULL locked doesn't have a special meaning we can just make it
>>> non-NULL in all cases and remove the special tests.
>>>
>>> get_user_pages() and pin_user_pages() can safely pass in a locked = 1
>>>
>>> get_user_pages_remote) and pin_user_pages_remote() can swap in a local
>>> variable for locked if NULL is passed.
>>>
>>> Remove all the NULL checks.
>>>
>>> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>>> ---
>>>    mm/gup.c | 30 ++++++++++++++++++++----------
>>>    1 file changed, 20 insertions(+), 10 deletions(-)
>>
>> ... doesn't really look like a real cleanup. Especially with the
>>
>>   2 files changed, 20 insertions(+), 12 deletions(-)
>>
>> on the previous patch and a new internal flag ....
>>
>> What's the benefit?
> 
> There are all kinds of unnecessary branches on the faster paths,
> inside loops, etc to check for NULL when all we really needed was a
> single bit flag.

... call me skeptical that this optimization matters in practice ;)

> 
> It isalos much clearer to understand that a FOLL flag changes the
> behavior of how GUP works rather than some weirdo NULL argument.

The whole "locked" parameter handling is weird. I don't think adding a 
new FOLL_ internal flag on top particularly improves the situation ... 
at least before, the logic around that "locked" parameter handling was 
self-contained. Self-contained weirdness.

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-23 11:37   ` David Hildenbrand
@ 2023-01-23 17:58     ` Jason Gunthorpe
  2023-01-23 22:22       ` John Hubbard
  2023-01-24 13:08       ` David Hildenbrand
  0 siblings, 2 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-23 17:58 UTC (permalink / raw)
  To: David Hildenbrand; +Cc: Alistair Popple, John Hubbard, linux-mm

On Mon, Jan 23, 2023 at 12:37:17PM +0100, David Hildenbrand wrote:
> On 17.01.23 16:58, Jason Gunthorpe wrote:
> > Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
> > is a more explicit replacement for locked != NULL. This clears the way for
> > passing in locked = 1, without intending that the lock can be unlocked.
> > 
> > Set the flag in all cases where it is used, eg locked is present in the
> > external interface or locked is used internally with locked = 0.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >   include/linux/mm.h |  1 +
> >   mm/gup.c           | 31 +++++++++++++++++++------------
> >   2 files changed, 20 insertions(+), 12 deletions(-)
> > 
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index f3f196e4d66d6f..7496a5c8acede1 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
> >   #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
> >   #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
> >   #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
> > +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */
> 
> So it's more like "FOLL_UNLOCKABLE" ? FOLL_UNLOCK sounds like an instruction
> instead.

Sure, I renamed it

> Not that I particularly like adding new internal FOLL flags where avoidable
> :P

How about, I made a patch to move all the internal FOLL_ flags into
mm/internal.h ?

Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions
  2023-01-23 12:59       ` David Hildenbrand
@ 2023-01-23 18:07         ` Jason Gunthorpe
  0 siblings, 0 replies; 39+ messages in thread
From: Jason Gunthorpe @ 2023-01-23 18:07 UTC (permalink / raw)
  To: David Hildenbrand; +Cc: Alistair Popple, John Hubbard, linux-mm

On Mon, Jan 23, 2023 at 01:59:55PM +0100, David Hildenbrand wrote:
> On 23.01.23 13:44, Jason Gunthorpe wrote:
> > On Mon, Jan 23, 2023 at 12:35:28PM +0100, David Hildenbrand wrote:
> > > On 17.01.23 16:58, Jason Gunthorpe wrote:
> > > > Now that NULL locked doesn't have a special meaning we can just make it
> > > > non-NULL in all cases and remove the special tests.
> > > > 
> > > > get_user_pages() and pin_user_pages() can safely pass in a locked = 1
> > > > 
> > > > get_user_pages_remote) and pin_user_pages_remote() can swap in a local
> > > > variable for locked if NULL is passed.
> > > > 
> > > > Remove all the NULL checks.
> > > > 
> > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > > > ---
> > > >    mm/gup.c | 30 ++++++++++++++++++++----------
> > > >    1 file changed, 20 insertions(+), 10 deletions(-)
> > > 
> > > ... doesn't really look like a real cleanup. Especially with the
> > > 
> > >   2 files changed, 20 insertions(+), 12 deletions(-)
> > > 
> > > on the previous patch and a new internal flag ....
> > > 
> > > What's the benefit?
> > 
> > There are all kinds of unnecessary branches on the faster paths,
> > inside loops, etc to check for NULL when all we really needed was a
> > single bit flag.
> 
> ... call me skeptical that this optimization matters in practice ;)

Oh no doubt, just noting that it isn't just about line count.

> > It isalos much clearer to understand that a FOLL flag changes the
> > behavior of how GUP works rather than some weirdo NULL argument.
> 
> The whole "locked" parameter handling is weird. I don't think adding a new
> FOLL_ internal flag on top particularly improves the situation ... at least
> before, the logic around that "locked" parameter handling was
> self-contained. Self-contained weirdness.

Well it has become less self contained now. Locked is sprinkled
everywhere, and the main purpose of locked has now changed to
primarily be about managing the mmap_sem, ie we now lock it
automatically for *locked=0

So the special meaning of 'you can unlock while gupping' has been
really obscured. With FOLL_UNLOCKABLE it is clear, search on that and
you will find the few users and the one place that implements it.

Jason


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-23 17:58     ` Jason Gunthorpe
@ 2023-01-23 22:22       ` John Hubbard
  2023-01-24 13:08       ` David Hildenbrand
  1 sibling, 0 replies; 39+ messages in thread
From: John Hubbard @ 2023-01-23 22:22 UTC (permalink / raw)
  To: Jason Gunthorpe, David Hildenbrand; +Cc: Alistair Popple, linux-mm

On 1/23/23 09:58, Jason Gunthorpe wrote:
...
> How about, I made a patch to move all the internal FOLL_ flags into
> mm/internal.h ?

Yes, after thinking through the internal and external APIs, and the
FOLL_* situation, that sounds like the best overall approach right now,
IMHO.

I also considered whether adding some naming hints on top (suffixes such
as _INTERNAL) might help, but so far, it looks like it would add more
clutter than is justified. Instead, just looking at where the FOLL_*
flag is defined seems like it could be enough. And there aren't *too*
many flags, after all.

thanks,
-- 
John Hubbard
NVIDIA


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 5/8] mm/gup: add FOLL_UNLOCK
  2023-01-23 17:58     ` Jason Gunthorpe
  2023-01-23 22:22       ` John Hubbard
@ 2023-01-24 13:08       ` David Hildenbrand
  1 sibling, 0 replies; 39+ messages in thread
From: David Hildenbrand @ 2023-01-24 13:08 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Alistair Popple, John Hubbard, linux-mm

On 23.01.23 18:58, Jason Gunthorpe wrote:
> On Mon, Jan 23, 2023 at 12:37:17PM +0100, David Hildenbrand wrote:
>> On 17.01.23 16:58, Jason Gunthorpe wrote:
>>> Setting FOLL_UNLOCK allows GUP to lock/unlock the mmap lock on its own. It
>>> is a more explicit replacement for locked != NULL. This clears the way for
>>> passing in locked = 1, without intending that the lock can be unlocked.
>>>
>>> Set the flag in all cases where it is used, eg locked is present in the
>>> external interface or locked is used internally with locked = 0.
>>>
>>> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>>> ---
>>>    include/linux/mm.h |  1 +
>>>    mm/gup.c           | 31 +++++++++++++++++++------------
>>>    2 files changed, 20 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>> index f3f196e4d66d6f..7496a5c8acede1 100644
>>> --- a/include/linux/mm.h
>>> +++ b/include/linux/mm.h
>>> @@ -3089,6 +3089,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>>>    #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
>>>    #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
>>>    #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
>>> +#define FOLL_UNLOCK	0x400000 /* allow unlocking the mmap lock */
>>
>> So it's more like "FOLL_UNLOCKABLE" ? FOLL_UNLOCK sounds like an instruction
>> instead.
> 
> Sure, I renamed it
> 
>> Not that I particularly like adding new internal FOLL flags where avoidable
>> :P
> 
> How about, I made a patch to move all the internal FOLL_ flags into
> mm/internal.h ?

That would be a significant improvement. Maybe we could compress each 
set and move the internal ones to the highest bits.

-- 
Thanks,

David / dhildenb



^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2023-01-24 13:08 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-17 15:58 [PATCH 0/8] Simplify the external interface for GUP Jason Gunthorpe
2023-01-17 15:58 ` [PATCH 1/8] mm/gup: have internal functions get the mmap_read_lock() Jason Gunthorpe
2023-01-19 11:16   ` Mike Rapoport
2023-01-19 21:19   ` John Hubbard
2023-01-19 22:40     ` John Hubbard
2023-01-20 14:47     ` Jason Gunthorpe
2023-01-17 15:58 ` [PATCH 2/8] mm/gup: don't call __gup_longterm_locked() if FOLL_LONGTERM cannot be set Jason Gunthorpe
2023-01-19 22:24   ` John Hubbard
2023-01-17 15:58 ` [PATCH 3/8] mm/gup: simplify the external interface functions and consolidate invariants Jason Gunthorpe
2023-01-19 11:17   ` Mike Rapoport
2023-01-20  2:51   ` John Hubbard
2023-01-20 14:58     ` Jason Gunthorpe
2023-01-20 18:45       ` John Hubbard
2023-01-17 15:58 ` [PATCH 4/8] mm/gup: add an assertion that the mmap lock is locked Jason Gunthorpe
2023-01-20  3:08   ` John Hubbard
2023-01-20 15:44     ` Jason Gunthorpe
2023-01-17 15:58 ` [PATCH 5/8] mm/gup: add FOLL_UNLOCK Jason Gunthorpe
2023-01-19 11:18   ` Mike Rapoport
2023-01-20 13:45     ` Jason Gunthorpe
2023-01-20 14:58       ` Mike Rapoport
2023-01-20 19:02   ` John Hubbard
2023-01-23 11:37   ` David Hildenbrand
2023-01-23 17:58     ` Jason Gunthorpe
2023-01-23 22:22       ` John Hubbard
2023-01-24 13:08       ` David Hildenbrand
2023-01-17 15:58 ` [PATCH 6/8] mm/gup: make locked never NULL in the internal GUP functions Jason Gunthorpe
2023-01-20 19:19   ` John Hubbard
2023-01-21  0:21     ` Jason Gunthorpe
2023-01-23 11:35   ` David Hildenbrand
2023-01-23 12:44     ` Jason Gunthorpe
2023-01-23 12:59       ` David Hildenbrand
2023-01-23 18:07         ` Jason Gunthorpe
2023-01-17 15:58 ` [PATCH 7/8] mm/gup: remove pin_user_pages_fast_only() Jason Gunthorpe
2023-01-20 19:23   ` John Hubbard
2023-01-23 11:31   ` David Hildenbrand
2023-01-17 15:58 ` [PATCH 8/8] mm/gup: make get_user_pages_fast_only() return the common return value Jason Gunthorpe
2023-01-20 19:27   ` John Hubbard
2023-01-23 11:33   ` David Hildenbrand
2023-01-19 11:18 ` [PATCH 0/8] Simplify the external interface for GUP Mike Rapoport

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.