All of lore.kernel.org
 help / color / mirror / Atom feed
From: John Hubbard <jhubbard@nvidia.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Miklos Szeredi <miklos@szeredi.hu>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J . Wong" <djwong@kernel.org>,
	Trond Myklebust <trond.myklebust@hammerspace.com>,
	Anna Schumaker <anna@kernel.org>, Jan Kara <jack@suse.cz>,
	David Hildenbrand <david@redhat.com>,
	Logan Gunthorpe <logang@deltatee.com>,
	<linux-block@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>,
	<linux-xfs@vger.kernel.org>, <linux-nfs@vger.kernel.org>,
	<linux-mm@kvack.org>, LKML <linux-kernel@vger.kernel.org>,
	John Hubbard <jhubbard@nvidia.com>
Subject: [PATCH v2 4/7] iov_iter: new iov_iter_pin_pages*() routines
Date: Tue, 30 Aug 2022 21:18:40 -0700	[thread overview]
Message-ID: <20220831041843.973026-5-jhubbard@nvidia.com> (raw)
In-Reply-To: <20220831041843.973026-1-jhubbard@nvidia.com>

Provide two new wrapper routines that are intended for user space pages
only:

    iov_iter_pin_pages()
    iov_iter_pin_pages_alloc()

Internally, these routines pin pages via pin_user_pages_fast() (instead
of get_user_pages_fast()) for user_backed_iter(i) cases, and via
pin_user_page() (instead of get_page()) for iov_iter_is_bvec(i) cases.

As always, callers must use unpin_user_pages() or a suitable FOLL_PIN
variant, to release the pages, if they actually were acquired via
pin_user_pages_fast().

This is a prerequisite to converting bio/block layers over to use
pin_user_pages_fast().

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 include/linux/uio.h |  4 +++
 lib/iov_iter.c      | 86 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5896af36199c..e26908e443d1 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -251,6 +251,10 @@ ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
+ssize_t iov_iter_pin_pages(struct iov_iter *i, struct page **pages,
+			size_t maxsize, unsigned int maxpages, size_t *start);
+ssize_t iov_iter_pin_pages_alloc(struct iov_iter *i, struct page ***pages,
+			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 4b7fce72e3e5..c63ce0eadfcb 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1425,9 +1425,31 @@ static struct page *first_bvec_segment(const struct iov_iter *i,
 	return page;
 }
 
+enum pages_alloc_internal_flags {
+	USE_FOLL_GET,
+	MAYBE_USE_FOLL_PIN
+};
+
+/*
+ * Pins pages, either via get_page(), or via pin_user_page*(). The caller is
+ * responsible for tracking which pinning mechanism was used here, and releasing
+ * pages via the appropriate call: put_page() or unpin_user_page().
+ *
+ * The way to figure that out is:
+ *
+ *     a) If how_to_pin == USE_FOLL_GET, then this routine will always pin
+ *        via get_page().
+ *
+ *     b) If how_to_pin == MAYBE_USE_FOLL_PIN, then this routine will pin via
+ *        pin_user_page*() for either user_backed_iter(i) cases, or
+ *        iov_iter_is_bvec(i) cases. However, for the other cases (pipe,
+ *        xarray), pages will be pinned via get_page().
+ */
 static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
-		   unsigned int maxpages, size_t *start)
+		   unsigned int maxpages, size_t *start,
+		   enum pages_alloc_internal_flags how_to_pin)
+
 {
 	unsigned int n;
 
@@ -1454,7 +1476,12 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		n = want_pages_array(pages, maxsize, *start, maxpages);
 		if (!n)
 			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, gup_flags, *pages);
+
+		if (how_to_pin == MAYBE_USE_FOLL_PIN)
+			res = pin_user_pages_fast(addr, n, gup_flags, *pages);
+		else
+			res = get_user_pages_fast(addr, n, gup_flags, *pages);
+
 		if (unlikely(res <= 0))
 			return res;
 		maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start);
@@ -1470,8 +1497,13 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		if (!n)
 			return -ENOMEM;
 		p = *pages;
-		for (int k = 0; k < n; k++)
-			get_page(p[k] = page + k);
+		for (int k = 0; k < n; k++) {
+			p[k] = page + k;
+			if (how_to_pin == MAYBE_USE_FOLL_PIN)
+				pin_user_page(p[k]);
+			else
+				get_page(p[k]);
+		}
 		maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start);
 		i->count -= maxsize;
 		i->iov_offset += maxsize;
@@ -1497,10 +1529,29 @@ ssize_t iov_iter_get_pages2(struct iov_iter *i,
 		return 0;
 	BUG_ON(!pages);
 
-	return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start);
+	return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start,
+					  USE_FOLL_GET);
 }
 EXPORT_SYMBOL(iov_iter_get_pages2);
 
+/*
+ * A FOLL_PIN variant that calls pin_user_pages_fast() instead of
+ * get_user_pages_fast().
+ */
+ssize_t iov_iter_pin_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize, unsigned int maxpages,
+		   size_t *start)
+{
+	if (!maxpages)
+		return 0;
+	if (WARN_ON_ONCE(!pages))
+		return -EINVAL;
+
+	return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start,
+					  MAYBE_USE_FOLL_PIN);
+}
+EXPORT_SYMBOL(iov_iter_pin_pages);
+
 ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
@@ -1509,7 +1560,8 @@ ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,
 
 	*pages = NULL;
 
-	len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start);
+	len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start,
+					 USE_FOLL_GET);
 	if (len <= 0) {
 		kvfree(*pages);
 		*pages = NULL;
@@ -1518,6 +1570,28 @@ ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages_alloc2);
 
+/*
+ * A FOLL_PIN variant that calls pin_user_pages_fast() instead of
+ * get_user_pages_fast().
+ */
+ssize_t iov_iter_pin_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	ssize_t len;
+
+	*pages = NULL;
+
+	len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start,
+					 MAYBE_USE_FOLL_PIN);
+	if (len <= 0) {
+		kvfree(*pages);
+		*pages = NULL;
+	}
+	return len;
+}
+EXPORT_SYMBOL(iov_iter_pin_pages_alloc);
+
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 			       struct iov_iter *i)
 {
-- 
2.37.2


  parent reply	other threads:[~2022-08-31  4:19 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-31  4:18 [PATCH v2 0/7] convert most filesystems to pin_user_pages_fast() John Hubbard
2022-08-31  4:18 ` [PATCH v2 1/7] mm: change release_pages() to use unsigned long for npages John Hubbard
2022-08-31  4:18 ` [PATCH v2 2/7] mm/gup: introduce pin_user_page() John Hubbard
2022-09-06  6:37   ` Christoph Hellwig
2022-09-06  7:12     ` John Hubbard
2022-08-31  4:18 ` [PATCH v2 3/7] block: add dio_w_*() wrappers for pin, unpin user pages John Hubbard
2022-08-31  4:18 ` John Hubbard [this message]
2022-09-01  0:42   ` [PATCH v2 4/7] iov_iter: new iov_iter_pin_pages*() routines Al Viro
2022-09-01  1:48     ` John Hubbard
2022-09-06  6:47   ` Christoph Hellwig
2022-09-06  7:44     ` John Hubbard
2022-09-06  7:48       ` Christoph Hellwig
2022-09-06  7:58         ` John Hubbard
2022-09-07  8:50           ` Christoph Hellwig
2022-09-06 10:21         ` Jan Kara
2022-09-07  8:45           ` Christoph Hellwig
2022-09-14  3:51             ` Al Viro
2022-09-14 14:52               ` Jan Kara
2022-09-14 16:42                 ` Al Viro
2022-09-15  8:16                   ` Jan Kara
2022-09-16  1:55                     ` Al Viro
2022-09-20  5:02                       ` Al Viro
2022-09-22 14:36                         ` Christoph Hellwig
2022-09-22 14:43                           ` David Hildenbrand
2022-09-22 14:45                             ` Christoph Hellwig
2022-09-22  2:22                     ` Al Viro
2022-09-22  6:09                       ` John Hubbard
2022-09-22 11:29                         ` Jan Kara
2022-09-23  3:19                           ` Al Viro
2022-09-23  4:05                             ` John Hubbard
2022-09-23  8:39                               ` Christoph Hellwig
2022-09-23 12:22                               ` Jan Kara
2022-09-23  4:34                           ` John Hubbard
2022-09-22 14:38                       ` Christoph Hellwig
2022-09-23  4:22                         ` Al Viro
2022-09-23  8:44                           ` Christoph Hellwig
2022-09-23 16:13                             ` Al Viro
2022-09-26 15:53                               ` Christoph Hellwig
2022-09-26 19:55                                 ` Al Viro
2022-09-22 14:31               ` Christoph Hellwig
2022-09-22 14:36                 ` Al Viro
2022-08-31  4:18 ` [PATCH v2 5/7] block, bio, fs: convert most filesystems to pin_user_pages_fast() John Hubbard
2022-09-06  6:48   ` Christoph Hellwig
2022-09-06  7:15     ` John Hubbard
2022-08-31  4:18 ` [PATCH v2 6/7] NFS: direct-io: convert to FOLL_PIN pages John Hubbard
2022-09-06  6:49   ` Christoph Hellwig
2022-09-06  7:16     ` John Hubbard
2022-08-31  4:18 ` [PATCH v2 7/7] fuse: convert direct IO paths to use FOLL_PIN John Hubbard
2022-08-31 10:37   ` Miklos Szeredi
2022-09-01  1:33     ` John Hubbard
2022-09-06  6:36 ` [PATCH v2 0/7] convert most filesystems to pin_user_pages_fast() Christoph Hellwig
2022-09-06  7:10   ` John Hubbard
2022-09-06  7:22     ` Christoph Hellwig
2022-09-06  7:37       ` John Hubbard
2022-09-06  7:46         ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220831041843.973026-5-jhubbard@nvidia.com \
    --to=jhubbard@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=anna@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=david@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=logang@deltatee.com \
    --cc=miklos@szeredi.hu \
    --cc=trond.myklebust@hammerspace.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.