All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: dhowells@redhat.com, Christoph Hellwig <hch@infradead.org>,
	Matthew Wilcox <willy@infradead.org>,
	Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
	Jeff Layton <jlayton@kernel.org>,
	Logan Gunthorpe <logang@deltatee.com>,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v6 06/34] iov_iter: Use the direction in the iterator functions
Date: Mon, 16 Jan 2023 23:08:44 +0000	[thread overview]
Message-ID: <167391052497.2311931.9463379582932734164.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <167391047703.2311931.8115712773222260073.stgit@warthog.procyon.org.uk>

Use the direction in the iterator functions rather than READ/WRITE.

Add a check into __iov_iter_get_pages_alloc() that the supplied
FOLL_SOURCE/DEST_BUF gup_flag matches the ITER_SOURCE/DEST flag on the
iterator.

Changes
=======
ver #6)
 - Add a check on FOLL_SOURCE/DEST_BUF into __iov_iter_get_pages_alloc()

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Al Viro <viro@zeniv.linux.org.uk>

Link: https://lore.kernel.org/r/167305162465.1521586.18077838937455153675.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/167344727112.2425628.995771894170560721.stgit@warthog.procyon.org.uk/ # v5
---

 include/linux/uio.h |   22 +--
 lib/iov_iter.c      |  409 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 396 insertions(+), 35 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 8d0dabfcb2fe..18b64068cc6d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -256,16 +256,16 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
 			unsigned len_mask);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
+void iov_iter_init(struct iov_iter *i, enum iter_dir direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
-void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
+void iov_iter_kvec(struct iov_iter *i, enum iter_dir direction, const struct kvec *kvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
+void iov_iter_bvec(struct iov_iter *i, enum iter_dir direction, const struct bio_vec *bvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
+void iov_iter_pipe(struct iov_iter *i, enum iter_dir direction, struct pipe_inode_info *pipe,
 			size_t count);
-void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
-void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
+void iov_iter_discard(struct iov_iter *i, enum iter_dir direction, size_t count);
+void iov_iter_xarray(struct iov_iter *i, enum iter_dir direction, struct xarray *xarray,
 		     loff_t start, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 		size_t maxsize, unsigned maxpages, size_t *start,
@@ -351,19 +351,19 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 struct iovec *iovec_from_user(const struct iovec __user *uvector,
 		unsigned long nr_segs, unsigned long fast_segs,
 		struct iovec *fast_iov, bool compat);
-ssize_t import_iovec(int type, const struct iovec __user *uvec,
+ssize_t import_iovec(enum iter_dir direction, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
 		 struct iov_iter *i);
-ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+ssize_t __import_iovec(enum iter_dir direction, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
 		 struct iov_iter *i, bool compat);
-int import_single_range(int type, void __user *buf, size_t len,
+int import_single_range(enum iter_dir direction, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
 
-static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
+static inline void iov_iter_ubuf(struct iov_iter *i, enum iter_dir direction,
 			void __user *buf, size_t count)
 {
-	WARN_ON(direction & ~(READ | WRITE));
+	WARN_ON(!iov_iter_dir_valid(direction));
 	*i = (struct iov_iter) {
 		.iter_type = ITER_UBUF,
 		.user_backed = true,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ca89ffa9d6e1..6436438bf46b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -421,11 +421,11 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
 }
 EXPORT_SYMBOL(fault_in_iov_iter_writeable);
 
-void iov_iter_init(struct iov_iter *i, unsigned int direction,
+void iov_iter_init(struct iov_iter *i, enum iter_dir direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
 {
-	WARN_ON(direction & ~(READ | WRITE));
+	WARN_ON(!iov_iter_dir_valid(direction));
 	*i = (struct iov_iter) {
 		.iter_type = ITER_IOVEC,
 		.nofault = false,
@@ -994,11 +994,11 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
-void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
+void iov_iter_kvec(struct iov_iter *i, enum iter_dir direction,
 			const struct kvec *kvec, unsigned long nr_segs,
 			size_t count)
 {
-	WARN_ON(direction & ~(READ | WRITE));
+	WARN_ON(!iov_iter_dir_valid(direction));
 	*i = (struct iov_iter){
 		.iter_type = ITER_KVEC,
 		.data_source = direction,
@@ -1010,11 +1010,11 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_kvec);
 
-void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
+void iov_iter_bvec(struct iov_iter *i, enum iter_dir direction,
 			const struct bio_vec *bvec, unsigned long nr_segs,
 			size_t count)
 {
-	WARN_ON(direction & ~(READ | WRITE));
+	WARN_ON(!iov_iter_dir_valid(direction));
 	*i = (struct iov_iter){
 		.iter_type = ITER_BVEC,
 		.data_source = direction,
@@ -1026,15 +1026,15 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);
 
-void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
+void iov_iter_pipe(struct iov_iter *i, enum iter_dir direction,
 			struct pipe_inode_info *pipe,
 			size_t count)
 {
-	BUG_ON(direction != READ);
+	BUG_ON(direction != ITER_DEST);
 	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
 	*i = (struct iov_iter){
 		.iter_type = ITER_PIPE,
-		.data_source = false,
+		.data_source = ITER_DEST,
 		.pipe = pipe,
 		.head = pipe->head,
 		.start_head = pipe->head,
@@ -1057,10 +1057,10 @@ EXPORT_SYMBOL(iov_iter_pipe);
  * from evaporation, either by taking a ref on them or locking them by the
  * caller.
  */
-void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
+void iov_iter_xarray(struct iov_iter *i, enum iter_dir direction,
 		     struct xarray *xarray, loff_t start, size_t count)
 {
-	BUG_ON(direction & ~1);
+	WARN_ON(!iov_iter_dir_valid(direction));
 	*i = (struct iov_iter) {
 		.iter_type = ITER_XARRAY,
 		.data_source = direction,
@@ -1079,14 +1079,14 @@ EXPORT_SYMBOL(iov_iter_xarray);
  * @count: The size of the I/O buffer in bytes.
  *
  * Set up an I/O iterator that just discards everything that's written to it.
- * It's only available as a READ iterator.
+ * It's only available as a destination iterator.
  */
-void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
+void iov_iter_discard(struct iov_iter *i, enum iter_dir direction, size_t count)
 {
-	BUG_ON(direction != READ);
+	BUG_ON(direction != ITER_DEST);
 	*i = (struct iov_iter){
 		.iter_type = ITER_DISCARD,
-		.data_source = false,
+		.data_source = ITER_DEST,
 		.count = count,
 		.iov_offset = 0
 	};
@@ -1444,10 +1444,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		maxsize = MAX_RW_COUNT;
 
 	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_SOURCE_BUF &&
-			 i->data_source == ITER_DEST))
+			 iov_iter_is_dest(i)))
 		return -EIO;
 	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_DEST_BUF &&
-			 i->data_source == ITER_SOURCE))
+			 iov_iter_is_source(i)))
 		return -EIO;
 
 	if (likely(user_backed_iter(i))) {
@@ -1775,7 +1775,7 @@ struct iovec *iovec_from_user(const struct iovec __user *uvec,
 	return iov;
 }
 
-ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+ssize_t __import_iovec(enum iter_dir direction, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
 		 struct iov_iter *i, bool compat)
 {
@@ -1814,7 +1814,7 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 		total_len += len;
 	}
 
-	iov_iter_init(i, type, iov, nr_segs, total_len);
+	iov_iter_init(i, direction, iov, nr_segs, total_len);
 	if (iov == *iovp)
 		*iovp = NULL;
 	else
@@ -1827,7 +1827,7 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
  *     into the kernel, check that it is valid, and initialize a new
  *     &struct iov_iter iterator to access it.
  *
- * @type: One of %READ or %WRITE.
+ * @direction: One of %ITER_SOURCE or %ITER_DEST.
  * @uvec: Pointer to the userspace array.
  * @nr_segs: Number of elements in userspace array.
  * @fast_segs: Number of elements in @iov.
@@ -1844,16 +1844,16 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
  *
  * Return: Negative error code on error, bytes imported on success
  */
-ssize_t import_iovec(int type, const struct iovec __user *uvec,
+ssize_t import_iovec(enum iter_dir direction, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs,
 		 struct iovec **iovp, struct iov_iter *i)
 {
-	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
+	return __import_iovec(direction, uvec, nr_segs, fast_segs, iovp, i,
 			      in_compat_syscall());
 }
 EXPORT_SYMBOL(import_iovec);
 
-int import_single_range(int rw, void __user *buf, size_t len,
+int import_single_range(enum iter_dir direction, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i)
 {
 	if (len > MAX_RW_COUNT)
@@ -1863,7 +1863,7 @@ int import_single_range(int rw, void __user *buf, size_t len,
 
 	iov->iov_base = buf;
 	iov->iov_len = len;
-	iov_iter_init(i, rw, iov, 1, len);
+	iov_iter_init(i, direction, iov, 1, len);
 	return 0;
 }
 EXPORT_SYMBOL(import_single_range);
@@ -1905,3 +1905,364 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 		i->iov -= state->nr_segs - i->nr_segs;
 	i->nr_segs = state->nr_segs;
 }
+
+/*
+ * Extract a list of contiguous pages from an ITER_PIPE iterator.  This does
+ * not get references of its own on the pages, nor does it get a pin on them.
+ * If there's a partial page, it adds that first and will then allocate and add
+ * pages into the pipe to make up the buffer space to the amount required.
+ *
+ * The caller must hold the pipe locked and only transferring into a pipe is
+ * supported.
+ */
+static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
+					   struct page ***pages, size_t maxsize,
+					   unsigned int maxpages,
+					   unsigned int gup_flags,
+					   size_t *offset0)
+{
+	unsigned int nr, offset, chunk, j;
+	struct page **p;
+	size_t left;
+
+	if (!sanity(i))
+		return -EFAULT;
+
+	offset = pipe_npages(i, &nr);
+	if (!nr)
+		return -EFAULT;
+	*offset0 = offset;
+
+	maxpages = min_t(size_t, nr, maxpages);
+	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	p = *pages;
+
+	left = maxsize;
+	for (j = 0; j < maxpages; j++) {
+		struct page *page = append_pipe(i, left, &offset);
+		if (!page)
+			break;
+		chunk = min_t(size_t, left, PAGE_SIZE - offset);
+		left -= chunk;
+		*p++ = page;
+	}
+	if (!j)
+		return -EFAULT;
+	return maxsize - left;
+}
+
+/*
+ * Extract a list of contiguous pages from an ITER_XARRAY iterator.  This does not
+ * get references on the pages, nor does it get a pin on them.
+ */
+static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
+					     struct page ***pages, size_t maxsize,
+					     unsigned int maxpages,
+					     unsigned int gup_flags,
+					     size_t *offset0)
+{
+	struct page *page, **p;
+	unsigned int nr = 0, offset;
+	loff_t pos = i->xarray_start + i->iov_offset;
+	pgoff_t index = pos >> PAGE_SHIFT;
+	XA_STATE(xas, i->xarray, index);
+
+	offset = pos & ~PAGE_MASK;
+	*offset0 = offset;
+
+	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	p = *pages;
+
+	rcu_read_lock();
+	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+		if (xas_retry(&xas, page))
+			continue;
+
+		/* Has the page moved or been split? */
+		if (unlikely(page != xas_reload(&xas))) {
+			xas_reset(&xas);
+			continue;
+		}
+
+		p[nr++] = find_subpage(page, xas.xa_index);
+		if (nr == maxpages)
+			break;
+	}
+	rcu_read_unlock();
+
+	maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
+	i->iov_offset += maxsize;
+	i->count -= maxsize;
+	return maxsize;
+}
+
+/*
+ * Extract a list of contiguous pages from an ITER_BVEC iterator.  This does
+ * not get references on the pages, nor does it get a pin on them.
+ */
+static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
+					   struct page ***pages, size_t maxsize,
+					   unsigned int maxpages,
+					   unsigned int gup_flags,
+					   size_t *offset0)
+{
+	struct page **p, *page;
+	size_t skip = i->iov_offset, offset;
+	int k;
+
+	maxsize = min(maxsize, i->bvec->bv_len - skip);
+	skip += i->bvec->bv_offset;
+	page = i->bvec->bv_page + skip / PAGE_SIZE;
+	offset = skip % PAGE_SIZE;
+	*offset0 = offset;
+
+	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	p = *pages;
+	for (k = 0; k < maxpages; k++)
+		p[k] = page + k;
+
+	maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
+	i->count -= maxsize;
+	i->iov_offset += maxsize;
+	if (i->iov_offset == i->bvec->bv_len) {
+		i->iov_offset = 0;
+		i->bvec++;
+		i->nr_segs--;
+	}
+	return maxsize;
+}
+
+/*
+ * Get the first segment from an ITER_UBUF or ITER_IOVEC iterator.  The
+ * iterator must not be empty.
+ */
+static unsigned long iov_iter_extract_first_user_segment(const struct iov_iter *i,
+							 size_t *size)
+{
+	size_t skip;
+	long k;
+
+	if (iter_is_ubuf(i))
+		return (unsigned long)i->ubuf + i->iov_offset;
+
+	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
+		size_t len = i->iov[k].iov_len - skip;
+
+		if (unlikely(!len))
+			continue;
+		if (*size > len)
+			*size = len;
+		return (unsigned long)i->iov[k].iov_base + skip;
+	}
+	BUG(); // if it had been empty, we wouldn't get called
+}
+
+/*
+ * Extract a list of contiguous pages from a user iterator and get references
+ * on them.  This should only be used iff the iterator is user-backed
+ * (IOBUF/UBUF) and data is being transferred out of the buffer described by
+ * the iterator (ie. this is the source).
+ *
+ * The pages are returned with incremented refcounts that the caller must undo
+ * once the transfer is complete, but no additional pins are obtained.
+ *
+ * This is only safe to be used where background IO/DMA is not going to be
+ * modifying the buffer, and so won't cause a problem with CoW on fork.
+ */
+static ssize_t iov_iter_extract_user_pages_and_get(struct iov_iter *i,
+						   struct page ***pages,
+						   size_t maxsize,
+						   unsigned int maxpages,
+						   unsigned int gup_flags,
+						   size_t *offset0)
+{
+	unsigned long addr;
+	size_t offset;
+	int res;
+
+	if (WARN_ON_ONCE(!iov_iter_is_source(i)))
+		return -EFAULT;
+
+	gup_flags |= FOLL_GET;
+	if (i->nofault)
+		gup_flags |= FOLL_NOFAULT;
+
+	addr = iov_iter_extract_first_user_segment(i, &maxsize);
+	*offset0 = offset = addr % PAGE_SIZE;
+	addr &= PAGE_MASK;
+	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	res = get_user_pages_fast(addr, maxpages, gup_flags, *pages);
+	if (unlikely(res <= 0))
+		return res;
+	maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset);
+	iov_iter_advance(i, maxsize);
+	return maxsize;
+}
+
+/*
+ * Extract a list of contiguous pages from a user iterator and get a pin on
+ * each of them.  This should only be used iff the iterator is user-backed
+ * (IOBUF/UBUF) and data is being transferred into the buffer described by the
+ * iterator (ie. this is the destination).
+ *
+ * It does not get refs on the pages, but the pages must be unpinned by the
+ * caller once the transfer is complete.
+ *
+ * This is safe to be used where background IO/DMA *is* going to be modifying
+ * the buffer; using a pin rather than a ref makes sure that CoW happens
+ * correctly in the parent during fork.
+ */
+static ssize_t iov_iter_extract_user_pages_and_pin(struct iov_iter *i,
+						   struct page ***pages,
+						   size_t maxsize,
+						   unsigned int maxpages,
+						   unsigned int gup_flags,
+						   size_t *offset0)
+{
+	unsigned long addr;
+	size_t offset;
+	int res;
+
+	if (WARN_ON_ONCE(!iov_iter_is_dest(i)))
+		return -EFAULT;
+
+	gup_flags |= FOLL_PIN | FOLL_WRITE;
+	if (i->nofault)
+		gup_flags |= FOLL_NOFAULT;
+
+	addr = first_iovec_segment(i, &maxsize);
+	*offset0 = offset = addr % PAGE_SIZE;
+	addr &= PAGE_MASK;
+	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	if (!maxpages)
+		return -ENOMEM;
+	res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages);
+	if (unlikely(res <= 0))
+		return res;
+	maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset);
+	iov_iter_advance(i, maxsize);
+	return maxsize;
+}
+
+static ssize_t iov_iter_extract_user_pages(struct iov_iter *i,
+					   struct page ***pages, size_t maxsize,
+					   unsigned int maxpages,
+					   unsigned int gup_flags,
+					   size_t *offset0)
+{
+	if (iov_iter_extract_mode(i, gup_flags) == FOLL_GET)
+		return iov_iter_extract_user_pages_and_get(i, pages, maxsize,
+							   maxpages, gup_flags,
+							   offset0);
+	else
+		return iov_iter_extract_user_pages_and_pin(i, pages, maxsize,
+							   maxpages, gup_flags,
+							   offset0);
+}
+
+/**
+ * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator
+ * @i: The iterator to extract from
+ * @pages: Where to return the list of pages
+ * @maxsize: The maximum amount of iterator to extract
+ * @maxpages: The maximum size of the list of pages
+ * @gup_flags: Direction indicator and additional flags
+ * @offset0: Where to return the starting offset into (*@pages)[0]
+ *
+ * Extract a list of contiguous pages from the current point of the iterator,
+ * advancing the iterator.  The maximum number of pages and the maximum amount
+ * of page contents can be set.
+ *
+ * If *@pages is NULL, a page list will be allocated to the required size and
+ * *@pages will be set to its base.  If *@pages is not NULL, it will be assumed
+ * that the caller allocated a page list at least @maxpages in size and this
+ * will be filled in.
+ *
+ * @gup_flags can be set to either FOLL_SOURCE_BUF or FOLL_DEST_BUF, indicating
+ * how the buffer is to be used, and can have FOLL_PCI_P2PDMA OR'd with that.
+ *
+ * The iov_iter_extract_mode() function can be used to query how cleanup should
+ * be performed.
+ *
+ * Extra refs or pins on the pages may be obtained as follows:
+ *
+ *  (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF) and data is to be
+ *      transferred /OUT OF/ the buffer (@gup_flags |= FOLL_SOURCE_BUF), refs
+ *      will be taken on the pages, but pins will not be added.  This can be
+ *      used for DMA from a page; it cannot be used for DMA to a page, as it
+ *      may cause page-COW problems in fork.  iov_iter_extract_mode() will
+ *      return FOLL_GET.
+ *
+ *  (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF) and data is to be
+ *      transferred /INTO/ the described buffer (@gup_flags |= FOLL_DEST_BUF),
+ *      pins will be added to the pages, but refs will not be taken.  This must
+ *      be used for DMA to a page.  iov_iter_extract_mode() will return
+ *      FOLL_PIN.
+ *
+ *  (*) If the iterator is ITER_PIPE, this must describe a destination for the
+ *      data.  Additional pages may be allocated and added to the pipe (which
+ *      will hold the refs), but neither refs nor pins will be obtained for the
+ *      caller.  The caller must hold the pipe lock.  iov_iter_extract_mode()
+ *      will return 0.
+ *
+ *  (*) If the iterator is ITER_BVEC or ITER_XARRAY, the pages are merely
+ *      listed; no extra refs or pins are obtained.  iov_iter_extract_mode()
+ *      will return 0.
+ *
+ * Note also:
+ *
+ *  (*) Use with ITER_KVEC is not supported as that may refer to memory that
+ *      doesn't have associated page structs.
+ *
+ *  (*) Use with ITER_DISCARD is not supported as that has no content.
+ *
+ * On success, the function sets *@pages to the new pagelist, if allocated, and
+ * sets *offset0 to the offset into the first page..
+ *
+ * It may also return -ENOMEM and -EFAULT.
+ */
+ssize_t iov_iter_extract_pages(struct iov_iter *i,
+			       struct page ***pages,
+			       size_t maxsize,
+			       unsigned int maxpages,
+			       unsigned int gup_flags,
+			       size_t *offset0)
+{
+	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_SOURCE_BUF &&
+			 iov_iter_is_dest(i)))
+		return -EIO;
+	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_DEST_BUF &&
+			 iov_iter_is_source(i)))
+		return -EIO;
+
+	maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT);
+	if (!maxsize)
+		return 0;
+
+	if (likely(user_backed_iter(i)))
+		return iov_iter_extract_user_pages(i, pages, maxsize,
+						   maxpages, gup_flags,
+						   offset0);
+	if (iov_iter_is_bvec(i))
+		return iov_iter_extract_bvec_pages(i, pages, maxsize,
+						   maxpages, gup_flags,
+						   offset0);
+	if (iov_iter_is_pipe(i))
+		return iov_iter_extract_pipe_pages(i, pages, maxsize,
+						   maxpages, gup_flags,
+						   offset0);
+	if (iov_iter_is_xarray(i))
+		return iov_iter_extract_xarray_pages(i, pages, maxsize,
+						     maxpages, gup_flags,
+						     offset0);
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(iov_iter_extract_pages);



  parent reply	other threads:[~2023-01-16 23:10 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-16 23:07 [PATCH v6 00/34] iov_iter: Improve page extraction (ref, pin or just list) David Howells
2023-01-16 23:08 ` [PATCH v6 01/34] vfs: Unconditionally set IOCB_WRITE in call_write_iter() David Howells
2023-01-17  7:52   ` Christoph Hellwig
2023-01-18 22:11     ` Al Viro
2023-01-19  5:44       ` Christoph Hellwig
2023-01-19 11:34       ` David Howells
2023-01-19 16:48         ` Christoph Hellwig
2023-01-19 21:14         ` David Howells
2023-01-17  8:28   ` David Howells
2023-01-17  8:44     ` Christoph Hellwig
2023-01-17 11:11   ` David Howells
2023-01-17 11:11     ` Christoph Hellwig
2023-01-18 22:05   ` Al Viro
2023-01-19  5:41     ` Christoph Hellwig
2023-01-19 10:01   ` David Howells
2023-01-19 16:46     ` Christoph Hellwig
2023-01-19 11:06   ` David Howells
2023-01-16 23:08 ` [PATCH v6 02/34] iov_iter: Use IOCB/IOMAP_WRITE/op_is_write rather than iterator direction David Howells
2023-01-17  7:55   ` Christoph Hellwig
2023-01-18 22:18     ` Al Viro
2023-01-16 23:08 ` [PATCH v6 03/34] iov_iter: Pass I/O direction into iov_iter_get_pages*() David Howells
2023-01-17  7:57   ` Christoph Hellwig
2023-01-17  8:07     ` David Hildenbrand
2023-01-17  8:09       ` Christoph Hellwig
2023-01-18 23:03     ` Al Viro
2023-01-19  0:15       ` Al Viro
2023-01-19  2:11         ` Al Viro
2023-01-19  5:47           ` Christoph Hellwig
2023-01-19  5:47         ` Christoph Hellwig
2023-01-17  8:44   ` David Howells
2023-01-17  8:46     ` Christoph Hellwig
2023-01-17  8:47     ` David Hildenbrand
2023-01-16 23:08 ` [PATCH v6 04/34] iov_iter: Remove iov_iter_get_pages2/pages_alloc2() David Howells
2023-01-16 23:08 ` [PATCH v6 05/34] iov_iter: Change the direction macros into an enum David Howells
2023-01-18 23:14   ` Al Viro
2023-01-18 23:17   ` David Howells
2023-01-18 23:19     ` Al Viro
2023-01-18 23:24     ` David Howells
2023-01-16 23:08 ` David Howells [this message]
2023-01-17  7:58   ` [PATCH v6 06/34] iov_iter: Use the direction in the iterator functions Christoph Hellwig
2023-01-18 22:28     ` Al Viro
2023-01-19  5:45       ` Christoph Hellwig
2023-01-18 23:15   ` Al Viro
2023-01-16 23:08 ` [PATCH v6 07/34] iov_iter: Add a function to extract a page list from an iterator David Howells
2023-01-17  8:01   ` Christoph Hellwig
2023-01-17  8:19   ` David Howells
2023-01-16 23:08 ` [PATCH v6 08/34] mm: Provide a helper to drop a pin/ref on a page David Howells
2023-01-17  8:02   ` Christoph Hellwig
2023-01-17  8:21   ` David Howells
2023-01-16 23:09 ` [PATCH v6 09/34] bio: Rename BIO_NO_PAGE_REF to BIO_PAGE_REFFED and invert the meaning David Howells
2023-01-17  8:02   ` Christoph Hellwig
2023-01-16 23:09 ` [PATCH v6 10/34] mm, block: Make BIO_PAGE_REFFED/PINNED the same as FOLL_GET/PIN numerically David Howells
2023-01-17  8:03   ` Christoph Hellwig
2023-01-16 23:09 ` [PATCH v6 11/34] iov_iter, block: Make bio structs pin pages rather than ref'ing if appropriate David Howells
2023-01-17  8:07   ` Christoph Hellwig
2023-01-17  8:26   ` David Howells
2023-01-17  8:44     ` Christoph Hellwig
2023-01-16 23:09 ` [PATCH v6 12/34] bio: Fix bio_flagged() so that gcc can better optimise it David Howells
2023-01-17  8:07   ` Christoph Hellwig
2023-01-16 23:09 ` [PATCH v6 13/34] netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator David Howells
2023-01-16 23:09 ` [PATCH v6 14/34] netfs: Add a function to extract an iterator into a scatterlist David Howells
2023-01-16 23:09 ` [PATCH v6 15/34] af_alg: Pin pages rather than ref'ing if appropriate David Howells
2023-01-16 23:09 ` [PATCH v6 16/34] af_alg: [RFC] Use netfs_extract_iter_to_sg() to create scatterlists David Howells
2023-01-16 23:10 ` [PATCH v6 17/34] scsi: [RFC] Use netfs_extract_iter_to_sg() David Howells
2023-01-16 23:10 ` [PATCH v6 18/34] dio: Pin pages rather than ref'ing if appropriate David Howells
2023-01-19  5:04   ` Al Viro
2023-01-19  5:51     ` Christoph Hellwig
2023-01-16 23:10 ` [PATCH v6 19/34] fuse: " David Howells
2023-01-16 23:10 ` [PATCH v6 20/34] vfs: Make splice use iov_iter_extract_pages() David Howells
2023-01-19  2:31   ` Al Viro
2023-01-16 23:10 ` [PATCH v6 21/34] 9p: Pin pages rather than ref'ing if appropriate David Howells
2023-01-19  2:52   ` Al Viro
2023-01-19 16:44   ` David Howells
2023-01-19 16:51     ` Christoph Hellwig
2023-01-16 23:10 ` [PATCH v6 22/34] nfs: " David Howells
2023-01-16 23:10 ` [PATCH v6 23/34] cifs: Implement splice_read to pass down ITER_BVEC not ITER_PIPE David Howells
2023-01-16 23:10 ` [PATCH v6 24/34] cifs: Add a function to build an RDMA SGE list from an iterator David Howells
2023-01-16 23:11 ` [PATCH v6 25/34] cifs: Add a function to Hash the contents of " David Howells
2023-01-16 23:11 ` [PATCH v6 26/34] cifs: Add some helper functions David Howells
2023-01-16 23:11 ` [PATCH v6 27/34] cifs: Add a function to read into an iter from a socket David Howells
2023-01-16 23:11 ` [PATCH v6 28/34] cifs: Change the I/O paths to use an iterator rather than a page list David Howells
2023-01-16 23:11 ` [PATCH v6 29/34] cifs: Build the RDMA SGE list directly from an iterator David Howells
2023-01-16 23:11 ` [PATCH v6 30/34] cifs: Remove unused code David Howells
2023-01-16 23:11 ` [PATCH v6 31/34] cifs: Fix problem with encrypted RDMA data read David Howells
2023-01-19 16:25   ` Stefan Metzmacher
2023-01-16 23:11 ` [PATCH v6 32/34] cifs: DIO to/from KVEC-type iterators should now work David Howells
2023-01-16 23:12 ` [PATCH v6 33/34] net: [RFC][WIP] Mark each skb_frags as to how they should be cleaned up David Howells
2023-01-16 23:12 ` [PATCH v6 34/34] net: [RFC][WIP] Make __zerocopy_sg_from_iter() correctly pin or leave pages unref'd David Howells
2023-01-17  7:46 ` [PATCH v6 00/34] iov_iter: Improve page extraction (ref, pin or just list) Christoph Hellwig
2023-01-18 14:00 ` [PATCH v6 09/34] bio: Rename BIO_NO_PAGE_REF to BIO_PAGE_REFFED and invert the meaning David Howells
2023-01-18 14:09   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=167391052497.2311931.9463379582932734164.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=logang@deltatee.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.