From: Al Viro <viro@zeniv.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
David Sterba <dsterba@suse.com>,
Miklos Szeredi <miklos@szeredi.hu>,
Anton Altaparmakov <anton@tuxera.com>,
David Howells <dhowells@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
Pavel Begunkov <asml.silence@gmail.com>
Subject: [RFC PATCH 17/37] get rid of iterate_all_kinds() in iov_iter_get_pages()/iov_iter_get_pages_alloc()
Date: Sun, 6 Jun 2021 19:10:31 +0000 [thread overview]
Message-ID: <20210606191051.1216821-17-viro@zeniv.linux.org.uk> (raw)
In-Reply-To: <20210606191051.1216821-1-viro@zeniv.linux.org.uk>
Here iterate_all_kinds() is used just to find the first (non-empty, in
case of iovec) segment, which can easily be done explicitly.
Note that in the bvec case we can now get more than PAGE_SIZE worth of pages,
namely when we have a compound page in the bvec and a range that crosses
a subpage boundary. The older behaviour was to stop at that boundary;
we used to get the right first page (for_each_bvec() took care of that),
but that was all we got.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 147 +++++++++++++++++++++++++++++++++++----------------------
1 file changed, 91 insertions(+), 56 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index a6947301b9a0..5e8d5e4ee92d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1463,9 +1463,6 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
unsigned int iter_head, npages;
size_t capacity;
- if (!maxsize)
- return 0;
-
if (!sanity(i))
return -EFAULT;
@@ -1546,29 +1543,67 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
return actual;
}
+/* must be done on non-empty ITER_IOVEC one */
+static unsigned long first_iovec_segment(const struct iov_iter *i,
+ size_t *size, size_t *start,
+ size_t maxsize, unsigned maxpages)
+{
+ size_t skip;
+ long k;
+
+ for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
+ unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
+ size_t len = i->iov[k].iov_len - skip;
+
+ if (unlikely(!len))
+ continue;
+ if (len > maxsize)
+ len = maxsize;
+ len += (*start = addr % PAGE_SIZE);
+ if (len > maxpages * PAGE_SIZE)
+ len = maxpages * PAGE_SIZE;
+ *size = len;
+ return addr & PAGE_MASK;
+ }
+ BUG(); // if it had been empty, we wouldn't get called
+}
+
+/* must be done on non-empty ITER_BVEC one */
+static struct page *first_bvec_segment(const struct iov_iter *i,
+ size_t *size, size_t *start,
+ size_t maxsize, unsigned maxpages)
+{
+ struct page *page;
+ size_t skip = i->iov_offset, len;
+
+ len = i->bvec->bv_len - skip;
+ if (len > maxsize)
+ len = maxsize;
+ skip += i->bvec->bv_offset;
+ page = i->bvec->bv_page + skip / PAGE_SIZE;
+ len += (*start = skip % PAGE_SIZE);
+ if (len > maxpages * PAGE_SIZE)
+ len = maxpages * PAGE_SIZE;
+ *size = len;
+ return page;
+}
+
ssize_t iov_iter_get_pages(struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
+ size_t len;
+ int n, res;
+
if (maxsize > i->count)
maxsize = i->count;
+ if (!maxsize)
+ return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return pipe_get_pages(i, pages, maxsize, maxpages, start);
- if (unlikely(iov_iter_is_xarray(i)))
- return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
- if (unlikely(iov_iter_is_discard(i)))
- return -EFAULT;
-
- iterate_all_kinds(i, maxsize, v, ({
- unsigned long addr = (unsigned long)v.iov_base;
- size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
- int n;
- int res;
+ if (likely(iter_is_iovec(i))) {
+ unsigned long addr;
- if (len > maxpages * PAGE_SIZE)
- len = maxpages * PAGE_SIZE;
- addr &= ~(PAGE_SIZE - 1);
+ addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
n = DIV_ROUND_UP(len, PAGE_SIZE);
res = get_user_pages_fast(addr, n,
iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
@@ -1576,17 +1611,21 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (unlikely(res < 0))
return res;
return (res == n ? len : res * PAGE_SIZE) - *start;
- 0;}),({
- /* can't be more than PAGE_SIZE */
- *start = v.bv_offset;
- get_page(*pages = v.bv_page);
- return v.bv_len;
- }),({
- return -EFAULT;
- }),
- 0
- )
- return 0;
+ }
+ if (iov_iter_is_bvec(i)) {
+ struct page *page;
+
+ page = first_bvec_segment(i, &len, start, maxsize, maxpages);
+ n = DIV_ROUND_UP(len, PAGE_SIZE);
+ while (n--)
+ get_page(*pages++ = page++);
+ return len - *start;
+ }
+ if (iov_iter_is_pipe(i))
+ return pipe_get_pages(i, pages, maxsize, maxpages, start);
+ if (iov_iter_is_xarray(i))
+ return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
+ return -EFAULT;
}
EXPORT_SYMBOL(iov_iter_get_pages);
@@ -1603,9 +1642,6 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
unsigned int iter_head, npages;
ssize_t n;
- if (!maxsize)
- return 0;
-
if (!sanity(i))
return -EFAULT;
@@ -1678,24 +1714,18 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
size_t *start)
{
struct page **p;
+ size_t len;
+ int n, res;
if (maxsize > i->count)
maxsize = i->count;
+ if (!maxsize)
+ return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return pipe_get_pages_alloc(i, pages, maxsize, start);
- if (unlikely(iov_iter_is_xarray(i)))
- return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
- if (unlikely(iov_iter_is_discard(i)))
- return -EFAULT;
-
- iterate_all_kinds(i, maxsize, v, ({
- unsigned long addr = (unsigned long)v.iov_base;
- size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
- int n;
- int res;
+ if (likely(iter_is_iovec(i))) {
+ unsigned long addr;
- addr &= ~(PAGE_SIZE - 1);
+ addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
n = DIV_ROUND_UP(len, PAGE_SIZE);
p = get_pages_array(n);
if (!p)
@@ -1708,19 +1738,24 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
}
*pages = p;
return (res == n ? len : res * PAGE_SIZE) - *start;
- 0;}),({
- /* can't be more than PAGE_SIZE */
- *start = v.bv_offset;
- *pages = p = get_pages_array(1);
+ }
+ if (iov_iter_is_bvec(i)) {
+ struct page *page;
+
+ page = first_bvec_segment(i, &len, start, maxsize, ~0U);
+ n = DIV_ROUND_UP(len, PAGE_SIZE);
+ *pages = p = get_pages_array(n);
if (!p)
return -ENOMEM;
- get_page(*p = v.bv_page);
- return v.bv_len;
- }),({
- return -EFAULT;
- }), 0
- )
- return 0;
+ while (n--)
+ get_page(*p++ = page++);
+ return len - *start;
+ }
+ if (iov_iter_is_pipe(i))
+ return pipe_get_pages_alloc(i, pages, maxsize, start);
+ if (iov_iter_is_xarray(i))
+ return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
+ return -EFAULT;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
--
2.11.0
next prev parent reply other threads:[~2021-06-06 19:12 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-06 19:07 [RFC][PATCHSET] iov_iter work Al Viro
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
2021-06-06 19:10 ` [RFC PATCH 02/37] generic_perform_write()/iomap_write_actor(): saner logics for short copy Al Viro
2021-06-06 19:10 ` [RFC PATCH 03/37] fuse_fill_write_pages(): don't bother with iov_iter_single_seg_count() Al Viro
2021-06-06 19:10 ` [RFC PATCH 04/37] iov_iter: Remove iov_iter_for_each_range() Al Viro
2021-06-06 19:10 ` [RFC PATCH 05/37] teach copy_page_to_iter() to handle compound pages Al Viro
2021-06-06 19:10 ` [RFC PATCH 06/37] copy_page_to_iter(): fix ITER_DISCARD case Al Viro
2021-06-06 19:10 ` [RFC PATCH 07/37] [xarray] iov_iter_fault_in_readable() should do nothing in xarray case Al Viro
2021-06-06 19:10 ` [RFC PATCH 08/37] iov_iter_advance(): use consistent semantics for move past the end Al Viro
2021-06-06 19:10 ` [RFC PATCH 09/37] iov_iter: switch ..._full() variants of primitives to use of iov_iter_revert() Al Viro
2021-06-06 19:10 ` [RFC PATCH 10/37] iov_iter: reorder handling of flavours in primitives Al Viro
2021-06-06 19:10 ` [RFC PATCH 11/37] iov_iter_advance(): don't modify ->iov_offset for ITER_DISCARD Al Viro
2021-06-06 19:10 ` [RFC PATCH 12/37] iov_iter: separate direction from flavour Al Viro
2021-06-06 19:10 ` [RFC PATCH 13/37] iov_iter: optimize iov_iter_advance() for iovec and kvec Al Viro
2021-06-06 19:10 ` [RFC PATCH 14/37] sanitize iov_iter_fault_in_readable() Al Viro
2021-06-06 19:10 ` [RFC PATCH 15/37] iov_iter_alignment(): don't bother with iterate_all_kinds() Al Viro
2021-06-06 19:10 ` [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds() Al Viro
2021-06-09 13:01 ` Qian Cai
2021-06-09 18:06 ` Al Viro
2021-06-06 19:10 ` Al Viro [this message]
2021-06-06 19:10 ` [RFC PATCH 18/37] iov_iter_npages(): don't bother with iterate_all_kinds() Al Viro
2021-06-06 19:10 ` [RFC PATCH 19/37] [xarray] iov_iter_npages(): just use DIV_ROUND_UP() Al Viro
2021-06-06 19:10 ` [RFC PATCH 20/37] iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant Al Viro
2021-06-06 19:10 ` [RFC PATCH 21/37] csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter() Al Viro
2021-06-06 19:10 ` [RFC PATCH 22/37] iterate_and_advance(): get rid of magic in case when n is 0 Al Viro
2021-06-06 19:10 ` [RFC PATCH 23/37] iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec Al Viro
2021-06-06 19:10 ` [RFC PATCH 24/37] iov_iter: unify iterate_iovec and iterate_kvec Al Viro
2021-06-06 19:10 ` [RFC PATCH 25/37] iterate_bvec(): expand bvec.h macro forest, massage a bit Al Viro
2021-06-06 19:10 ` [RFC PATCH 26/37] iov_iter: teach iterate_{bvec,xarray}() about possible short copies Al Viro
2021-06-06 19:10 ` [RFC PATCH 27/37] iov_iter: get rid of separate bvec and xarray callbacks Al Viro
2021-06-06 19:10 ` [RFC PATCH 28/37] iov_iter: make the amount already copied available to iterator callbacks Al Viro
2021-06-06 19:10 ` [RFC PATCH 29/37] iov_iter: make iterator callbacks use base and len instead of iovec Al Viro
2021-06-06 19:10 ` [RFC PATCH 30/37] pull handling of ->iov_offset into iterate_{iovec,bvec,xarray} Al Viro
2021-06-06 19:10 ` [RFC PATCH 31/37] iterate_xarray(): only of the first iteration we might get offset != 0 Al Viro
2021-06-06 19:10 ` [RFC PATCH 32/37] copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases Al Viro
2021-06-06 19:10 ` [RFC PATCH 33/37] copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases Al Viro
2021-06-06 19:10 ` [RFC PATCH 34/37] iov_iter: clean csum_and_copy_...() primitives up a bit Al Viro
2021-06-06 19:10 ` [RFC PATCH 35/37] pipe_zero(): we don't need no stinkin' kmap_atomic() Al Viro
2021-06-06 19:10 ` [RFC PATCH 36/37] clean up copy_mc_pipe_to_iter() Al Viro
2021-06-06 19:10 ` [RFC PATCH 37/37] csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller Al Viro
2021-06-06 22:05 ` [RFC][PATCHSET] iov_iter work Linus Torvalds
2021-06-06 22:46 ` Linus Torvalds
2021-06-07 9:28 ` Christoph Hellwig
2021-06-07 14:43 ` Al Viro
2021-06-07 15:59 ` Christoph Hellwig
2021-06-07 21:07 ` Al Viro
2021-06-07 22:01 ` Linus Torvalds
2021-06-07 23:35 ` Linus Torvalds
2021-06-08 5:25 ` Christoph Hellwig
2021-06-08 11:27 ` Al Viro
2021-06-06 23:29 ` Al Viro
2021-06-07 10:38 ` Pavel Begunkov
2021-06-08 14:43 ` David Laight
2021-06-10 14:29 ` Qian Cai
2021-06-10 15:35 ` Al Viro
2021-06-10 15:48 ` Al Viro
2021-06-10 19:08 ` Qian Cai
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210606191051.1216821-17-viro@zeniv.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=anton@tuxera.com \
--cc=asml.silence@gmail.com \
--cc=dhowells@redhat.com \
--cc=dsterba@suse.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=torvalds@linux-foundation.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).