All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@redhat.com>
To: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-nfs@vger.kernel.org, ceph-devel@vger.kernel.org,
	lustre-devel@lists.lustre.org,
	v9fs-developer@lists.sourceforge.net
Subject: [PATCH] iov_iter: allow iov_iter_get_pages_alloc to allocate more pages per call
Date: Tue, 24 Jan 2017 16:23:27 -0500	[thread overview]
Message-ID: <20170124212327.14517-1-jlayton@redhat.com> (raw)

Currently, iov_iter_get_pages_alloc will only ever operate on the first
vector that iterate_all_kinds hands back. Many of the callers however
would like to have as long a set of pages as possible, to allow for
fewer, but larger I/Os.

When the previous vector ends on a page boundary and the current one
begins on one, we can continue to add more pages.

Change the function to first scan the iov_iter to see how long an
array of pages we could create from the current position. Then,
allocate an array that large (or up to the maxsize), and fill that
many pages.

This patch would allow us to rip out a chunk of code in ceph that
tries to do the same thing, but doesn't handle it right.

For NFS, this allows the client to do larger I/Os when userland passes
down an array of page-aligned iovecs in an O_DIRECT request. I imagine
this will also make splice writes into an O_DIRECT file more efficient.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 lib/iov_iter.c | 140 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 111 insertions(+), 29 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e68604ae3ced..956d17767a3e 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -883,6 +883,58 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/**
+ * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
+ * @i: iov_iter to in which to find the size
+ *
+ * Some filesystems can stitch together multiple iovecs into a single
+ * page vector when both the previous tail and current base are page
+ * aligned. This function discovers the length that can fit in a single
+ * pagevec and returns it.
+ */
+static size_t iov_iter_pvec_size(const struct iov_iter *i)
+{
+	size_t size = i->count;
+	size_t pv_size = 0;
+	bool contig = false, first = true;
+
+	if (!size)
+		return 0;
+
+	/* Pipes are naturally aligned for this */
+	if (unlikely(i->type & ITER_PIPE))
+		return size;
+
+	/*
+	 * An iov can be page vectored when the current base and previous
+	 * tail are both page aligned. Note that we don't require that the
+	 * initial base in the first iovec also be page aligned.
+	 */
+	iterate_all_kinds(i, size, v,
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }; 0;
+		 }),
+		({
+		 if (first || (contig && v.bv_offset == 0)) {
+			pv_size += v.bv_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
+		 }
+		 }),
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }
+		 }))
+	return pv_size;
+}
+
 static inline size_t __pipe_get_pages(struct iov_iter *i,
 				size_t maxsize,
 				struct page **pages,
@@ -1006,47 +1058,77 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 }
 
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
-		   struct page ***pages, size_t maxsize,
-		   size_t *start)
+		   struct page ***ppages, size_t maxsize,
+		   size_t *pstart)
 {
-	struct page **p;
-
-	if (maxsize > i->count)
-		maxsize = i->count;
+	struct page **p, **pc;
+	size_t start = 0;
+	ssize_t len = 0;
+	int npages, res = 0;
+	bool first = true;
 
 	if (unlikely(i->type & ITER_PIPE))
-		return pipe_get_pages_alloc(i, pages, maxsize, start);
+		return pipe_get_pages_alloc(i, ppages, maxsize, pstart);
+
+	maxsize = min(iov_iter_pvec_size(i), maxsize);
+	npages = DIV_ROUND_UP(maxsize, PAGE_SIZE);
+	p = get_pages_array(npages);
+	if (!p)
+		return -ENOMEM;
+
+	pc = p;
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
-		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
+		size_t slen = v.iov_len;
 		int n;
-		int res;
 
-		addr &= ~(PAGE_SIZE - 1);
-		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		p = get_pages_array(n);
-		if (!p)
-			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
-		if (unlikely(res < 0)) {
-			kvfree(p);
-			return res;
+		if (first) {
+			start = addr & (PAGE_SIZE - 1);
+			slen += start;
+			first = false;
 		}
-		*pages = p;
-		return (res == n ? len : res * PAGE_SIZE) - *start;
+
+		n = DIV_ROUND_UP(slen, PAGE_SIZE);
+		if ((pc + n) > (p + npages)) {
+			/* Did something change the iov array?!? */
+			res = -EFAULT;
+			goto out;
+		}
+		addr &= ~(PAGE_SIZE - 1);
+		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pc);
+		if (unlikely(res < 0))
+			goto out;
+		len += (res == n ? slen : res * PAGE_SIZE) - start;
+		pc += res;
 	0;}),({
-		/* can't be more than PAGE_SIZE */
-		*start = v.bv_offset;
-		*pages = p = get_pages_array(1);
-		if (!p)
-			return -ENOMEM;
-		get_page(*p = v.bv_page);
-		return v.bv_len;
+		/* bio_vecs are limited to a single page each */
+		if (first) {
+			start = v.bv_offset;
+			first = false;
+		}
+		get_page(*pc = v.bv_page);
+		len += v.bv_len;
+		++pc;
+		BUG_ON(pc > p + npages);
 	}),({
-		return -EFAULT;
+		/* FIXME: should we handle this case? */
+		res = -EFAULT;
+		goto out;
 	})
 	)
-	return 0;
+out:
+	if (unlikely(res < 0)) {
+		struct page **i;
+
+		for (i = p; i < pc; i++)
+			put_page(*i);
+		kvfree(p);
+		return res;
+	}
+
+	*ppages = p;
+	*pstart = start;
+	return len;
 }
 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-- 
2.9.3

             reply	other threads:[~2017-01-24 21:23 UTC|newest]

Thread overview: 101+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-24 21:23 Jeff Layton [this message]
2017-01-25 13:32 ` [PATCH v3 0/2] iov_iter: allow iov_iter_get_pages_alloc to allocate more pages per call Jeff Layton
2017-01-25 13:32   ` [PATCH v3 1/2] " Jeff Layton
2017-01-26 12:35     ` Jeff Layton
2017-01-27 13:24       ` [PATCH v4 0/2] " Jeff Layton
2017-01-27 13:24         ` [PATCH v4 1/2] " Jeff Layton
2017-01-27 13:24         ` [PATCH v4 2/2] ceph: switch DIO code to use iov_iter_get_pages_alloc Jeff Layton
2017-01-30 15:40           ` Jeff Layton
2017-01-30 15:40             ` Jeff Layton
2017-01-25 13:32   ` [PATCH v3 " Jeff Layton
2017-02-02  9:51   ` [PATCH v3 0/2] iov_iter: allow iov_iter_get_pages_alloc to allocate more pages per call Al Viro
2017-02-02  9:51     ` [lustre-devel] " Al Viro
2017-02-02  9:51     ` Al Viro
2017-02-02 10:56     ` Christoph Hellwig
2017-02-02 10:56       ` [lustre-devel] " Christoph Hellwig
2017-02-02 10:56       ` Christoph Hellwig
2017-02-02 11:16       ` Al Viro
2017-02-02 11:16         ` [lustre-devel] " Al Viro
2017-02-02 11:16         ` Al Viro
2017-02-02 13:00         ` Jeff Layton
2017-02-02 13:00           ` Jeff Layton
2017-02-03  7:29           ` Al Viro
2017-02-03  7:29             ` [lustre-devel] " Al Viro
2017-02-03  7:29             ` Al Viro
2017-02-03 18:29             ` Linus Torvalds
2017-02-03 18:29               ` [lustre-devel] " Linus Torvalds
2017-02-03 18:29               ` Linus Torvalds
2017-02-03 19:08               ` Al Viro
2017-02-03 19:08                 ` [lustre-devel] " Al Viro
2017-02-03 19:08                 ` Al Viro
2017-02-03 19:28                 ` Linus Torvalds
2017-02-03 19:28                   ` [lustre-devel] " Linus Torvalds
2017-02-03 19:28                   ` Linus Torvalds
2017-02-13  9:56                   ` Steve Capper
2017-02-13 21:40                     ` Linus Torvalds
2017-02-13 21:40                       ` [lustre-devel] " Linus Torvalds
2017-02-13 21:40                       ` Linus Torvalds
2017-02-03  7:49           ` Christoph Hellwig
2017-02-03  7:49             ` [lustre-devel] " Christoph Hellwig
2017-02-03  7:49             ` Christoph Hellwig
2017-02-03  8:54             ` Al Viro
2017-02-03  8:54               ` [lustre-devel] " Al Viro
2017-02-03  8:54               ` Al Viro
2017-02-03 11:09               ` Christoph Hellwig
2017-02-03 11:09                 ` [lustre-devel] " Christoph Hellwig
2017-02-03 11:09                 ` Christoph Hellwig
2017-02-02 14:48     ` Jan Kara
2017-02-02 14:48       ` [lustre-devel] " Jan Kara
2017-02-02 14:48       ` Jan Kara
2017-02-02 18:28       ` Al Viro
2017-02-02 18:28         ` [lustre-devel] " Al Viro
2017-02-02 18:28         ` Al Viro
2017-02-03 14:47         ` Jan Kara
2017-02-03 14:47           ` [lustre-devel] " Jan Kara
2017-02-03 14:47           ` Jan Kara
2017-02-04  3:08     ` Al Viro
2017-02-04  3:08       ` [lustre-devel] " Al Viro
2017-02-04  3:08       ` Al Viro
2017-02-04 19:26       ` Al Viro
2017-02-04 19:26         ` [lustre-devel] " Al Viro
2017-02-04 19:26         ` Al Viro
2017-02-04 22:12         ` Miklos Szeredi
2017-02-04 22:12           ` Miklos Szeredi
2017-02-04 22:11       ` Miklos Szeredi
2017-02-04 22:11         ` Miklos Szeredi
2017-02-05  1:51         ` Al Viro
2017-02-05  1:51           ` [lustre-devel] " Al Viro
2017-02-05  1:51           ` Al Viro
2017-02-05 20:15           ` Miklos Szeredi
2017-02-05 20:15             ` Miklos Szeredi
2017-02-05 21:01             ` Al Viro
2017-02-05 21:01               ` [lustre-devel] " Al Viro
2017-02-05 21:01               ` Al Viro
2017-02-05 21:19               ` Miklos Szeredi
2017-02-05 21:19                 ` Miklos Szeredi
2017-02-05 22:04                 ` Al Viro
2017-02-05 22:04                   ` [lustre-devel] " Al Viro
2017-02-05 22:04                   ` Al Viro
2017-02-05 22:04                   ` Al Viro
2017-02-06  3:05                   ` Al Viro
2017-02-06  3:05                     ` [lustre-devel] " Al Viro
2017-02-06  3:05                     ` Al Viro
2017-02-06  9:08                     ` Miklos Szeredi
2017-02-06  9:57                       ` Al Viro
2017-02-06  9:57                         ` [lustre-devel] " Al Viro
2017-02-06  9:57                         ` Al Viro
2017-02-06 14:18                         ` Miklos Szeredi
2017-02-07  7:19                           ` Al Viro
2017-02-07  7:19                             ` [lustre-devel] " Al Viro
2017-02-07  7:19                             ` Al Viro
2017-02-07 11:35                             ` Miklos Szeredi
2017-02-07 11:35                               ` Miklos Szeredi
2017-02-08  5:54                               ` Al Viro
2017-02-08  5:54                                 ` [lustre-devel] " Al Viro
2017-02-08  5:54                                 ` Al Viro
2017-02-08  9:53                                 ` Miklos Szeredi
2017-02-06  8:37                   ` Miklos Szeredi
2017-02-05 20:56           ` Al Viro
2017-02-05 20:56             ` [lustre-devel] " Al Viro
2017-02-05 20:56             ` Al Viro
2017-02-16 13:10     ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170124212327.14517-1-jlayton@redhat.com \
    --to=jlayton@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=lustre-devel@lists.lustre.org \
    --cc=v9fs-developer@lists.sourceforge.net \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.