All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: linux-fsdevel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>,
	Matthew Wilcox <willy@infradead.org>,
	David Howells <dhowells@redhat.com>,
	Dominique Martinet <asmadeus@codewreck.org>,
	Christian Brauner <brauner@kernel.org>
Subject: [PATCH 21/44] ITER_PIPE: cache the type of last buffer
Date: Wed, 22 Jun 2022 05:15:29 +0100	[thread overview]
Message-ID: <20220622041552.737754-21-viro@zeniv.linux.org.uk> (raw)
In-Reply-To: <20220622041552.737754-1-viro@zeniv.linux.org.uk>

We often need to find whether the last buffer is anon or not, and
currently it's rather clumsy:
	check if ->iov_offset is non-zero (i.e. that pipe is not empty)
	if so, get the corresponding pipe_buffer and check its ->ops
	if it's &default_pipe_buf_ops, we have an anon buffer.

Let's replace the use of ->iov_offset (which is nowhere near similar to
its role for other flavours) with signed field (->last_offset), with
the following rules:
	empty, no buffers occupied:		0
	anon, with bytes up to N-1 filled:	N
	zero-copy, with bytes up to N-1 filled:	-N

That way abs(i->last_offset) is equal to what used to be in i->iov_offset
and empty vs. anon vs. zero-copy can be distinguished by the sign of
i->last_offset.

	Checks for "should we extend the last buffer or should we start
a new one?" become easier to follow that way.

	Note that most of the operations can only be done in a sane
state - i.e. when the pipe has nothing past the current position of
iterator.  About the only thing that could be done outside of that
state is iov_iter_advance(), which transitions to the sane state by
truncating the pipe.  There are only two cases where we leave the
sane state:
	1) iov_iter_get_pages()/iov_iter_get_pages_alloc().  Will be
dealt with later, when we make get_pages advancing - the callers are
actually happier that way.
	2) iov_iter copied, then something is put into the copy.  Since
they share the underlying pipe, the original gets behind.  When we
decide that we are done with the copy (original is not usable until then)
we advance the original.  direct_io used to be done that way; nowadays
it operates on the original and we do iov_iter_revert() to discard
the excessive data.  At the moment there's nothing in the kernel that
could do that to ITER_PIPE iterators, so this reason for insane state
is theoretical right now.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  5 +++-
 lib/iov_iter.c      | 72 ++++++++++++++++++++++-----------------------
 2 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6ab4260c3d6c..d3e13b37ea72 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -40,7 +40,10 @@ struct iov_iter {
 	bool nofault;
 	bool data_source;
 	bool user_backed;
-	size_t iov_offset;
+	union {
+		size_t iov_offset;
+		int last_offset;
+	};
 	size_t count;
 	union {
 		const struct iovec *iov;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 4e2b000b0466..27ad2ef93dbc 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -199,7 +199,7 @@ static bool sanity(const struct iov_iter *i)
 	unsigned int i_head = i->head;
 	unsigned int idx;
 
-	if (i->iov_offset) {
+	if (i->last_offset) {
 		struct pipe_buffer *p;
 		if (unlikely(p_occupancy == 0))
 			goto Bad;	// pipe must be non-empty
@@ -207,7 +207,7 @@ static bool sanity(const struct iov_iter *i)
 			goto Bad;	// must be at the last buffer...
 
 		p = pipe_buf(pipe, i_head);
-		if (unlikely(p->offset + p->len != i->iov_offset))
+		if (unlikely(p->offset + p->len != abs(i->last_offset)))
 			goto Bad;	// ... at the end of segment
 	} else {
 		if (i_head != p_head)
@@ -215,7 +215,7 @@ static bool sanity(const struct iov_iter *i)
 	}
 	return true;
 Bad:
-	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
+	printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset);
 	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
 			p_head, p_tail, pipe->ring_size);
 	for (idx = 0; idx < pipe->ring_size; idx++)
@@ -259,29 +259,30 @@ static void push_page(struct pipe_inode_info *pipe, struct page *page,
 	get_page(page);
 }
 
-static inline bool allocated(struct pipe_buffer *buf)
+static inline int last_offset(const struct pipe_buffer *buf)
 {
-	return buf->ops == &default_pipe_buf_ops;
+	if (buf->ops == &default_pipe_buf_ops)
+		return buf->len;	// buf->offset is 0 for those
+	else
+		return -(buf->offset + buf->len);
 }
 
 static struct page *append_pipe(struct iov_iter *i, size_t size, size_t *off)
 {
 	struct pipe_inode_info *pipe = i->pipe;
-	size_t offset = i->iov_offset;
+	int offset = i->last_offset;
 	struct pipe_buffer *buf;
 	struct page *page;
 
-	if (offset && offset < PAGE_SIZE) {
-		// some space in the last buffer; can we add to it?
+	if (offset > 0 && offset < PAGE_SIZE) {
+		// some space in the last buffer; add to it
 		buf = pipe_buf(pipe, pipe->head - 1);
-		if (allocated(buf)) {
-			size = min_t(size_t, size, PAGE_SIZE - offset);
-			buf->len += size;
-			i->iov_offset += size;
-			i->count -= size;
-			*off = offset;
-			return buf->page;
-		}
+		size = min_t(size_t, size, PAGE_SIZE - offset);
+		buf->len += size;
+		i->last_offset += size;
+		i->count -= size;
+		*off = offset;
+		return buf->page;
 	}
 	// OK, we need a new buffer
 	*off = 0;
@@ -292,7 +293,7 @@ static struct page *append_pipe(struct iov_iter *i, size_t size, size_t *off)
 	if (!page)
 		return NULL;
 	i->head = pipe->head - 1;
-	i->iov_offset = size;
+	i->last_offset = size;
 	i->count -= size;
 	return page;
 }
@@ -312,11 +313,11 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
 	if (!sanity(i))
 		return 0;
 
-	if (offset && i->iov_offset == offset) { // could we merge it?
+	if (offset && i->last_offset == -offset) { // could we merge it?
 		struct pipe_buffer *buf = pipe_buf(pipe, head - 1);
 		if (buf->page == page) {
 			buf->len += bytes;
-			i->iov_offset += bytes;
+			i->last_offset -= bytes;
 			i->count -= bytes;
 			return bytes;
 		}
@@ -325,7 +326,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
 		return 0;
 
 	push_page(pipe, page, offset, bytes);
-	i->iov_offset = offset + bytes;
+	i->last_offset = -(offset + bytes);
 	i->head = head;
 	i->count -= bytes;
 	return bytes;
@@ -437,16 +438,15 @@ EXPORT_SYMBOL(iov_iter_init);
 static inline void data_start(const struct iov_iter *i,
 			      unsigned int *iter_headp, size_t *offp)
 {
-	unsigned int iter_head = i->head;
-	size_t off = i->iov_offset;
+	int off = i->last_offset;
 
-	if (off && (!allocated(pipe_buf(i->pipe, iter_head)) ||
-		    off == PAGE_SIZE)) {
-		iter_head++;
-		off = 0;
+	if (off > 0 && off < PAGE_SIZE) { // anon and not full
+		*iter_headp = i->pipe->head - 1;
+		*offp = off;
+	} else {
+		*iter_headp = i->pipe->head;
+		*offp = 0;
 	}
-	*iter_headp = iter_head;
-	*offp = off;
 }
 
 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
@@ -821,7 +821,7 @@ EXPORT_SYMBOL(copy_page_from_iter_atomic);
 static void pipe_advance(struct iov_iter *i, size_t size)
 {
 	struct pipe_inode_info *pipe = i->pipe;
-	unsigned int off = i->iov_offset;
+	int off = i->last_offset;
 
 	if (!off && !size) {
 		pipe_discard_from(pipe, i->start_head); // discard everything
@@ -831,10 +831,10 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 	while (1) {
 		struct pipe_buffer *buf = pipe_buf(pipe, i->head);
 		if (off) /* make it relative to the beginning of buffer */
-			size += off - buf->offset;
+			size += abs(off) - buf->offset;
 		if (size <= buf->len) {
 			buf->len = size;
-			i->iov_offset = buf->offset + size;
+			i->last_offset = last_offset(buf);
 			break;
 		}
 		size -= buf->len;
@@ -918,7 +918,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 			struct pipe_buffer *b = pipe_buf(pipe, --head);
 			if (unroll < b->len) {
 				b->len -= unroll;
-				i->iov_offset = b->offset + b->len;
+				i->last_offset = last_offset(b);
 				i->head = head;
 				return;
 			}
@@ -926,7 +926,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 			pipe_buf_release(pipe, b);
 			pipe->head--;
 		}
-		i->iov_offset = 0;
+		i->last_offset = 0;
 		i->head = head;
 		return;
 	}
@@ -1029,7 +1029,7 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 		.pipe = pipe,
 		.head = pipe->head,
 		.start_head = pipe->head,
-		.iov_offset = 0,
+		.last_offset = 0,
 		.count = count
 	};
 }
@@ -1145,8 +1145,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	if (iov_iter_is_pipe(i)) {
 		size_t size = i->count;
 
-		if (size && i->iov_offset && allocated(pipe_buf(i->pipe, i->head)))
-			return size | i->iov_offset;
+		if (size && i->last_offset > 0)
+			return size | i->last_offset;
 		return size;
 	}
 
-- 
2.30.2


  parent reply	other threads:[~2022-06-22  4:16 UTC|newest]

Thread overview: 118+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-22  4:10 [RFC][CFT][PATCHSET] iov_iter stuff Al Viro
2022-06-22  4:15 ` [PATCH 01/44] 9p: handling Rerror without copy_from_iter_full() Al Viro
2022-06-22  4:15   ` [PATCH 02/44] No need of likely/unlikely on calls of check_copy_size() Al Viro
2022-06-22  4:15   ` [PATCH 03/44] teach iomap_dio_rw() to suppress dsync Al Viro
2022-06-22  4:15   ` [PATCH 04/44] btrfs: use IOMAP_DIO_NOSYNC Al Viro
2022-06-22  4:15   ` [PATCH 05/44] struct file: use anonymous union member for rcuhead and llist Al Viro
2022-06-22  4:15   ` [PATCH 06/44] iocb: delay evaluation of IS_SYNC(...) until we want to check IOCB_DSYNC Al Viro
2022-06-22  4:15   ` [PATCH 07/44] keep iocb_flags() result cached in struct file Al Viro
2022-06-22  4:15   ` [PATCH 08/44] copy_page_{to,from}_iter(): switch iovec variants to generic Al Viro
2022-06-27 18:31     ` Jeff Layton
2022-06-28 12:32     ` Christian Brauner
2022-06-28 18:36       ` Al Viro
2022-06-22  4:15   ` [PATCH 09/44] new iov_iter flavour - ITER_UBUF Al Viro
2022-06-27 18:47     ` Jeff Layton
2022-06-28 18:41       ` Al Viro
2022-06-28 12:38     ` Christian Brauner
2022-06-28 18:44       ` Al Viro
2022-07-28  9:55     ` [PATCH 9/44] " Alexander Gordeev
2022-07-29 17:21       ` Al Viro
2022-07-29 21:12         ` Alexander Gordeev
2022-07-30  0:03           ` Al Viro
2022-06-22  4:15   ` [PATCH 10/44] switch new_sync_{read,write}() to ITER_UBUF Al Viro
2022-06-22  4:15   ` [PATCH 11/44] iov_iter_bvec_advance(): don't bother with bvec_iter Al Viro
2022-06-27 18:48     ` Jeff Layton
2022-06-28 12:40     ` Christian Brauner
2022-06-22  4:15   ` [PATCH 12/44] fix short copy handling in copy_mc_pipe_to_iter() Al Viro
2022-06-27 19:15     ` Jeff Layton
2022-06-28 12:42     ` Christian Brauner
2022-06-22  4:15   ` [PATCH 13/44] splice: stop abusing iov_iter_advance() to flush a pipe Al Viro
2022-06-27 19:17     ` Jeff Layton
2022-06-28 12:43     ` Christian Brauner
2022-06-22  4:15   ` [PATCH 14/44] ITER_PIPE: helper for getting pipe buffer by index Al Viro
2022-06-28 10:38     ` Jeff Layton
2022-06-28 12:45     ` Christian Brauner
2022-06-22  4:15   ` [PATCH 15/44] ITER_PIPE: helpers for adding pipe buffers Al Viro
2022-06-28 11:32     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 16/44] ITER_PIPE: allocate buffers as we go in copy-to-pipe primitives Al Viro
2022-06-22  4:15   ` [PATCH 17/44] ITER_PIPE: fold push_pipe() into __pipe_get_pages() Al Viro
2022-06-22  4:15   ` [PATCH 18/44] ITER_PIPE: lose iter_head argument of __pipe_get_pages() Al Viro
2022-06-22  4:15   ` [PATCH 19/44] ITER_PIPE: clean pipe_advance() up Al Viro
2022-06-22  4:15   ` [PATCH 20/44] ITER_PIPE: clean iov_iter_revert() Al Viro
2022-06-22  4:15   ` Al Viro [this message]
2022-06-22  4:15   ` [PATCH 22/44] ITER_PIPE: fold data_start() and pipe_space_for_user() together Al Viro
2022-06-22  4:15   ` [PATCH 23/44] iov_iter_get_pages{,_alloc}(): cap the maxsize with MAX_RW_COUNT Al Viro
2022-06-28 11:41     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 24/44] iov_iter_get_pages_alloc(): lift freeing pages array on failure exits into wrapper Al Viro
2022-06-28 11:45     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 25/44] iov_iter_get_pages(): sanity-check arguments Al Viro
2022-06-28 11:47     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 26/44] unify pipe_get_pages() and pipe_get_pages_alloc() Al Viro
2022-06-28 11:49     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 27/44] unify xarray_get_pages() and xarray_get_pages_alloc() Al Viro
2022-06-28 11:50     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 28/44] unify the rest of iov_iter_get_pages()/iov_iter_get_pages_alloc() guts Al Viro
2022-06-28 11:54     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 29/44] ITER_XARRAY: don't open-code DIV_ROUND_UP() Al Viro
2022-06-28 11:54     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 30/44] iov_iter: lift dealing with maxpages out of first_{iovec,bvec}_segment() Al Viro
2022-06-28 11:56     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 31/44] iov_iter: first_{iovec,bvec}_segment() - simplify a bit Al Viro
2022-06-28 11:58     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 32/44] iov_iter: massage calling conventions for first_{iovec,bvec}_segment() Al Viro
2022-06-28 12:06     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 33/44] found_iovec_segment(): just return address Al Viro
2022-06-28 12:09     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 34/44] fold __pipe_get_pages() into pipe_get_pages() Al Viro
2022-06-28 12:11     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 35/44] iov_iter: saner helper for page array allocation Al Viro
2022-06-28 12:12     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 36/44] iov_iter: advancing variants of iov_iter_get_pages{,_alloc}() Al Viro
2022-06-28 12:13     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 37/44] block: convert to " Al Viro
2022-06-28 12:16     ` Jeff Layton
2022-06-30 22:11     ` [block.git conflicts] " Al Viro
2022-06-30 22:39       ` Al Viro
2022-07-01  2:07         ` Keith Busch
2022-07-01 17:40           ` Al Viro
2022-07-01 17:53             ` Keith Busch
2022-07-01 18:07               ` Al Viro
2022-07-01 18:12                 ` Al Viro
2022-07-01 18:38                   ` Keith Busch
2022-07-01 19:08                     ` Al Viro
2022-07-01 19:28                       ` Keith Busch
2022-07-01 19:43                         ` Al Viro
2022-07-01 19:56                           ` Keith Busch
2022-07-02  5:35                             ` Al Viro
2022-07-02 21:02                               ` Keith Busch
2022-07-01 19:05                 ` Keith Busch
2022-07-01 21:30             ` Jens Axboe
2022-06-30 23:07       ` Jens Axboe
2022-07-10 18:04     ` Sedat Dilek
2022-06-22  4:15   ` [PATCH 38/44] iter_to_pipe(): switch to advancing variant of iov_iter_get_pages() Al Viro
2022-06-28 12:18     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 39/44] af_alg_make_sg(): " Al Viro
2022-06-28 12:18     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 40/44] 9p: convert to advancing variant of iov_iter_get_pages_alloc() Al Viro
2022-07-01  9:01     ` Dominique Martinet
2022-07-01 13:47     ` Christian Schoenebeck
2022-07-06 22:06       ` Christian Schoenebeck
2022-06-22  4:15   ` [PATCH 41/44] ceph: switch the last caller " Al Viro
2022-06-28 12:20     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 42/44] get rid of non-advancing variants Al Viro
2022-06-28 12:21     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 43/44] pipe_get_pages(): switch to append_pipe() Al Viro
2022-06-28 12:23     ` Jeff Layton
2022-06-22  4:15   ` [PATCH 44/44] expand those iov_iter_advance() Al Viro
2022-06-28 12:23     ` Jeff Layton
2022-07-01  6:21   ` [PATCH 01/44] 9p: handling Rerror without copy_from_iter_full() Dominique Martinet
2022-07-01  6:25   ` Dominique Martinet
2022-07-01 16:02     ` Christian Schoenebeck
2022-07-01 21:00       ` Dominique Martinet
2022-07-03 13:30         ` Christian Schoenebeck
2022-08-01 12:42   ` [PATCH 09/44] new iov_iter flavour - ITER_UBUF David Howells
2022-08-01 21:14     ` Al Viro
2022-08-01 22:54     ` David Howells
2022-06-23 15:21 ` [RFC][CFT][PATCHSET] iov_iter stuff David Howells
2022-06-23 20:32   ` Al Viro
2022-06-28 12:25 ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220622041552.737754-21-viro@zeniv.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=asmadeus@codewreck.org \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=dhowells@redhat.com \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.