linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Pavel Begunkov <asml.silence@gmail.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: Re: [git pull] iov_iter fixes
Date: Fri, 10 Sep 2021 07:57:49 -0600	[thread overview]
Message-ID: <9855f69b-e67e-f7d9-88b8-8941666ab02f@kernel.dk> (raw)
In-Reply-To: <YTrSqvkaWWn61Mzi@zeniv-ca.linux.org.uk>

On 9/9/21 9:36 PM, Al Viro wrote:
> On Thu, Sep 09, 2021 at 09:30:03PM -0600, Jens Axboe wrote:
> 
>>> Again, we should never, ever modify the iovec (or bvec, etc.) array in
>>> ->read_iter()/->write_iter()/->sendmsg()/etc. instances.  If you see
>>> such behaviour anywhere, report it immediately.  Any such is a blatant
>>> bug.
>>
>> Yes that was wrong, the iovec is obviously const. But that really
>> doesn't change the original point, which was that copying the iov_iter
>> itself unconditionally would be miserable.
> 
> Might very well be true, but... won't your patch hit the reimport on
> every short read?  And the cost of uaccess in there is *much* higher
> than copying of 48 bytes into local variable...
> 
> Or am I misreading your patch?  Note that short reads on reaching
> EOF are obviously normal - it's not a rare case at all.

It was just a quick hack, and it might very well be too eager to go through
those motions. But pondering this instead of sleeping, we don't need to
copy all of iov_iter in order to restore the state, and we can use the
same advance after restoring. So something like this may be more
palatable. Caveat - again untested, and I haven't tested the performance
impact of this at all.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 855ea544807f..4d6d4315deda 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2608,8 +2608,6 @@ static bool io_resubmit_prep(struct io_kiocb *req)
 
 	if (!rw)
 		return !io_req_prep_async(req);
-	/* may have left rw->iter inconsistent on -EIOCBQUEUED */
-	iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
 	return true;
 }
 
@@ -3431,14 +3429,45 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+/*
+ * Stash the items we need to restore an iov_iter after a partial or
+ * -EAGAIN'ed result.
+ */
+struct iov_store {
+	ssize_t io_size;
+	size_t iov_offset;
+	unsigned long nr_segs;
+	const void *ptr;
+};
+
+static void io_iter_reset(struct iov_iter *iter, struct iov_store *store,
+			  ssize_t did_bytes)
+{
+	iter->count = store->io_size;
+	iter->iov_offset = store->iov_offset;
+	iter->nr_segs = store->nr_segs;
+	iter->iov = store->ptr;
+	if (did_bytes > 0)
+		iov_iter_advance(iter, did_bytes);
+}
+
+static void io_iov_store(struct iov_store *store, struct iov_iter *iter)
+{
+	store->io_size = iov_iter_count(iter);
+	store->iov_offset = iter->iov_offset;
+	store->nr_segs = iter->nr_segs;
+	store->ptr = iter->iov;
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter __iter, *iter = &__iter;
 	struct io_async_rw *rw = req->async_data;
-	ssize_t io_size, ret, ret2;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	struct iov_store store;
+	ssize_t ret, ret2;
 
 	if (rw) {
 		iter = &rw->iter;
@@ -3448,8 +3477,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		if (ret < 0)
 			return ret;
 	}
-	io_size = iov_iter_count(iter);
-	req->result = io_size;
+	io_iov_store(&store, iter);
+	req->result = store.io_size;
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
@@ -3463,7 +3492,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		return ret ?: -EAGAIN;
 	}
 
-	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
+	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), store.io_size);
 	if (unlikely(ret)) {
 		kfree(iovec);
 		return ret;
@@ -3479,18 +3508,17 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		/* no retry on NONBLOCK nor RWF_NOWAIT */
 		if (req->flags & REQ_F_NOWAIT)
 			goto done;
-		/* some cases will consume bytes even on error returns */
-		iov_iter_reexpand(iter, iter->count + iter->truncated);
-		iov_iter_revert(iter, io_size - iov_iter_count(iter));
 		ret = 0;
 	} else if (ret == -EIOCBQUEUED) {
 		goto out_free;
-	} else if (ret <= 0 || ret == io_size || !force_nonblock ||
+	} else if (ret <= 0 || ret == store.io_size || !force_nonblock ||
 		   (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
 		/* read all, failed, already did sync or don't want to retry */
 		goto done;
 	}
 
+	io_iter_reset(iter, &store, ret);
+
 	ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
 	if (ret2)
 		return ret2;
@@ -3501,7 +3529,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	iter = &rw->iter;
 
 	do {
-		io_size -= ret;
+		store.io_size -= ret;
 		rw->bytes_done += ret;
 		/* if we can retry, do so with the callbacks armed */
 		if (!io_rw_should_retry(req)) {
@@ -3520,7 +3548,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 			return 0;
 		/* we got some bytes, but not all. retry. */
 		kiocb->ki_flags &= ~IOCB_WAITQ;
-	} while (ret > 0 && ret < io_size);
+	} while (ret > 0 && ret < store.io_size);
 done:
 	kiocb_done(kiocb, ret, issue_flags);
 out_free:
@@ -3543,8 +3571,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter __iter, *iter = &__iter;
 	struct io_async_rw *rw = req->async_data;
-	ssize_t ret, ret2, io_size;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	struct iov_store store;
+	ssize_t ret, ret2;
 
 	if (rw) {
 		iter = &rw->iter;
@@ -3554,8 +3583,10 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		if (ret < 0)
 			return ret;
 	}
-	io_size = iov_iter_count(iter);
-	req->result = io_size;
+
+	io_iov_store(&store, iter);
+	req->result = store.io_size;
+	ret2 = 0;
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
@@ -3572,7 +3603,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	    (req->flags & REQ_F_ISREG))
 		goto copy_iov;
 
-	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
+	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), store.io_size);
 	if (unlikely(ret))
 		goto out_free;
 
@@ -3619,9 +3650,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb_done(kiocb, ret2, issue_flags);
 	} else {
 copy_iov:
-		/* some cases will consume bytes even on error returns */
-		iov_iter_reexpand(iter, iter->count + iter->truncated);
-		iov_iter_revert(iter, io_size - iov_iter_count(iter));
+		io_iter_reset(iter, &store, ret2);
 		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		return ret ?: -EAGAIN;
 	}

-- 
Jens Axboe


  reply	other threads:[~2021-09-10 13:57 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-09  4:22 [git pull] iov_iter fixes Al Viro
2021-09-09 19:37 ` Linus Torvalds
2021-09-09 21:19   ` Jens Axboe
2021-09-09 21:39     ` Jens Axboe
2021-09-09 21:56       ` Linus Torvalds
2021-09-09 22:21         ` Jens Axboe
2021-09-09 22:56           ` Linus Torvalds
2021-09-10  1:35             ` Jens Axboe
2021-09-10  2:43               ` Jens Axboe
2021-09-10  2:48               ` Al Viro
2021-09-10  3:06                 ` Jens Axboe
2021-09-10  3:15                   ` Al Viro
2021-09-10  3:23                     ` Jens Axboe
2021-09-10  3:24                     ` Al Viro
2021-09-10  3:28                       ` Jens Axboe
2021-09-13 15:29                 ` David Laight
2021-09-09 21:42     ` Dave Chinner
2021-09-10  2:57     ` Al Viro
2021-09-10  3:05       ` Jens Axboe
2021-09-10  3:11         ` Al Viro
2021-09-10  3:22           ` Jens Axboe
2021-09-10  3:27             ` Al Viro
2021-09-10  3:30               ` Jens Axboe
2021-09-10  3:36                 ` Al Viro
2021-09-10 13:57                   ` Jens Axboe [this message]
2021-09-10 14:42                     ` Al Viro
2021-09-10 15:08                       ` Jens Axboe
2021-09-10 15:32                         ` Al Viro
2021-09-10 15:36                           ` Jens Axboe
2021-09-10 15:04                     ` Jens Axboe
2021-09-10 16:06                       ` Jens Axboe
2021-09-10 16:44                         ` Linus Torvalds
2021-09-10 16:56                         ` Al Viro
2021-09-10 16:58                           ` Linus Torvalds
2021-09-10 17:26                             ` Jens Axboe
2021-09-10 17:31                               ` Linus Torvalds
2021-09-10 17:32                                 ` Jens Axboe
2021-09-10 18:48                                 ` Al Viro
2021-09-10 19:04                                   ` Linus Torvalds
2021-09-10 19:10                                     ` Linus Torvalds
2021-09-10 19:10                                   ` Jens Axboe
2021-09-10 17:04                           ` Jens Axboe
2021-09-09 22:54   ` Pavel Begunkov
2021-09-09 22:57     ` Pavel Begunkov
2021-09-09 23:14   ` Pavel Begunkov
2021-09-09 20:03 ` pr-tracker-bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9855f69b-e67e-f7d9-88b8-8941666ab02f@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=asml.silence@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).