io-uring.vger.kernel.org archive mirror
* [PATCH 0/3] io_uring: consistent behaviour with linked read/write
@ 2022-02-17 15:58 Dylan Yudaken
  2022-02-17 15:58 ` [PATCH 1/3] io_uring: remove duplicated calls to io_kiocb_ppos Dylan Yudaken
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Dylan Yudaken @ 2022-02-17 15:58 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, io-uring; +Cc: Dylan Yudaken

Currently, submitting multiple reads/writes for one file with IOSQE_IO_LINK
and offset = -1 does not behave like calling read(2)/write(2) multiple
times. The offset may be pinned to the same value for each submission (for
example if the requests are punted to the async worker), and so each
read/write will use the same offset.

This patchset fixes this by grabbing the file position at execution time,
rather than when the job is queued to be run.

A test for this will be submitted to liburing separately.
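
As a rough illustration only (this is not the liburing test referred to
above), the intended behaviour can be sketched with two linked reads using
offset -1, which should end up reading consecutive ranges of the file just
like back-to-back read(2) calls. The helpers used are standard liburing
calls; everything else here is just a sketch:

/*
 * Illustrative sketch, not the actual liburing test: two reads of the same
 * fd are linked with IOSQE_IO_LINK and submitted with offset = -1 ("use the
 * file position"). With this series applied, the second read is expected
 * to start where the first one ended.
 */
#include <fcntl.h>
#include <unistd.h>
#include <liburing.h>

static int linked_reads_example(const char *path)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	static char buf1[4096], buf2[4096];
	int fd, i, ret;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	ret = io_uring_queue_init(4, &ring, 0);
	if (ret) {
		close(fd);
		return ret;
	}

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf1, sizeof(buf1), -1);
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf2, sizeof(buf2), -1);

	io_uring_submit(&ring);

	for (i = 0; i < 2; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		/* expected: buf1 holds bytes 0..4095, buf2 bytes 4096..8191 */
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}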

Worth noting that this purposely does not change the result of submitting
multiple reads/writes without IOSQE_IO_LINK (for example as in [1]), since
I do not know what the correct approach should be when submitting multiple
reads/writes without any explicit ordering.

[1]: https://lore.kernel.org/io-uring/8a9e55bf-3195-5282-2907-41b2f2b23cc8@kernel.dk/

Dylan Yudaken (3):
  io_uring: remove duplicated calls to io_kiocb_ppos
  io_uring: update kiocb->ki_pos at execution time
  io_uring: do not recalculate ppos unnecessarily

 fs/io_uring.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)


base-commit: 754e0b0e35608ed5206d6a67a791563c631cec07
-- 
2.30.2



* [PATCH 1/3] io_uring: remove duplicated calls to io_kiocb_ppos
  2022-02-17 15:58 [PATCH 0/3] io_uring: consistent behaviour with linked read/write Dylan Yudaken
@ 2022-02-17 15:58 ` Dylan Yudaken
  2022-02-17 15:58 ` [PATCH 2/3] io_uring: update kiocb->ki_pos at execution time Dylan Yudaken
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Dylan Yudaken @ 2022-02-17 15:58 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, io-uring; +Cc: Dylan Yudaken

io_kiocb_ppos() is called in both branches of the loop body, and it seems
the compiler does not fuse the two calls. Hoisting the call out of the loop
removes a few bytes from loop_rw_iter.

Before:
$ nm -S fs/io_uring.o | grep loop_rw_iter
0000000000002430 0000000000000124 t loop_rw_iter

After:
$ nm -S fs/io_uring.o | grep loop_rw_iter
0000000000002430 000000000000010d t loop_rw_iter
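
For reference, io_kiocb_ppos() itself is not touched by this patch and is
not visible in the diff below; it is roughly the following (paraphrased for
context, the exact body in the tree may differ slightly):

static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
{
	/* stream files have no position to pass down; everything else
	 * uses the position stored in the kiocb
	 */
	return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
}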

Signed-off-by: Dylan Yudaken <dylany@fb.com>
---
 fs/io_uring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 77b9c7e4793b..1f9b4466c269 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3400,6 +3400,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct file *file = req->file;
 	ssize_t ret = 0;
+	loff_t *ppos;
 
 	/*
 	 * Don't support polled IO through this interface, and we can't
@@ -3412,6 +3413,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 	    !(kiocb->ki_filp->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
+	ppos = io_kiocb_ppos(kiocb);
+
 	while (iov_iter_count(iter)) {
 		struct iovec iovec;
 		ssize_t nr;
@@ -3425,10 +3428,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 
 		if (rw == READ) {
 			nr = file->f_op->read(file, iovec.iov_base,
-					      iovec.iov_len, io_kiocb_ppos(kiocb));
+					      iovec.iov_len, ppos);
 		} else {
 			nr = file->f_op->write(file, iovec.iov_base,
-					       iovec.iov_len, io_kiocb_ppos(kiocb));
+					       iovec.iov_len, ppos);
 		}
 
 		if (nr < 0) {
-- 
2.30.2



* [PATCH 2/3] io_uring: update kiocb->ki_pos at execution time
  2022-02-17 15:58 [PATCH 0/3] io_uring: consistent behaviour with linked read/write Dylan Yudaken
  2022-02-17 15:58 ` [PATCH 1/3] io_uring: remove duplicated calls to io_kiocb_ppos Dylan Yudaken
@ 2022-02-17 15:58 ` Dylan Yudaken
  2022-02-17 15:58 ` [PATCH 3/3] io_uring: do not recalculate ppos unnecessarily Dylan Yudaken
  2022-02-17 19:45 ` [PATCH 0/3] io_uring: consistent behaviour with linked read/write Jens Axboe
  3 siblings, 0 replies; 7+ messages in thread
From: Dylan Yudaken @ 2022-02-17 15:58 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, io-uring; +Cc: Dylan Yudaken

Update kiocb->ki_pos at execution time rather than in io_prep_rw().
io_prep_rw() happens before the job is enqueued to a worker, so the file
position may be read for multiple requests before any of them has executed.

This ensures that the file position for a set of _linked_ SQEs is only
obtained after the earlier SQEs have completed, and so reflects their
updates to the file position.
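
For context, REQ_F_CUR_POS is consumed once the I/O completes: kiocb_done()
already writes the final position back to the struct file (unchanged by
this patch), essentially:

	/* excerpt from kiocb_done(), shown here for context only */
	if (req->flags & REQ_F_CUR_POS)
		req->file->f_pos = kiocb->ki_pos;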

Signed-off-by: Dylan Yudaken <dylany@fb.com>
---
 fs/io_uring.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1f9b4466c269..6644d3d87934 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3000,14 +3000,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
-	if (kiocb->ki_pos == -1) {
-		if (!(file->f_mode & FMODE_STREAM)) {
-			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = file->f_pos;
-		} else {
-			kiocb->ki_pos = 0;
-		}
-	}
 	kiocb->ki_flags = iocb_flags(file);
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
 	if (unlikely(ret))
@@ -3624,6 +3616,19 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static inline void
+io_kiocb_update_pos(struct io_kiocb *req, struct kiocb *kiocb)
+{
+	if (kiocb->ki_pos == -1) {
+		if (!(req->file->f_mode & FMODE_STREAM)) {
+			req->flags |= REQ_F_CUR_POS;
+			kiocb->ki_pos = req->file->f_pos;
+		} else {
+			kiocb->ki_pos = 0;
+		}
+	}
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_rw_state __s, *s = &__s;
@@ -3662,6 +3667,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
+	io_kiocb_update_pos(req, kiocb);
+
 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
 	if (unlikely(ret)) {
 		kfree(iovec);
@@ -3791,6 +3798,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
+	io_kiocb_update_pos(req, kiocb);
+
 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
 	if (unlikely(ret))
 		goto out_free;
-- 
2.30.2



* [PATCH 3/3] io_uring: do not recalculate ppos unnecessarily
  2022-02-17 15:58 [PATCH 0/3] io_uring: consistent behaviour with linked read/write Dylan Yudaken
  2022-02-17 15:58 ` [PATCH 1/3] io_uring: remove duplicated calls to io_kiocb_ppos Dylan Yudaken
  2022-02-17 15:58 ` [PATCH 2/3] io_uring: update kiocb->ki_pos at execution time Dylan Yudaken
@ 2022-02-17 15:58 ` Dylan Yudaken
  2022-02-17 19:45 ` [PATCH 0/3] io_uring: consistent behaviour with linked read/write Jens Axboe
  3 siblings, 0 replies; 7+ messages in thread
From: Dylan Yudaken @ 2022-02-17 15:58 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, io-uring; +Cc: Dylan Yudaken

There is a slight optimisation to be had by calculating the correct pos
pointer inside io_kiocb_update_pos() and then using that later, rather than
recalculating it. Code size appears to drop slightly:

Before:
000000000000a1b0 0000000000000400 t io_read
000000000000a5b0 0000000000000319 t io_write

After:
000000000000a1b0 00000000000003f6 t io_read
000000000000a5b0 0000000000000310 t io_write
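
The returned pointer is meant to carry the same meaning as io_kiocb_ppos():
NULL for stream files, &kiocb->ki_pos otherwise. A logically equivalent,
flattened sketch of the new helper (for readability only, not the patch
text):

static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req,
					  struct kiocb *kiocb)
{
	/* streams have no position to pass down */
	if (req->file->f_mode & FMODE_STREAM) {
		if (kiocb->ki_pos == -1)
			kiocb->ki_pos = 0;
		return NULL;
	}

	/* -1 on a seekable file means "continue from the file position" */
	if (kiocb->ki_pos == -1) {
		req->flags |= REQ_F_CUR_POS;
		kiocb->ki_pos = req->file->f_pos;
	}
	return &kiocb->ki_pos;
}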

Signed-off-by: Dylan Yudaken <dylany@fb.com>
---
 fs/io_uring.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6644d3d87934..6c096fa24c82 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3616,17 +3616,21 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
-static inline void
+static inline loff_t*
 io_kiocb_update_pos(struct io_kiocb *req, struct kiocb *kiocb)
 {
+	bool is_stream = req->file->f_mode & FMODE_STREAM;
 	if (kiocb->ki_pos == -1) {
-		if (!(req->file->f_mode & FMODE_STREAM)) {
+		if (!is_stream) {
 			req->flags |= REQ_F_CUR_POS;
 			kiocb->ki_pos = req->file->f_pos;
+			return &kiocb->ki_pos;
 		} else {
 			kiocb->ki_pos = 0;
+			return NULL;
 		}
 	}
+	return is_stream ? NULL : &kiocb->ki_pos;
 }
 
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
@@ -3637,6 +3641,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 	struct io_async_rw *rw;
 	ssize_t ret, ret2;
+	loff_t *ppos;
 
 	if (!req_has_async_data(req)) {
 		ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
@@ -3667,9 +3672,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
-	io_kiocb_update_pos(req, kiocb);
+	ppos = io_kiocb_update_pos(req, kiocb);
 
-	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
+	ret = rw_verify_area(READ, req->file, ppos, req->result);
 	if (unlikely(ret)) {
 		kfree(iovec);
 		return ret;
@@ -3768,6 +3773,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	struct kiocb *kiocb = &req->rw.kiocb;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 	ssize_t ret, ret2;
+	loff_t *ppos;
 
 	if (!req_has_async_data(req)) {
 		ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags);
@@ -3798,9 +3804,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
-	io_kiocb_update_pos(req, kiocb);
+	ppos = io_kiocb_update_pos(req, kiocb);
 
-	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
+	ret = rw_verify_area(WRITE, req->file, ppos, req->result);
 	if (unlikely(ret))
 		goto out_free;
 
-- 
2.30.2



* Re: [PATCH 0/3] io_uring: consistent behaviour with linked read/write
  2022-02-17 15:58 [PATCH 0/3] io_uring: consistent behaviour with linked read/write Dylan Yudaken
                   ` (2 preceding siblings ...)
  2022-02-17 15:58 ` [PATCH 3/3] io_uring: do not recalculate ppos unnecessarily Dylan Yudaken
@ 2022-02-17 19:45 ` Jens Axboe
  2022-02-18 17:20   ` Dylan Yudaken
  3 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2022-02-17 19:45 UTC (permalink / raw)
  To: Dylan Yudaken, Pavel Begunkov, io-uring

On 2/17/22 8:58 AM, Dylan Yudaken wrote:
> Currently submitting multiple read/write for one file with IOSQE_IO_LINK
> and offset = -1 will not behave as if calling read(2)/write(2) multiple
> times. The offset may be pinned to the same value for each submission (for
> example if they are punted to the async worker) and so each read/write will
> have the same offset.
> 
> This patchset fixes this by grabbing the file position at execution time,
> rather than when the job is queued to be run.
> 
> A test for this will be submitted to liburing separately.
> 
> Worth noting that this does not purposefully change the result of
> submitting multiple read/write without IOSQE_IO_LINK (for example as in
> [1]). But then I do not know what the correct approach should be when
> submitting multiple r/w without any explicit ordering.
> 
> [1]: https://lore.kernel.org/io-uring/8a9e55bf-3195-5282-2907-41b2f2b23cc8@kernel.dk/

I think this series looks great, clean and to the point. My only real
question is one you reference here already, which is the fpos locking
that we really should get done. Care to respin the referenced patch on
top of this series? Would hate to make that part harder...

-- 
Jens Axboe



* Re: [PATCH 0/3] io_uring: consistent behaviour with linked read/write
  2022-02-17 19:45 ` [PATCH 0/3] io_uring: consistent behaviour with linked read/write Jens Axboe
@ 2022-02-18 17:20   ` Dylan Yudaken
  2022-02-18 17:25     ` Jens Axboe
  0 siblings, 1 reply; 7+ messages in thread
From: Dylan Yudaken @ 2022-02-18 17:20 UTC (permalink / raw)
  To: axboe, asml.silence, io-uring

On Thu, 2022-02-17 at 12:45 -0700, Jens Axboe wrote:
> On 2/17/22 8:58 AM, Dylan Yudaken wrote:
> > Currently submitting multiple read/write for one file with
> > IOSQE_IO_LINK
> > and offset = -1 will not behave as if calling read(2)/write(2)
> > multiple
> > times. The offset may be pinned to the same value for each
> > submission (for
> > example if they are punted to the async worker) and so each
> > read/write will
> > have the same offset.
> > 
> > This patchset fixes this by grabbing the file position at execution
> > time,
> > rather than when the job is queued to be run.
> > 
> > A test for this will be submitted to liburing separately.
> > 
> > Worth noting that this does not purposefully change the result of
> > submitting multiple read/write without IOSQE_IO_LINK (for example
> > as in
> > [1]). But then I do not know what the correct approach should be
> > when
> > submitting multiple r/w without any explicit ordering.
> > 
> > [1]:
> > https://lore.kernel.org/io-uring/8a9e55bf-3195-5282-2907-41b2f2b23cc8@kernel.dk/
> 
> I think this series looks great, clean and to the point. My only real
> question is one you reference here already, which is the fpos locking
> that we really should get done. Care to respin the referenced patch
> on
> top of this series? Would hate to make that part harder...
> 

Sure, I will try and figure that out and add it to the series.



* Re: [PATCH 0/3] io_uring: consistent behaviour with linked read/write
  2022-02-18 17:20   ` Dylan Yudaken
@ 2022-02-18 17:25     ` Jens Axboe
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2022-02-18 17:25 UTC (permalink / raw)
  To: Dylan Yudaken, asml.silence, io-uring

On 2/18/22 10:20 AM, Dylan Yudaken wrote:
> On Thu, 2022-02-17 at 12:45 -0700, Jens Axboe wrote:
>> On 2/17/22 8:58 AM, Dylan Yudaken wrote:
>>> Currently submitting multiple read/write for one file with
>>> IOSQE_IO_LINK
>>> and offset = -1 will not behave as if calling read(2)/write(2)
>>> multiple
>>> times. The offset may be pinned to the same value for each
>>> submission (for
>>> example if they are punted to the async worker) and so each
>>> read/write will
>>> have the same offset.
>>>
>>> This patchset fixes this by grabbing the file position at execution
>>> time,
>>> rather than when the job is queued to be run.
>>>
>>> A test for this will be submitted to liburing separately.
>>>
>>> Worth noting that this does not purposefully change the result of
>>> submitting multiple read/write without IOSQE_IO_LINK (for example
>>> as in
>>> [1]). But then I do not know what the correct approach should be
>>> when
>>> submitting multiple r/w without any explicit ordering.
>>>
>>> [1]:
>>> https://lore.kernel.org/io-uring/8a9e55bf-3195-5282-2907-41b2f2b23cc8@kernel.dk/
>>
>> I think this series looks great, clean and to the point. My only real
>> question is one you reference here already, which is the fpos locking
>> that we really should get done. Care to respin the referenced patch
>> on
>> top of this series? Would hate to make that part harder...
>>
> 
> Sure, I will try and figure that out and add it to the series.

Just for full public disclosure, the later version of the above patch is
below.

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fb2a0cb4aaf8..ab8a4002e0eb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -726,6 +726,7 @@ enum {
 	REQ_F_FAIL_BIT		= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
+	REQ_F_CUR_POS_SET_BIT,
 	REQ_F_NOWAIT_BIT,
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
@@ -765,6 +766,8 @@ enum {
 	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
 	/* read/write uses file position */
 	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
+	/* request is holding file position lock */
+	REQ_F_CUR_POS_SET	= BIT(REQ_F_CUR_POS_SET_BIT),
 	/* must not punt to workers */
 	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
 	/* has or had linked timeout */
@@ -2070,6 +2073,8 @@ static __cold void __io_free_req(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
+	WARN_ON_ONCE(req->flags & REQ_F_CUR_POS_SET);
+
 	io_req_put_rsrc(req, ctx);
 	io_dismantle_req(req);
 	io_put_task(req->task, 1);
@@ -2715,6 +2720,8 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 		req_set_fail(req);
 		req->result = res;
 	}
+	if (req->flags & REQ_F_CUR_POS && res > 0)
+		req->file->f_pos += res;
 	return false;
 }
 
@@ -2892,12 +2899,15 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	if (kiocb->ki_pos == -1) {
-		if (!(file->f_mode & FMODE_STREAM)) {
+		/*
+		 * If we end up using the current file position, just set the
+		 * flag and the actual file position will be read in the op
+		 * handler themselves.
+		 */
+		if (!(file->f_mode & FMODE_STREAM))
 			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = file->f_pos;
-		} else {
+		else
 			kiocb->ki_pos = 0;
-		}
 	}
 	kiocb->ki_flags = iocb_flags(file);
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
@@ -2979,8 +2989,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 			ret += io->bytes_done;
 	}
 
-	if (req->flags & REQ_F_CUR_POS)
-		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && (kiocb->ki_complete == io_complete_rw))
 		__io_complete_rw(req, ret, 0, issue_flags);
 	else
@@ -3515,6 +3523,37 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static bool io_file_pos_lock(struct io_kiocb *req, bool force_nonblock)
+{
+	struct file *file = req->file;
+
+	if (!(req->flags & REQ_F_CUR_POS))
+		return false;
+	WARN_ON_ONCE(req->flags & REQ_F_CUR_POS_SET);
+	if (file->f_mode & FMODE_ATOMIC_POS && file_count(file) > 1) {
+		if (force_nonblock) {
+			if (!mutex_trylock(&file->f_pos_lock))
+				return true;
+		} else {
+			mutex_lock(&file->f_pos_lock);
+		}
+		req->flags |= REQ_F_CUR_POS_SET;
+	}
+	req->rw.kiocb.ki_pos = file->f_pos;
+	file->f_pos += req->result;
+	return false;
+}
+
+static void io_file_pos_unlock(struct io_kiocb *req, ssize_t adjust)
+{
+	if (!(req->flags & REQ_F_CUR_POS_SET))
+		return;
+	if (adjust > 0)
+		req->file->f_pos -= adjust;
+	req->flags &= ~REQ_F_CUR_POS_SET;
+	__f_unlock_pos(req->file);
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_rw_state __s, *s = &__s;
@@ -3543,18 +3582,22 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req))) {
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true))) {
 			ret = io_setup_async_rw(req, iovec, s, true);
+			WARN_ON_ONCE(req->flags & REQ_F_CUR_POS_SET);
 			return ret ?: -EAGAIN;
 		}
 		kiocb->ki_flags |= IOCB_NOWAIT;
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
 	if (unlikely(ret)) {
+		io_file_pos_unlock(req, req->result);
 		kfree(iovec);
 		return ret;
 	}
@@ -3565,10 +3608,10 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		req->flags &= ~REQ_F_REISSUE;
 		/* IOPOLL retry should happen for io-wq threads */
 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
-			goto done;
+			goto out_unlock;
 		/* no retry on NONBLOCK nor RWF_NOWAIT */
 		if (req->flags & REQ_F_NOWAIT)
-			goto done;
+			goto out_unlock;
 		ret = 0;
 	} else if (ret == -EIOCBQUEUED) {
 		goto out_free;
@@ -3586,8 +3629,10 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	iov_iter_restore(&s->iter, &s->iter_state);
 
 	ret2 = io_setup_async_rw(req, iovec, s, true);
-	if (ret2)
+	if (ret2) {
+		io_file_pos_unlock(req, ret);
 		return ret2;
+	}
 
 	iovec = NULL;
 	rw = req->async_data;
@@ -3612,6 +3657,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		/* if we can retry, do so with the callbacks armed */
 		if (!io_rw_should_retry(req)) {
 			kiocb->ki_flags &= ~IOCB_WAITQ;
+			io_file_pos_unlock(req, req->result - rw->bytes_done);
 			return -EAGAIN;
 		}
 
@@ -3622,19 +3668,26 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		 * here, and if we do, then just retry at the new offset.
 		 */
 		ret = io_iter_do_read(req, &s->iter);
-		if (ret == -EIOCBQUEUED)
+		if (ret == -EIOCBQUEUED) {
+			io_file_pos_unlock(req, ret);
 			return 0;
+		}
 		/* we got some bytes, but not all. retry. */
 		kiocb->ki_flags &= ~IOCB_WAITQ;
 		iov_iter_restore(&s->iter, &s->iter_state);
 	} while (ret > 0);
+	io_file_pos_unlock(req, req->result - rw->bytes_done);
 done:
 	kiocb_done(kiocb, ret, issue_flags);
 out_free:
 	/* it's faster to check here then delegate to kfree */
 	if (iovec)
 		kfree(iovec);
+	io_file_pos_unlock(req, 0);
 	return 0;
+out_unlock:
+	io_file_pos_unlock(req, ret);
+	goto done;
 }
 
 static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -3668,7 +3721,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req)))
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true)))
 			goto copy_iov;
 
 		/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3680,6 +3734,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);


-- 
Jens Axboe


