All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] io_uring: improve current file position IO
@ 2021-12-24 14:35 Jens Axboe
  2022-01-03 15:17 ` Jann Horn
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2021-12-24 14:35 UTC (permalink / raw)
  To: io-uring

io_uring should be protecting the current file position with the
file position lock, ->f_pos_lock. Grab and track the lock state when
the request is being issued, and make the unlock part of request
cleaning.

Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Main thing I don't like here:

- We're holding the f_pos_lock across the kernel/user boundary, as
  it's held for the duration of the IO. Alternatively we could
  keep it local to io_read() and io_write() and lose REQ_F_CUR_POS_LOCK,
  but will messy up those functions more and add more items to the
  fast path (which current position read/write definitely is not).

Suggestions welcome...


diff --git a/fs/io_uring.c b/fs/io_uring.c
index fb2a0cb4aaf8..6f7713ab2eda 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,7 @@
 
 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
 				REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
-				REQ_F_ASYNC_DATA)
+				REQ_F_ASYNC_DATA | REQ_F_CUR_POS_LOCK)
 
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
@@ -726,6 +726,7 @@ enum {
 	REQ_F_FAIL_BIT		= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
+	REQ_F_CUR_POS_LOCK_BIT,
 	REQ_F_NOWAIT_BIT,
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
@@ -765,6 +766,8 @@ enum {
 	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
 	/* read/write uses file position */
 	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
+	/* request is holding file position lock */
+	REQ_F_CUR_POS_LOCK	= BIT(REQ_F_CUR_POS_LOCK_BIT),
 	/* must not punt to workers */
 	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
 	/* has or had linked timeout */
@@ -2715,6 +2718,8 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 		req_set_fail(req);
 		req->result = res;
 	}
+	if (req->flags & REQ_F_CUR_POS && res > 0)
+		req->file->f_pos += res;
 	return false;
 }
 
@@ -2892,12 +2897,15 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	if (kiocb->ki_pos == -1) {
-		if (!(file->f_mode & FMODE_STREAM)) {
+		/*
+		 * If we end up using the current file position, just set the
+		 * flag and the actual file position will be read in the op
+		 * handler themselves.
+		 */
+		if (!(file->f_mode & FMODE_STREAM))
 			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = file->f_pos;
-		} else {
+		else
 			kiocb->ki_pos = 0;
-		}
 	}
 	kiocb->ki_flags = iocb_flags(file);
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
@@ -2979,8 +2987,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 			ret += io->bytes_done;
 	}
 
-	if (req->flags & REQ_F_CUR_POS)
-		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && (kiocb->ki_complete == io_complete_rw))
 		__io_complete_rw(req, ret, 0, issue_flags);
 	else
@@ -3515,6 +3521,27 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static bool io_file_pos_lock(struct io_kiocb *req, bool force_nonblock)
+{
+	struct file *file = req->file;
+
+	if (!(req->flags & REQ_F_CUR_POS))
+		return false;
+	if (req->flags & REQ_F_CUR_POS_LOCK)
+		return false;
+	if (file->f_mode & FMODE_ATOMIC_POS && file_count(file) > 1) {
+		if (force_nonblock) {
+			if (!mutex_trylock(&file->f_pos_lock))
+				return true;
+		} else {
+			mutex_lock(&file->f_pos_lock);
+		}
+		req->flags |= REQ_F_CUR_POS_LOCK;
+	}
+	req->rw.kiocb.ki_pos = file->f_pos;
+	return false;
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_rw_state __s, *s = &__s;
@@ -3543,7 +3570,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req))) {
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true))) {
 			ret = io_setup_async_rw(req, iovec, s, true);
 			return ret ?: -EAGAIN;
 		}
@@ -3551,6 +3579,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -3668,7 +3697,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req)))
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true)))
 			goto copy_iov;
 
 		/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3680,6 +3710,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -6584,6 +6615,8 @@ static void io_clean_op(struct io_kiocb *req)
 		kfree(req->kbuf);
 		req->kbuf = NULL;
 	}
+	if (req->flags & REQ_F_CUR_POS_LOCK)
+		__f_unlock_pos(req->file);
 
 	if (req->flags & REQ_F_NEED_CLEANUP) {
 		switch (req->opcode) {

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH RFC] io_uring: improve current file position IO
  2021-12-24 14:35 [PATCH RFC] io_uring: improve current file position IO Jens Axboe
@ 2022-01-03 15:17 ` Jann Horn
  2022-01-03 15:22   ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Jann Horn @ 2022-01-03 15:17 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Fri, Dec 24, 2021 at 3:35 PM Jens Axboe <axboe@kernel.dk> wrote:
> io_uring should be protecting the current file position with the
> file position lock, ->f_pos_lock. Grab and track the lock state when
> the request is being issued, and make the unlock part of request
> cleaning.
>
> Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position")
> Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>
> ---
>
> Main thing I don't like here:
>
> - We're holding the f_pos_lock across the kernel/user boundary, as
>   it's held for the duration of the IO. Alternatively we could
>   keep it local to io_read() and io_write() and lose REQ_F_CUR_POS_LOCK,
>   but will messy up those functions more and add more items to the
>   fast path (which current position read/write definitely is not).
>
> Suggestions welcome...

Oh, that's not pretty... is it guaranteed that the
__f_unlock_pos(req->file) will happen in the same task as the
io_file_pos_lock(req, false), and have you tried running this with
lockdep and mutex debugging enabled? Could a task deadlock if it tried
to do a read() on a file while io_uring is already holding the
position lock?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH RFC] io_uring: improve current file position IO
  2022-01-03 15:17 ` Jann Horn
@ 2022-01-03 15:22   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2022-01-03 15:22 UTC (permalink / raw)
  To: Jann Horn; +Cc: io-uring

On 1/3/22 7:17 AM, Jann Horn wrote:
> On Fri, Dec 24, 2021 at 3:35 PM Jens Axboe <axboe@kernel.dk> wrote:
>> io_uring should be protecting the current file position with the
>> file position lock, ->f_pos_lock. Grab and track the lock state when
>> the request is being issued, and make the unlock part of request
>> cleaning.
>>
>> Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position")
>> Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>
>> ---
>>
>> Main thing I don't like here:
>>
>> - We're holding the f_pos_lock across the kernel/user boundary, as
>>   it's held for the duration of the IO. Alternatively we could
>>   keep it local to io_read() and io_write() and lose REQ_F_CUR_POS_LOCK,
>>   but will messy up those functions more and add more items to the
>>   fast path (which current position read/write definitely is not).
>>
>> Suggestions welcome...
> 
> Oh, that's not pretty... is it guaranteed that the

Right, hence why it's an RFC :-)

> __f_unlock_pos(req->file) will happen in the same task as the
> io_file_pos_lock(req, false), and have you tried running this with

It might unlock from a thread off that task, depends on how the execution
happens. And as it stands, it'll also potentially exit the kernel with
the lock held until it completes.

> lockdep and mutex debugging enabled? Could a task deadlock if it tried
> to do a read() on a file while io_uring is already holding the
> position lock?

lockdep will complain about the leaving the kernel with it held aspect
for sure.

I think the better solution here is, as I suggested in the patch, to
keep it local to io_read() and io_write() rather than try and track it.
Which is a bit annoying in terms of adding mostly useless code to the
fast path, but... Don't think there's a better way.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-01-03 15:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-24 14:35 [PATCH RFC] io_uring: improve current file position IO Jens Axboe
2022-01-03 15:17 ` Jann Horn
2022-01-03 15:22   ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.