* [PATCH for-5.15 0/4] not connected 5.15 patches
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A set of unrelated ("not connected") patches for 5.15.

Pavel Begunkov (4):
  io_uring: better encapsulate buffer select for rw
  io_uring: reuse io_req_complete_post()
  io_uring: improve tctx_task_work() ctx referencing
  io_uring: improve same wq polling

 fs/io_uring.c | 87 +++++++++++++++++----------------------------------
 1 file changed, 29 insertions(+), 58 deletions(-)

-- 
2.32.0


* [PATCH 1/4] io_uring: better encapsulate buffer select for rw
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Make io_put_rw_kbuf() do the REQ_F_BUFFER_SELECTED check itself, so
that callers don't need to hand-code it. The number of places calling
io_put_rw_kbuf() is growing, so this saves some pain.
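
Pieced together from the hunks below, the result looks roughly like
this (a condensed sketch, not a standalone unit -- the helper is taken
from the diff, the call site from io_req_task_complete()):

  /* the helper now owns the flag check, so callers can invoke it
   * unconditionally; it returns 0 when no buffer was selected */
  static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
  {
          struct io_buffer *kbuf;

          if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
                  return 0;
          kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
          return io_put_kbuf(req, kbuf);
  }

  /* call sites collapse from an if/else on the flag into one expression */
  __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));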

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2d76ca0c7218..29e3ec6e9dbf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2284,6 +2284,8 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
 {
 	struct io_buffer *kbuf;
 
+	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+		return 0;
 	kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
 	return io_put_kbuf(req, kbuf);
 }
@@ -2313,8 +2315,6 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 
 	io_init_req_batch(&rb);
 	while (!list_empty(done)) {
-		int cflags = 0;
-
 		req = list_first_entry(done, struct io_kiocb, inflight_entry);
 		list_del(&req->inflight_entry);
 
@@ -2325,10 +2325,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			continue;
 		}
 
-		if (req->flags & REQ_F_BUFFER_SELECTED)
-			cflags = io_put_rw_kbuf(req);
-
-		__io_cqring_fill_event(ctx, req->user_data, req->result, cflags);
+		__io_cqring_fill_event(ctx, req->user_data, req->result,
+					io_put_rw_kbuf(req));
 		(*nr_events)++;
 
 		if (req_ref_put_and_test(req))
@@ -2548,11 +2546,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 
 static void io_req_task_complete(struct io_kiocb *req)
 {
-	int cflags = 0;
-
-	if (req->flags & REQ_F_BUFFER_SELECTED)
-		cflags = io_put_rw_kbuf(req);
-	__io_req_complete(req, 0, req->result, cflags);
+	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
 }
 
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
@@ -2820,12 +2814,9 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		if (io_resubmit_prep(req)) {
 			io_req_task_queue_reissue(req);
 		} else {
-			int cflags = 0;
-
 			req_set_fail(req);
-			if (req->flags & REQ_F_BUFFER_SELECTED)
-				cflags = io_put_rw_kbuf(req);
-			__io_req_complete(req, issue_flags, ret, cflags);
+			__io_req_complete(req, issue_flags, ret,
+					  io_put_rw_kbuf(req));
 		}
 	}
 }
-- 
2.32.0


* [PATCH 2/4] io_uring: reuse io_req_complete_post()
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

We have io_req_complete_post() to post a CQE and put the request. It
takes care of all the synchronisation, and is more concise and
efficient, so replace all hand-coded occurrences of
"lock; post CQE; unlock; + put_req()" with io_req_complete_post().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 48 +++++++++++-------------------------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 29e3ec6e9dbf..719d62b6e3d5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5521,16 +5521,8 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
 
 static void io_req_task_timeout(struct io_kiocb *req)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
-	spin_lock(&ctx->completion_lock);
-	io_cqring_fill_event(ctx, req->user_data, -ETIME, 0);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-
-	io_cqring_ev_posted(ctx);
 	req_set_fail(req);
-	io_put_req(req);
+	io_req_complete_post(req, -ETIME, 0);
 }
 
 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
@@ -5658,14 +5650,9 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
 					io_translate_timeout_mode(tr->flags));
 	spin_unlock_irq(&ctx->timeout_lock);
 
-	spin_lock(&ctx->completion_lock);
-	io_cqring_fill_event(ctx, req->user_data, ret, 0);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-	io_cqring_ev_posted(ctx);
 	if (ret < 0)
 		req_set_fail(req);
-	io_put_req(req);
+	io_req_complete_post(req, ret, 0);
 	return 0;
 }
 
@@ -5805,7 +5792,6 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
 }
 
 static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
-	__acquires(&req->ctx->completion_lock)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -5813,15 +5799,19 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
 	WARN_ON_ONCE(req->task != current);
 
 	ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
-	spin_lock(&ctx->completion_lock);
 	if (ret != -ENOENT)
 		return ret;
+
+	spin_lock(&ctx->completion_lock);
 	spin_lock_irq(&ctx->timeout_lock);
 	ret = io_timeout_cancel(ctx, sqe_addr);
 	spin_unlock_irq(&ctx->timeout_lock);
 	if (ret != -ENOENT)
-		return ret;
-	return io_poll_cancel(ctx, sqe_addr, false);
+		goto out;
+	ret = io_poll_cancel(ctx, sqe_addr, false);
+out:
+	spin_unlock(&ctx->completion_lock);
+	return ret;
 }
 
 static int io_async_cancel_prep(struct io_kiocb *req,
@@ -5848,7 +5838,6 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 	ret = io_try_cancel_userdata(req, sqe_addr);
 	if (ret != -ENOENT)
 		goto done;
-	spin_unlock(&ctx->completion_lock);
 
 	/* slow path, try all io-wq's */
 	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
@@ -5861,17 +5850,10 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 			break;
 	}
 	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
-
-	spin_lock(&ctx->completion_lock);
 done:
-	io_cqring_fill_event(ctx, req->user_data, ret, 0);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-	io_cqring_ev_posted(ctx);
-
 	if (ret < 0)
 		req_set_fail(req);
-	io_put_req(req);
+	io_req_complete_post(req, ret, 0);
 	return 0;
 }
 
@@ -6411,20 +6393,12 @@ static inline struct file *io_file_get(struct io_ring_ctx *ctx,
 static void io_req_task_link_timeout(struct io_kiocb *req)
 {
 	struct io_kiocb *prev = req->timeout.prev;
-	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
 
 	if (prev) {
 		ret = io_try_cancel_userdata(req, prev->user_data);
-		if (!ret)
-			ret = -ETIME;
-		io_cqring_fill_event(ctx, req->user_data, ret, 0);
-		io_commit_cqring(ctx);
-		spin_unlock(&ctx->completion_lock);
-		io_cqring_ev_posted(ctx);
-
+		io_req_complete_post(req, ret ?: -ETIME, 0);
 		io_put_req(prev);
-		io_put_req(req);
 	} else {
 		io_req_complete_post(req, -ETIME, 0);
 	}
-- 
2.32.0


* [PATCH 3/4] io_uring: improve tctx_task_work() ctx referencing
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A ctx processed by tctx_task_work() can't get freed until the function
returns. The reason is that io_ring_exit_work() executes a task_work
only after all references are put, and task_work execution is naturally
serialised, so the exit work can't run while tctx_task_work() is still
in progress. Remove the extra ctx pinning.
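
With the pinning gone, the hot loop reads roughly as below (a simplified
sketch reconstructed from the hunk's context lines, not the verbatim
function; ctxs stay alive because their pending requests still hold
references):

  while (node) {
          struct io_wq_work_node *next = node->next;
          struct io_kiocb *req = container_of(node, struct io_kiocb,
                                              io_task_work.node);

          if (req->ctx != ctx) {
                  /* no percpu_ref_get() on the new ctx, per the
                   * lifetime rules above */
                  ctx_flush_and_put(ctx);
                  ctx = req->ctx;
          }
          req->io_task_work.func(req);
          node = next;
  }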

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 719d62b6e3d5..202517860c83 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2004,9 +2004,14 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx)
 		io_submit_flush_completions(ctx);
 		mutex_unlock(&ctx->uring_lock);
 	}
-	percpu_ref_put(&ctx->refs);
 }
 
+/*
+ * All the ctxs we operate on here will stay alive until the function returns.
+ * That's because initially they're refcounted by the requests, and after
+ * io_ring_exit_work() synchronises with the current task by injecting and
+ * waiting for a task_work, which can't be executed until it returns.
+ */
 static void tctx_task_work(struct callback_head *cb)
 {
 	struct io_ring_ctx *ctx = NULL;
@@ -2033,7 +2038,6 @@ static void tctx_task_work(struct callback_head *cb)
 			if (req->ctx != ctx) {
 				ctx_flush_and_put(ctx);
 				ctx = req->ctx;
-				percpu_ref_get(&ctx->refs);
 			}
 			req->io_task_work.func(req);
 			node = next;
-- 
2.32.0


* [PATCH 4/4] io_uring: improve same wq polling
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Move the check for whether __io_queue_proc() is trying to poll an
already-polled waitqueue earlier, and apply the same check to the second
poll entry, if there is one. It shouldn't really matter, but at least
the behaviour becomes more predictable.
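
The resulting order of checks, condensed from the hunks below (sketch):

  if (unlikely(pt->nr_entries)) {
          struct io_poll_iocb *poll_one = poll;

          /* 1) same head as the first entry: double add, ignore */
          if (poll_one->head == head)
                  return;
          if (*poll_ptr) {
                  /* 2) same head as the second entry: also ignore */
                  if ((*poll_ptr)->head == head)
                          return;
                  /* 3) a genuinely new third waitqueue: fail */
                  pt->error = -EINVAL;
                  return;
          }
          /* only a genuinely new second head gets this far */
          poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
          /* ... set up the new entry as in the existing code ... */
  }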

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 202517860c83..1be7af620395 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5064,8 +5064,13 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 	if (unlikely(pt->nr_entries)) {
 		struct io_poll_iocb *poll_one = poll;
 
+		/* double add on the same waitqueue head, ignore */
+		if (poll_one->head == head)
+			return;
 		/* already have a 2nd entry, fail a third attempt */
 		if (*poll_ptr) {
+			if ((*poll_ptr)->head == head)
+				return;
 			pt->error = -EINVAL;
 			return;
 		}
@@ -5075,9 +5080,6 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		 */
 		if (!(poll_one->events & EPOLLONESHOT))
 			poll_one->events |= EPOLLONESHOT;
-		/* double add on the same waitqueue head, ignore */
-		if (poll_one->head == head)
-			return;
 		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
 		if (!poll) {
 			pt->error = -ENOMEM;
-- 
2.32.0


* Re: [PATCH for-5.15 0/4] not connected 5.15 patches
From: Jens Axboe @ 2021-08-17 22:07 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 8/17/21 1:28 PM, Pavel Begunkov wrote:
> A set of unrelated ("not connected") patches for 5.15.
> 
> Pavel Begunkov (4):
>   io_uring: better encapsulate buffer select for rw
>   io_uring: reuse io_req_complete_post()
>   io_uring: improve tctx_task_work() ctx referencing
>   io_uring: improve same wq polling
> 
>  fs/io_uring.c | 87 +++++++++++++++++----------------------------------
>  1 file changed, 29 insertions(+), 58 deletions(-)

Applied, thanks.

-- 
Jens Axboe

