* [PATCH 0/4] Cache tctx cancelation state in the ctx
@ 2023-02-17 15:55 Jens Axboe
  2023-02-17 15:55 ` [PATCH 1/4] io_uring: consolidate the put_ref-and-return section of adding work Jens Axboe
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Jens Axboe @ 2023-02-17 15:55 UTC (permalink / raw)
  To: io-uring

Hi,

One of the more expensive parts of io_req_local_work_add() is that it
has to pull in the remote task tctx to check for the very unlikely event
that we are in a cancelation state.

Cache the cancelation state in each ctx instead. This makes marking (and
clearing) cancelation a bit more expensive, but those are very slow-path
operations. The upside is that io_req_local_work_add() becomes a lot
cheaper, which is what we care about.
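
To make the cost difference concrete, here is a minimal, self-contained
userspace sketch (the struct definitions are simplified stand-ins, not the
real kernel types): the old check has to chase req->task->io_uring before
it can even read the flag, while the new one reads a field of the ctx the
caller already holds.

#include <stdbool.h>
#include <stdio.h>

/* simplified stand-ins for the kernel structures involved */
struct io_uring_task { int in_cancel; };                  /* the per-task tctx */
struct task_struct   { struct io_uring_task *io_uring; };
struct io_kiocb      { struct task_struct *task; };
struct io_ring_ctx   { unsigned long in_cancel; };        /* the new ctx cache */

/* old check: three pointer hops into another task's (likely cold) memory */
static bool cancel_check_remote(const struct io_kiocb *req)
{
	return req->task->io_uring->in_cancel != 0;
}

/* new check: a single read from the ctx we are already operating on */
static bool cancel_check_local(const struct io_ring_ctx *ctx)
{
	return ctx->in_cancel != 0;
}

int main(void)
{
	struct io_uring_task tctx = { .in_cancel = 0 };
	struct task_struct task = { .io_uring = &tctx };
	struct io_kiocb req = { .task = &task };
	struct io_ring_ctx ctx = { .in_cancel = 0 };

	printf("remote check: %d, local check: %d\n",
	       cancel_check_remote(&req), cancel_check_local(&ctx));
	return 0;
}

Both checks give the same answer; the win is purely in which cachelines the
fast path has to touch.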

-- 
Jens Axboe




* [PATCH 1/4] io_uring: consolidate the put_ref-and-return section of adding work
  2023-02-17 15:55 [PATCH 0/4] Cache tctx cancelation state in the ctx Jens Axboe
@ 2023-02-17 15:55 ` Jens Axboe
  2023-02-17 15:55 ` [PATCH 2/4] io_uring: rename 'in_idle' to 'in_cancel' Jens Axboe
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2023-02-17 15:55 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

We've got a few cases of this, move them to one section and just use
gotos to get there. Reduces the text section on both arm64 and x86-64,
using gcc-12.2.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3b915deb4d08..cbe06deb84ff 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1285,17 +1285,15 @@ static void io_req_local_work_add(struct io_kiocb *req)
 
 	percpu_ref_get(&ctx->refs);
 
-	if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) {
-		percpu_ref_put(&ctx->refs);
-		return;
-	}
+	if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
+		goto put_ref;
+
 	/* needed for the following wake up */
 	smp_mb__after_atomic();
 
 	if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
 		io_move_task_work_from_local(ctx);
-		percpu_ref_put(&ctx->refs);
-		return;
+		goto put_ref;
 	}
 
 	if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
@@ -1305,6 +1303,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
 
 	if (READ_ONCE(ctx->cq_waiting))
 		wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
+
+put_ref:
 	percpu_ref_put(&ctx->refs);
 }
 
-- 
2.39.1



* [PATCH 2/4] io_uring: rename 'in_idle' to 'in_cancel'
  2023-02-17 15:55 [PATCH 0/4] Cache tctx cancelation state in the ctx Jens Axboe
  2023-02-17 15:55 ` [PATCH 1/4] io_uring: consolidate the put_ref-and-return section of adding work Jens Axboe
@ 2023-02-17 15:55 ` Jens Axboe
  2023-02-17 15:55 ` [PATCH 3/4] io_uring: cache task cancelation state in the ctx Jens Axboe
  2023-02-17 15:56 ` [PATCH 4/4] io_uring: use local ctx cancelation state for io_req_local_work_add() Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2023-02-17 15:55 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

This better describes what it does - it's incremented when the task is
currently undergoing a cancelation operation, due to exiting or exec'ing.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  2 +-
 io_uring/io_uring.c            | 18 +++++++++---------
 io_uring/tctx.c                |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 0efe4d784358..00689c12f6ab 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -58,7 +58,7 @@ struct io_uring_task {
 
 	struct xarray			xa;
 	struct wait_queue_head		wait;
-	atomic_t			in_idle;
+	atomic_t			in_cancel;
 	atomic_t			inflight_tracked;
 	struct percpu_counter		inflight;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cbe06deb84ff..64e07df034d1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr)
 	struct io_uring_task *tctx = task->io_uring;
 
 	percpu_counter_sub(&tctx->inflight, nr);
-	if (unlikely(atomic_read(&tctx->in_idle)))
+	if (unlikely(atomic_read(&tctx->in_cancel)))
 		wake_up(&tctx->wait);
 	put_task_struct_many(task, nr);
 }
@@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb)
 
 	ctx_flush_and_put(ctx, &uring_locked);
 
-	/* relaxed read is enough as only the task itself sets ->in_idle */
-	if (unlikely(atomic_read(&tctx->in_idle)))
+	/* relaxed read is enough as only the task itself sets ->in_cancel */
+	if (unlikely(atomic_read(&tctx->in_cancel)))
 		io_uring_drop_tctx_refs(current);
 
 	trace_io_uring_task_work_run(tctx, count, loops);
@@ -1291,7 +1291,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
 	/* needed for the following wake up */
 	smp_mb__after_atomic();
 
-	if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
+	if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
 		io_move_task_work_from_local(ctx);
 		goto put_ref;
 	}
@@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)
 
 	work = container_of(cb, struct io_tctx_exit, task_work);
 	/*
-	 * When @in_idle, we're in cancellation and it's racy to remove the
+	 * When @in_cancel, we're in cancellation and it's racy to remove the
 	 * node. It'll be removed by the end of cancellation, just ignore it.
 	 * tctx can be NULL if the queueing of this task_work raced with
 	 * work cancelation off the exec path.
 	 */
-	if (tctx && !atomic_read(&tctx->in_idle))
+	if (tctx && !atomic_read(&tctx->in_cancel))
 		io_uring_del_tctx_node((unsigned long)work->ctx);
 	complete(&work->completion);
 }
@@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 	if (tctx->io_wq)
 		io_wq_exit_start(tctx->io_wq);
 
-	atomic_inc(&tctx->in_idle);
+	atomic_inc(&tctx->in_cancel);
 	do {
 		bool loop = false;
 
@@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 	if (cancel_all) {
 		/*
 		 * We shouldn't run task_works after cancel, so just leave
-		 * ->in_idle set for normal exit.
+		 * ->in_cancel set for normal exit.
 		 */
-		atomic_dec(&tctx->in_idle);
+		atomic_dec(&tctx->in_cancel);
 		/* for exec all current's requests should be gone, kill tctx */
 		__io_uring_free(current);
 	}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 4324b1cf1f6a..3a8d1dd97e1b 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -83,7 +83,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
 
 	xa_init(&tctx->xa);
 	init_waitqueue_head(&tctx->wait);
-	atomic_set(&tctx->in_idle, 0);
+	atomic_set(&tctx->in_cancel, 0);
 	atomic_set(&tctx->inflight_tracked, 0);
 	task->io_uring = tctx;
 	init_llist_head(&tctx->task_list);
-- 
2.39.1



* [PATCH 3/4] io_uring: cache task cancelation state in the ctx
  2023-02-17 15:55 [PATCH 0/4] Cache tctx cancelation state in the ctx Jens Axboe
  2023-02-17 15:55 ` [PATCH 1/4] io_uring: consolidate the put_ref-and-return section of adding work Jens Axboe
  2023-02-17 15:55 ` [PATCH 2/4] io_uring: rename 'in_idle' to 'in_cancel' Jens Axboe
@ 2023-02-17 15:55 ` Jens Axboe
  2023-02-20 14:43   ` Gabriel Krisman Bertazi
  2023-02-17 15:56 ` [PATCH 4/4] io_uring: use local ctx cancelation state for io_req_local_work_add() Jens Axboe
  3 siblings, 1 reply; 6+ messages in thread
From: Jens Axboe @ 2023-02-17 15:55 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

It can be quite expensive for the fast paths to dereference
req->task->io_uring->in_cancel for the (very) unlikely scenario that
we're currently undergoing cancelations.

Add a ctx bit to indicate if we're currently canceling or not, so that
the hot path may check this rather than dip into the remote task
state.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  2 ++
 io_uring/io_uring.c            | 44 ++++++++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 00689c12f6ab..42d704adb9c6 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -211,6 +211,8 @@ struct io_ring_ctx {
 		enum task_work_notify_mode	notify_method;
 		struct io_rings			*rings;
 		struct task_struct		*submitter_task;
+		/* local ctx cache of task cancel state */
+		unsigned long			in_cancel;
 		struct percpu_ref		refs;
 	} ____cacheline_aligned_in_smp;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 64e07df034d1..0fcb532db1fc 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3192,6 +3192,46 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
 	return percpu_counter_sum(&tctx->inflight);
 }
 
+static __cold void io_uring_dec_cancel(struct io_uring_task *tctx,
+				       struct io_sq_data *sqd)
+{
+	if (!atomic_dec_return(&tctx->in_cancel))
+		return;
+
+	if (!sqd) {
+		struct io_tctx_node *node;
+		unsigned long index;
+
+		xa_for_each(&tctx->xa, index, node)
+			clear_bit(0, &node->ctx->in_cancel);
+	} else {
+		struct io_ring_ctx *ctx;
+
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+			clear_bit(0, &ctx->in_cancel);
+	}
+}
+
+static __cold void io_uring_inc_cancel(struct io_uring_task *tctx,
+				       struct io_sq_data *sqd)
+{
+	if (atomic_inc_return(&tctx->in_cancel) != 1)
+		return;
+
+	if (!sqd) {
+		struct io_tctx_node *node;
+		unsigned long index;
+
+		xa_for_each(&tctx->xa, index, node)
+			set_bit(0, &node->ctx->in_cancel);
+	} else {
+		struct io_ring_ctx *ctx;
+
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+			set_bit(0, &ctx->in_cancel);
+	}
+}
+
 /*
  * Find any io_uring ctx that this task has registered or done IO on, and cancel
  * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
@@ -3210,7 +3250,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 	if (tctx->io_wq)
 		io_wq_exit_start(tctx->io_wq);
 
-	atomic_inc(&tctx->in_cancel);
+	io_uring_inc_cancel(tctx, sqd);
 	do {
 		bool loop = false;
 
@@ -3263,7 +3303,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 		 * We shouldn't run task_works after cancel, so just leave
 		 * ->in_cancel set for normal exit.
 		 */
-		atomic_dec(&tctx->in_cancel);
+		io_uring_dec_cancel(tctx, sqd);
 		/* for exec all current's requests should be gone, kill tctx */
 		__io_uring_free(current);
 	}
-- 
2.39.1



* [PATCH 4/4] io_uring: use local ctx cancelation state for io_req_local_work_add()
  2023-02-17 15:55 [PATCH 0/4] Cache tctx cancelation state in the ctx Jens Axboe
                   ` (2 preceding siblings ...)
  2023-02-17 15:55 ` [PATCH 3/4] io_uring: cache task cancelation state in the ctx Jens Axboe
@ 2023-02-17 15:56 ` Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2023-02-17 15:56 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Rather than look at the remote task state through multiple layers of
memory dereferencing, just use the local ctx state instead.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0fcb532db1fc..792ab44393c2 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1291,7 +1291,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
 	/* needed for the following wake up */
 	smp_mb__after_atomic();
 
-	if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
+	if (unlikely(test_bit(0, &ctx->in_cancel))) {
 		io_move_task_work_from_local(ctx);
 		goto put_ref;
 	}
-- 
2.39.1



* Re: [PATCH 3/4] io_uring: cache task cancelation state in the ctx
  2023-02-17 15:55 ` [PATCH 3/4] io_uring: cache task cancelation state in the ctx Jens Axboe
@ 2023-02-20 14:43   ` Gabriel Krisman Bertazi
  0 siblings, 0 replies; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2023-02-20 14:43 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

Jens Axboe <axboe@kernel.dk> writes:

> It can be quite expensive for the fast paths to dereference
> req->task->io_uring->in_cancel for the (very) unlikely scenario that
> we're currently undergoing cancelations.
>
> Add a ctx bit to indicate if we're currently canceling or not, so that
> the hot path may check this rather than dip into the remote task
> state.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  include/linux/io_uring_types.h |  2 ++
>  io_uring/io_uring.c            | 44 ++++++++++++++++++++++++++++++++--
>  2 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index 00689c12f6ab..42d704adb9c6 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -211,6 +211,8 @@ struct io_ring_ctx {
>  		enum task_work_notify_mode	notify_method;
>  		struct io_rings			*rings;
>  		struct task_struct		*submitter_task;
> +		/* local ctx cache of task cancel state */
> +		unsigned long			in_cancel;

minor nit:

even though the real tctx value is ulong, the cache could just be a bitfield
alongside the many others in this structure. you only care whether it is
zero or non-zero when peeking at the cache.
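
As a rough userspace sketch of that alternative (the neighbouring flag
names below are purely illustrative, not the actual io_ring_ctx layout):

#include <stdio.h>

/* as in the patch: a dedicated unsigned long, driven with set_bit()/clear_bit() */
struct ctx_with_word {
	unsigned long in_cancel;
};

/* as suggested above: a single bit packed with the ctx's other bitfields */
struct ctx_with_bitfield {
	unsigned int drain_active : 1;	/* illustrative neighbours only */
	unsigned int has_evfd     : 1;
	unsigned int in_cancel    : 1;
};

int main(void)
{
	struct ctx_with_word a = { .in_cancel = 1 };
	struct ctx_with_bitfield b = { .in_cancel = 1 };

	/* either representation only needs to answer "set or not" on the hot path */
	printf("%d %d\n", a.in_cancel != 0, b.in_cancel != 0);
	return 0;
}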

either way,

Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>




>  		struct percpu_ref		refs;
>  	} ____cacheline_aligned_in_smp;
>  
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 64e07df034d1..0fcb532db1fc 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3192,6 +3192,46 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
>  	return percpu_counter_sum(&tctx->inflight);
>  }
>  
> +static __cold void io_uring_dec_cancel(struct io_uring_task *tctx,
> +				       struct io_sq_data *sqd)
> +{
> +	if (!atomic_dec_return(&tctx->in_cancel))
> +		return;
> +
> +	if (!sqd) {
> +		struct io_tctx_node *node;
> +		unsigned long index;
> +
> +		xa_for_each(&tctx->xa, index, node)
> +			clear_bit(0, &node->ctx->in_cancel);
> +	} else {
> +		struct io_ring_ctx *ctx;
> +
> +		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
> +			clear_bit(0, &ctx->in_cancel);
> +	}
> +}
> +
> +static __cold void io_uring_inc_cancel(struct io_uring_task *tctx,
> +				       struct io_sq_data *sqd)
> +{
> +	if (atomic_inc_return(&tctx->in_cancel) != 1)
> +		return;
> +
> +	if (!sqd) {
> +		struct io_tctx_node *node;
> +		unsigned long index;
> +
> +		xa_for_each(&tctx->xa, index, node)
> +			set_bit(0, &node->ctx->in_cancel);
> +	} else {
> +		struct io_ring_ctx *ctx;
> +
> +		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
> +			set_bit(0, &ctx->in_cancel);
> +	}
> +}
> +
>  /*
>   * Find any io_uring ctx that this task has registered or done IO on, and cancel
>   * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
> @@ -3210,7 +3250,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  	if (tctx->io_wq)
>  		io_wq_exit_start(tctx->io_wq);
>  
> -	atomic_inc(&tctx->in_cancel);
> +	io_uring_inc_cancel(tctx, sqd);
>  	do {
>  		bool loop = false;
>  
> @@ -3263,7 +3303,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  		 * We shouldn't run task_works after cancel, so just leave
>  		 * ->in_cancel set for normal exit.
>  		 */
> -		atomic_dec(&tctx->in_cancel);
> +		io_uring_dec_cancel(tctx, sqd);
>  		/* for exec all current's requests should be gone, kill tctx */
>  		__io_uring_free(current);
>  	}

-- 
Gabriel Krisman Bertazi
