From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: linux-block@vger.kernel.org, Jens Axboe <axboe@kernel.dk>,
	asml.silence@gmail.com, Kanchan Joshi <joshi.k@samsung.com>,
	Ming Lei <ming.lei@redhat.com>
Subject: [PATCH v2 11/14] io_uring: get rid of intermediate aux cqe caches
Date: Mon, 18 Mar 2024 00:41:56 +0000
Message-ID: <a23c620eaf39d5f303e5b1f3f1fa37693382621e.1710720150.git.asml.silence@gmail.com>
In-Reply-To: <cover.1710720150.git.asml.silence@gmail.com>

io_post_aux_cqe(), which is used for multishot requests, delays
completions by putting CQEs into a temporary array for the purpose
of completion lock/flush batching.

DEFER_TASKRUN doesn't need any locking, so for it we can put completions
directly into the CQ and defer post-completion handling with a flag.
That leaves !DEFER_TASKRUN, which is not that interesting / hot for
multishot requests, so handle it with conditional locking and a
deferred flush.
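
In practice a multishot handler consumes io_req_post_cqe() roughly as
sketched below. This is a hypothetical, simplified caller for
illustration only: io_req_post_cqe(), IORING_CQE_F_MORE, the IOU_*
return codes and the req helpers are real, but the handler itself is
made up.

/*
 * Hypothetical multishot event handler (illustrative only).  Each
 * event posts an aux CQE with IORING_CQE_F_MORE set; with this patch
 * the CQE is written straight into the CQ ring under the conditional
 * lock, and submit_state.cq_flush is set so that committing the ring
 * and waking waiters happen later in the deferred flush.
 */
static int io_example_multishot_event(struct io_kiocb *req, s32 res)
{
	if (io_req_post_cqe(req, res, IORING_CQE_F_MORE))
		return IOU_ISSUE_SKIP_COMPLETE;	/* stay armed for more events */

	/* no CQ space: post the final, terminating CQE the normal way */
	req_set_fail(req);
	io_req_set_res(req, res, 0);
	return IOU_OK;
}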

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0eb3f55722540a11b036d3c90771220eb082d65e.1710514702.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  3 +-
 io_uring/io_uring.c            | 64 ++++++++--------------------------
 2 files changed, 15 insertions(+), 52 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 5a2afbc93887..ea7e5488b3be 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -205,6 +205,7 @@ struct io_submit_state {
 
 	bool			plug_started;
 	bool			need_plug;
+	bool			cq_flush;
 	unsigned short		submit_nr;
 	unsigned int		cqes_count;
 	struct blk_plug		plug;
@@ -342,8 +343,6 @@ struct io_ring_ctx {
 		unsigned		cq_last_tm_flush;
 	} ____cacheline_aligned_in_smp;
 
-	struct io_uring_cqe	completion_cqes[16];
-
 	spinlock_t		completion_lock;
 
 	/* IRQ completion list, under ->completion_lock */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 30542dda1473..8c485bcb5cb7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -176,7 +176,7 @@ static struct ctl_table kernel_io_uring_disabled_table[] = {
 static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
 {
 	if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
-	    ctx->submit_state.cqes_count)
+	    ctx->submit_state.cq_flush)
 		__io_submit_flush_completions(ctx);
 }
 
@@ -636,6 +636,12 @@ static inline void __io_cq_lock(struct io_ring_ctx *ctx)
 		spin_lock(&ctx->completion_lock);
 }
 
+static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
+{
+	if (!ctx->lockless_cq)
+		spin_unlock(&ctx->completion_lock);
+}
+
 static inline void io_cq_lock(struct io_ring_ctx *ctx)
 	__acquires(ctx->completion_lock)
 {
@@ -888,31 +894,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
 	return false;
 }
 
-static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
-	__must_hold(&ctx->uring_lock)
-{
-	struct io_submit_state *state = &ctx->submit_state;
-	unsigned int i;
-
-	lockdep_assert_held(&ctx->uring_lock);
-	for (i = 0; i < state->cqes_count; i++) {
-		struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
-
-		if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
-			if (ctx->lockless_cq) {
-				spin_lock(&ctx->completion_lock);
-				io_cqring_event_overflow(ctx, cqe->user_data,
-							cqe->res, cqe->flags, 0, 0);
-				spin_unlock(&ctx->completion_lock);
-			} else {
-				io_cqring_event_overflow(ctx, cqe->user_data,
-							cqe->res, cqe->flags, 0, 0);
-			}
-		}
-	}
-	state->cqes_count = 0;
-}
-
 bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 {
 	bool filled;
@@ -933,31 +914,16 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
 bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	u64 user_data = req->cqe.user_data;
-	struct io_uring_cqe *cqe;
+	bool posted;
 
 	lockdep_assert(!io_wq_current_is_worker());
 	lockdep_assert_held(&ctx->uring_lock);
 
-	if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->completion_cqes)) {
-		__io_cq_lock(ctx);
-		__io_flush_post_cqes(ctx);
-		/* no need to flush - flush is deferred */
-		__io_cq_unlock_post(ctx);
-	}
-
-	/* For defered completions this is not as strict as it is otherwise,
-	 * however it's main job is to prevent unbounded posted completions,
-	 * and in that it works just as well.
-	 */
-	if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
-		return false;
-
-	cqe = &ctx->completion_cqes[ctx->submit_state.cqes_count++];
-	cqe->user_data = user_data;
-	cqe->res = res;
-	cqe->flags = cflags;
-	return true;
+	__io_cq_lock(ctx);
+	posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+	ctx->submit_state.cq_flush = true;
+	__io_cq_unlock_post(ctx);
+	return posted;
 }
 
 static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
@@ -1551,9 +1517,6 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 	struct io_wq_work_node *node;
 
 	__io_cq_lock(ctx);
-	/* must come first to preserve CQE ordering in failure cases */
-	if (state->cqes_count)
-		__io_flush_post_cqes(ctx);
 	__wq_list_for_each(node, &state->compl_reqs) {
 		struct io_kiocb *req = container_of(node, struct io_kiocb,
 					    comp_list);
@@ -1575,6 +1538,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		io_free_batch_list(ctx, state->compl_reqs.first);
 		INIT_WQ_LIST(&state->compl_reqs);
 	}
+	ctx->submit_state.cq_flush = false;
 }
 
 static unsigned io_cqring_events(struct io_ring_ctx *ctx)
-- 
2.44.0

