From: Pavel Begunkov <asml.silence@gmail.com>
To: Jens Axboe <axboe@kernel.dk>,
	io-uring@vger.kernel.org, iuyun01@kylinos.cn
Subject: [PATCH] io_uring: drain next sqe instead of shadowing
Date: Thu, 21 Nov 2019 11:54:28 +0300	[thread overview]
Message-ID: <5e8a8176e29a61ec79004521bc2ef28e4d9715b1.1574325863.git.asml.silence@gmail.com> (raw)
In-Reply-To: <2005c339-5ed3-6c2e-f011-5bc89dac3f5c@kernel.dk>

If there is a DRAIN in the middle of a link, the current code handles it by
allocating a shadow request. Defer the next request/link instead. This:

Pros:
1. removes semi-duplicated code
2. doesn't allocate memory for shadows
3. works better if only the head is marked for drain
4. doesn't need complex synchronisation

Cons:
1. removes the shadow->seq = last_drained_in_link->seq optimisation.
   It shouldn't be a common case, and it can be added back if needed.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>

---
Hmm... How about going another way and just removing these shadow
requests entirely? I think this patch makes things easier.
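
For context, a minimal userspace sketch of the case in question: a link
chain with IOSQE_IO_DRAIN set on a middle entry. This is illustrative
only and not part of the patch; it assumes liburing's basic helpers
(io_uring_queue_init/get_sqe/prep_nop/submit), and error handling is
mostly elided for brevity.

	#include <liburing.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;

		if (io_uring_queue_init(8, &ring, 0) < 0)
			return 1;

		sqe = io_uring_get_sqe(&ring);	/* head of the link */
		io_uring_prep_nop(sqe);
		sqe->flags |= IOSQE_IO_LINK;

		sqe = io_uring_get_sqe(&ring);	/* middle: drained link member */
		io_uring_prep_nop(sqe);
		sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_DRAIN;

		sqe = io_uring_get_sqe(&ring);	/* tail of the link */
		io_uring_prep_nop(sqe);

		io_uring_submit(&ring);
		io_uring_queue_exit(&ring);
		return 0;
	}

With this patch the kernel no longer allocates a shadow request for the
drained link; the head is marked REQ_F_DRAIN_LINK, and whatever is
submitted after the link is deferred via ctx->drain_next.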


 fs/io_uring.c | 86 +++++++++++----------------------------------------
 1 file changed, 18 insertions(+), 68 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6175e2e195c0..dd220f415c39 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -186,6 +186,7 @@ struct io_ring_ctx {
 		bool			compat;
 		bool			account_mem;
 		bool			cq_overflow_flushed;
+		bool			drain_next;
 
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -346,7 +347,7 @@ struct io_kiocb {
 #define REQ_F_LINK		64	/* linked sqes */
 #define REQ_F_LINK_TIMEOUT	128	/* has linked timeout */
 #define REQ_F_FAIL_LINK		256	/* fail rest of links */
-#define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
+#define REQ_F_DRAIN_LINK	512	/* link should be fully drained */
 #define REQ_F_TIMEOUT		1024	/* timeout request */
 #define REQ_F_ISREG		2048	/* regular file */
 #define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
@@ -615,11 +616,6 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 	__io_commit_cqring(ctx);
 
 	while ((req = io_get_deferred_req(ctx)) != NULL) {
-		if (req->flags & REQ_F_SHADOW_DRAIN) {
-			/* Just for drain, free it. */
-			__io_free_req(req);
-			continue;
-		}
 		req->flags |= REQ_F_IO_DRAINED;
 		io_queue_async_work(req);
 	}
@@ -2956,6 +2952,12 @@ static void io_queue_sqe(struct io_kiocb *req)
 {
 	int ret;
 
+	if (unlikely(req->ctx->drain_next)) {
+		req->flags |= REQ_F_IO_DRAIN;
+		req->ctx->drain_next = false;
+	}
+	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
+
 	ret = io_req_defer(req);
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
@@ -2968,57 +2970,16 @@ static void io_queue_sqe(struct io_kiocb *req)
 		__io_queue_sqe(req);
 }
 
-static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
+static inline void io_queue_link_head(struct io_kiocb *req)
 {
-	int ret;
-	int need_submit = false;
-	struct io_ring_ctx *ctx = req->ctx;
-
 	if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
-		ret = -ECANCELED;
-		goto err;
-	}
-	if (!shadow) {
+		io_cqring_add_event(req, -ECANCELED);
+		io_double_put_req(req);
+	} else
 		io_queue_sqe(req);
-		return;
-	}
-
-	/*
-	 * Mark the first IO in link list as DRAIN, let all the following
-	 * IOs enter the defer list. all IO needs to be completed before link
-	 * list.
-	 */
-	req->flags |= REQ_F_IO_DRAIN;
-	ret = io_req_defer(req);
-	if (ret) {
-		if (ret != -EIOCBQUEUED) {
-err:
-			io_cqring_add_event(req, ret);
-			if (req->flags & REQ_F_LINK)
-				req->flags |= REQ_F_FAIL_LINK;
-			io_double_put_req(req);
-			if (shadow)
-				__io_free_req(shadow);
-			return;
-		}
-	} else {
-		/*
-		 * If ret == 0 means that all IOs in front of link io are
-		 * running done. let's queue link head.
-		 */
-		need_submit = true;
-	}
-
-	/* Insert shadow req to defer_list, blocking next IOs */
-	spin_lock_irq(&ctx->completion_lock);
-	trace_io_uring_defer(ctx, shadow, true);
-	list_add_tail(&shadow->list, &ctx->defer_list);
-	spin_unlock_irq(&ctx->completion_lock);
-
-	if (need_submit)
-		__io_queue_sqe(req);
 }
 
+
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
 
 static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
@@ -3055,6 +3016,9 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 		struct io_kiocb *prev = *link;
 		struct io_uring_sqe *sqe_copy;
 
+		if (s->sqe->flags & IOSQE_IO_DRAIN)
+			(*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;
+
 		if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
 			ret = io_timeout_setup(req);
 			/* common setup allows offset being set, we don't */
@@ -3173,7 +3137,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 {
 	struct io_submit_state state, *statep = NULL;
 	struct io_kiocb *link = NULL;
-	struct io_kiocb *shadow_req = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
 
@@ -3212,18 +3175,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 
 		sqe_flags = req->submit.sqe->flags;
 
-		if (link && (sqe_flags & IOSQE_IO_DRAIN)) {
-			if (!shadow_req) {
-				shadow_req = io_get_req(ctx, NULL);
-				if (unlikely(!shadow_req))
-					goto out;
-				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
-				refcount_dec(&shadow_req->refs);
-			}
-			shadow_req->sequence = req->submit.sequence;
-		}
-
-out:
 		req->submit.ring_file = ring_file;
 		req->submit.ring_fd = ring_fd;
 		req->submit.has_user = *mm != NULL;
@@ -3239,14 +3190,13 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		 * that's the end of the chain. Submit the previous link.
 		 */
 		if (!(sqe_flags & IOSQE_IO_LINK) && link) {
-			io_queue_link_head(link, shadow_req);
+			io_queue_link_head(link);
 			link = NULL;
-			shadow_req = NULL;
 		}
 	}
 
 	if (link)
-		io_queue_link_head(link, shadow_req);
+		io_queue_link_head(link);
 	if (statep)
 		io_submit_state_end(&state);
 
-- 
2.24.0

