Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/2] io_uring: fix wrong sequence setting logic
@ 2019-09-09 12:50 Jackie Liu
  2019-09-09 12:50 ` [PATCH 2/2] io_uring: add support for link with drain Jackie Liu
  2019-09-09 22:14 ` [PATCH 1/2] io_uring: fix wrong sequence setting logic Jens Axboe
  0 siblings, 2 replies; 3+ messages in thread
From: Jackie Liu @ 2019-09-09 12:50 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, Jackie Liu

Sqo_thread will get sqring in batches, which will cause
ctx->cached_sq_head to be added in batches. if one of these
sqes is set with the DRAIN flag, then he will never get a
chance to process, and finally sqo_thread will not exit.

Fixes: de0617e4671 ("io_uring: add support for marking commands as draining")
Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
---
 fs/io_uring.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cfb48bd088e1..06d048341fa4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -288,6 +288,7 @@ struct io_ring_ctx {
 struct sqe_submit {
 	const struct io_uring_sqe	*sqe;
 	unsigned short			index;
+	u32				sequence;
 	bool				has_user;
 	bool				needs_lock;
 	bool				needs_fixed_file;
@@ -2040,7 +2041,7 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 
 	if (flags & IOSQE_IO_DRAIN) {
 		req->flags |= REQ_F_IO_DRAIN;
-		req->sequence = ctx->cached_sq_head - 1;
+		req->sequence = s->sequence;
 	}
 
 	if (!io_op_needs_file(s->sqe))
@@ -2247,6 +2248,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	if (head < ctx->sq_entries) {
 		s->index = head;
 		s->sqe = &ctx->sq_sqes[head];
+		s->sequence = ctx->cached_sq_head;
 		ctx->cached_sq_head++;
 		return true;
 	}
-- 
2.23.0




^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/2] io_uring: add support for link with drain
  2019-09-09 12:50 [PATCH 1/2] io_uring: fix wrong sequence setting logic Jackie Liu
@ 2019-09-09 12:50 ` Jackie Liu
  2019-09-09 22:14 ` [PATCH 1/2] io_uring: fix wrong sequence setting logic Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jackie Liu @ 2019-09-09 12:50 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, Jackie Liu

To support the link with drain, we need to do two parts.

There is an sqes:

    0     1     2     3     4     5     6
 +-----+-----+-----+-----+-----+-----+-----+
 |  N  |  L  |  L  | L+D |  N  |  N  |  N  |
 +-----+-----+-----+-----+-----+-----+-----+

First, we need to ensure that the io before the link is completed,
there is a easy way is set drain flag to the link list's head, so
all subsequent io will be inserted into the defer_list.

	+-----+
    (0) |  N  |
	+-----+
           |          (2)         (3)         (4)
	+-----+     +-----+     +-----+     +-----+
    (1) | L+D | --> |  L  | --> | L+D | --> |  N  |
	+-----+     +-----+     +-----+     +-----+
           |
	+-----+
    (5) |  N  |
	+-----+
           |
	+-----+
    (6) |  N  |
	+-----+

Second, ensure that the following IO will not be completed first,
an easy way is to create a mirror of drain io and insert it into
defer_list, in this way, as long as drain io is not processed, the
following io in the defer_list will not be actively process.

	+-----+
    (0) |  N  |
	+-----+
           |          (2)         (3)         (4)
	+-----+     +-----+     +-----+     +-----+
    (1) | L+D | --> |  L  | --> | L+D | --> |  N  |
	+-----+     +-----+     +-----+     +-----+
           |
	+-----+
   ('3) |  D  |   <== This is a shadow of (3)
	+-----+
           |
	+-----+
    (5) |  N  |
	+-----+
           |
	+-----+
    (6) |  N  |
	+-----+

Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
---
 fs/io_uring.c | 114 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 97 insertions(+), 17 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 06d048341fa4..b03630ced9ae 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -336,6 +336,7 @@ struct io_kiocb {
 #define REQ_F_LINK		64	/* linked sqes */
 #define REQ_F_LINK_DONE		128	/* linked sqes done */
 #define REQ_F_FAIL_LINK		256	/* fail rest of links */
+#define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -367,6 +368,7 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
 
@@ -472,6 +474,11 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 	__io_commit_cqring(ctx);
 
 	while ((req = io_get_deferred_req(ctx)) != NULL) {
+		if (req->flags & REQ_F_SHADOW_DRAIN) {
+			/* Just for drain, free it. */
+			__io_free_req(req);
+			continue;
+		}
 		req->flags |= REQ_F_IO_DRAINED;
 		queue_work(ctx->sqo_wq, &req->work);
 	}
@@ -2039,10 +2046,14 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 	flags = READ_ONCE(s->sqe->flags);
 	fd = READ_ONCE(s->sqe->fd);
 
-	if (flags & IOSQE_IO_DRAIN) {
+	if (flags & IOSQE_IO_DRAIN)
 		req->flags |= REQ_F_IO_DRAIN;
-		req->sequence = s->sequence;
-	}
+	/*
+	 * All io need record the previous position, if LINK vs DARIN,
+	 * it can be used to mark the position of the first IO in the
+	 * link list.
+	 */
+	req->sequence = s->sequence;
 
 	if (!io_op_needs_file(s->sqe))
 		return 0;
@@ -2064,20 +2075,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 	return 0;
 }
 
-static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			struct sqe_submit *s)
 {
 	int ret;
 
-	ret = io_req_defer(ctx, req, s->sqe);
-	if (ret) {
-		if (ret != -EIOCBQUEUED) {
-			io_free_req(req);
-			io_cqring_add_event(ctx, s->sqe->user_data, ret);
-		}
-		return 0;
-	}
-
 	ret = __io_submit_sqe(ctx, req, s, true);
 	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
 		struct io_uring_sqe *sqe_copy;
@@ -2120,6 +2122,64 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	return ret;
 }
 
+static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			struct sqe_submit *s)
+{
+	int ret;
+
+	ret = io_req_defer(ctx, req, s->sqe);
+	if (ret) {
+		if (ret != -EIOCBQUEUED) {
+			io_free_req(req);
+			io_cqring_add_event(ctx, s->sqe->user_data, ret);
+		}
+		return 0;
+	}
+
+	return __io_queue_sqe(ctx, req, s);
+}
+
+static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			      struct sqe_submit *s, struct io_kiocb *shadow)
+{
+	int ret;
+	int need_submit = false;
+
+	if (!shadow)
+		return io_queue_sqe(ctx, req, s);
+
+	/*
+	 * Mark the first IO in link list as DRAIN, let all the following
+	 * IOs enter the defer list. all IO needs to be completed before link
+	 * list.
+	 */
+	req->flags |= REQ_F_IO_DRAIN;
+	ret = io_req_defer(ctx, req, s->sqe);
+	if (ret) {
+		if (ret != -EIOCBQUEUED) {
+			io_free_req(req);
+			io_cqring_add_event(ctx, s->sqe->user_data, ret);
+			return 0;
+		}
+	} else {
+		/*
+		 * If ret == 0 means that all IOs in front of link io are
+		 * running done. let's queue link head.
+		 */
+		need_submit = true;
+	}
+
+	/* Insert shadow req to defer_list, blocking next IOs */
+	spin_lock_irq(&ctx->completion_lock);
+	list_add_tail(&shadow->list, &ctx->defer_list);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	if (need_submit)
+		return __io_queue_sqe(ctx, req, s);
+
+	return 0;
+}
+
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
 
 static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
@@ -2264,6 +2324,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 {
 	struct io_submit_state state, *statep = NULL;
 	struct io_kiocb *link = NULL;
+	struct io_kiocb *shadow_req = NULL;
 	bool prev_was_link = false;
 	int i, submitted = 0;
 
@@ -2278,11 +2339,20 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 		 * that's the end of the chain. Submit the previous link.
 		 */
 		if (!prev_was_link && link) {
-			io_queue_sqe(ctx, link, &link->submit);
+			io_queue_link_head(ctx, link, &link->submit, shadow_req);
 			link = NULL;
 		}
 		prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
 
+		if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
+			if (!shadow_req) {
+				shadow_req = io_get_req(ctx, NULL);
+				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
+				refcount_dec(&shadow_req->refs);
+			}
+			shadow_req->sequence = sqes[i].sequence;
+		}
+
 		if (unlikely(mm_fault)) {
 			io_cqring_add_event(ctx, sqes[i].sqe->user_data,
 						-EFAULT);
@@ -2296,7 +2366,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 	}
 
 	if (link)
-		io_queue_sqe(ctx, link, &link->submit);
+		io_queue_link_head(ctx, link, &link->submit, shadow_req);
 	if (statep)
 		io_submit_state_end(&state);
 
@@ -2432,6 +2502,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 {
 	struct io_submit_state state, *statep = NULL;
 	struct io_kiocb *link = NULL;
+	struct io_kiocb *shadow_req = NULL;
 	bool prev_was_link = false;
 	int i, submit = 0;
 
@@ -2451,11 +2522,20 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 		 * that's the end of the chain. Submit the previous link.
 		 */
 		if (!prev_was_link && link) {
-			io_queue_sqe(ctx, link, &link->submit);
+			io_queue_link_head(ctx, link, &link->submit, shadow_req);
 			link = NULL;
 		}
 		prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
 
+		if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
+			if (!shadow_req) {
+				shadow_req = io_get_req(ctx, NULL);
+				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
+				refcount_dec(&shadow_req->refs);
+			}
+			shadow_req->sequence = s.sequence;
+		}
+
 		s.has_user = true;
 		s.needs_lock = false;
 		s.needs_fixed_file = false;
@@ -2465,7 +2545,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 	io_commit_sqring(ctx);
 
 	if (link)
-		io_queue_sqe(ctx, link, &link->submit);
+		io_queue_link_head(ctx, link, &link->submit, shadow_req);
 	if (statep)
 		io_submit_state_end(statep);
 
-- 
2.23.0




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] io_uring: fix wrong sequence setting logic
  2019-09-09 12:50 [PATCH 1/2] io_uring: fix wrong sequence setting logic Jackie Liu
  2019-09-09 12:50 ` [PATCH 2/2] io_uring: add support for link with drain Jackie Liu
@ 2019-09-09 22:14 ` Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2019-09-09 22:14 UTC (permalink / raw)
  To: Jackie Liu; +Cc: linux-block

On 9/9/19 6:50 AM, Jackie Liu wrote:
> Sqo_thread will get sqring in batches, which will cause
> ctx->cached_sq_head to be added in batches. if one of these
> sqes is set with the DRAIN flag, then he will never get a
> chance to process, and finally sqo_thread will not exit.

This (and 2/2) look good to me, and they test out fine. Thanks!
Applied for 5.4.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, back to index

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-09 12:50 [PATCH 1/2] io_uring: fix wrong sequence setting logic Jackie Liu
2019-09-09 12:50 ` [PATCH 2/2] io_uring: add support for link with drain Jackie Liu
2019-09-09 22:14 ` [PATCH 1/2] io_uring: fix wrong sequence setting logic Jens Axboe

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org linux-block@archiver.kernel.org
	public-inbox-index linux-block


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/ public-inbox