IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts
@ 2019-11-19 20:32 Pavel Begunkov
  2019-11-19 20:32 ` [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe Pavel Begunkov
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Pavel Begunkov @ 2019-11-19 20:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

The second fixes mild misbehaving for a very rare case.
The last one fixes a nasty bug, which leaks request with a uring
reference, so hunging a process.

Pavel Begunkov (4):
  io_uring: Always REQ_F_FREE_SQE for allocated sqe
  io_uring: break links for failed defer
  io_uring: remove redundant check
  io_uring: Fix leaking linked timeouts

 fs/io_uring.c | 58 ++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 31 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe
  2019-11-19 20:32 [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts Pavel Begunkov
@ 2019-11-19 20:32 ` Pavel Begunkov
  2019-11-19 22:10   ` Jens Axboe
  2019-11-19 20:32 ` [PATCH 2/4] io_uring: break links for failed defer Pavel Begunkov
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: Pavel Begunkov @ 2019-11-19 20:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Always mark requests with allocated sqe and deallocate it in
__io_free_req(). It's easier to follow and doesn't add edge cases.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 49 ++++++++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 09a96516f690..ca3c5e979918 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -829,6 +829,8 @@ static void __io_free_req(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
+	if (req->flags & REQ_F_FREE_SQE)
+		kfree(req->submit.sqe);
 	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
 		fput(req->file);
 	if (req->flags & REQ_F_INFLIGHT) {
@@ -924,16 +926,11 @@ static void io_fail_links(struct io_kiocb *req)
 	spin_lock_irqsave(&ctx->completion_lock, flags);
 
 	while (!list_empty(&req->link_list)) {
-		const struct io_uring_sqe *sqe_to_free = NULL;
-
 		link = list_first_entry(&req->link_list, struct io_kiocb, list);
 		list_del_init(&link->list);
 
 		trace_io_uring_fail_link(req, link);
 
-		if (link->flags & REQ_F_FREE_SQE)
-			sqe_to_free = link->submit.sqe;
-
 		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
 		    link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
 			io_link_cancel_timeout(link);
@@ -941,7 +938,6 @@ static void io_fail_links(struct io_kiocb *req)
 			io_cqring_fill_event(link, -ECANCELED);
 			__io_double_put_req(link);
 		}
-		kfree(sqe_to_free);
 	}
 
 	io_commit_cqring(ctx);
@@ -1083,7 +1079,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			 * completions for those, only batch free for fixed
 			 * file and non-linked commands.
 			 */
-			if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+			if (((req->flags &
+				(REQ_F_FIXED_FILE|REQ_F_LINK|REQ_F_FREE_SQE)) ==
 			    REQ_F_FIXED_FILE) && !io_is_fallback_req(req)) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
@@ -2566,6 +2563,7 @@ static int io_req_defer(struct io_kiocb *req)
 	}
 
 	memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+	req->flags |= REQ_F_FREE_SQE;
 	req->submit.sqe = sqe_copy;
 
 	trace_io_uring_defer(ctx, req, false);
@@ -2660,7 +2658,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 	struct io_wq_work *work = *workptr;
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
 	struct sqe_submit *s = &req->submit;
-	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_kiocb *nxt = NULL;
 	int ret = 0;
 
@@ -2696,9 +2693,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 		io_put_req(req);
 	}
 
-	/* async context always use a copy of the sqe */
-	kfree(sqe);
-
 	/* if a dependent link is ready, pass it back */
 	if (!ret && nxt) {
 		struct io_kiocb *link;
@@ -2896,23 +2890,24 @@ static void __io_queue_sqe(struct io_kiocb *req)
 		struct io_uring_sqe *sqe_copy;
 
 		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
-		if (sqe_copy) {
-			s->sqe = sqe_copy;
-			if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
-				ret = io_grab_files(req);
-				if (ret) {
-					kfree(sqe_copy);
-					goto err;
-				}
-			}
+		if (!sqe_copy)
+			goto err;
 
-			/*
-			 * Queued up for async execution, worker will release
-			 * submit reference when the iocb is actually submitted.
-			 */
-			io_queue_async_work(req);
-			return;
+		s->sqe = sqe_copy;
+		req->flags |= REQ_F_FREE_SQE;
+
+		if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
+			ret = io_grab_files(req);
+			if (ret)
+				goto err;
 		}
+
+		/*
+		 * Queued up for async execution, worker will release
+		 * submit reference when the iocb is actually submitted.
+		 */
+		io_queue_async_work(req);
+		return;
 	}
 
 err:
@@ -3003,7 +2998,6 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
 static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 			  struct io_kiocb **link)
 {
-	struct io_uring_sqe *sqe_copy;
 	struct sqe_submit *s = &req->submit;
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -3033,6 +3027,7 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 	 */
 	if (*link) {
 		struct io_kiocb *prev = *link;
+		struct io_uring_sqe *sqe_copy;
 
 		if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
 			ret = io_timeout_setup(req);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/4] io_uring: break links for failed defer
  2019-11-19 20:32 [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts Pavel Begunkov
  2019-11-19 20:32 ` [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe Pavel Begunkov
@ 2019-11-19 20:32 ` Pavel Begunkov
  2019-11-19 20:32 ` [PATCH 3/4] io_uring: remove redundant check Pavel Begunkov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2019-11-19 20:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

If io_req_defer() failed, it needs to cancel a dependant link.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca3c5e979918..637818cc3e5b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2938,6 +2938,8 @@ static void io_queue_sqe(struct io_kiocb *req)
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
 			io_cqring_add_event(req, ret);
+			if (req->flags & REQ_F_LINK)
+				req->flags |= REQ_F_FAIL_LINK;
 			io_double_put_req(req);
 		}
 	} else
@@ -2970,6 +2972,8 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
 		if (ret != -EIOCBQUEUED) {
 err:
 			io_cqring_add_event(req, ret);
+			if (req->flags & REQ_F_LINK)
+				req->flags |= REQ_F_FAIL_LINK;
 			io_double_put_req(req);
 			if (shadow)
 				__io_free_req(shadow);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/4] io_uring: remove redundant check
  2019-11-19 20:32 [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts Pavel Begunkov
  2019-11-19 20:32 ` [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe Pavel Begunkov
  2019-11-19 20:32 ` [PATCH 2/4] io_uring: break links for failed defer Pavel Begunkov
@ 2019-11-19 20:32 ` Pavel Begunkov
  2019-11-19 20:32 ` [PATCH 4/4] io_uring: Fix leaking linked timeouts Pavel Begunkov
  2019-11-19 22:10 ` [PATCHSET 0/4][io_uring-post] cleanup after " Jens Axboe
  4 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2019-11-19 20:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Pass any IORING_OP_LINK_TIMEOUT request further, where it will
eventually fail in io_issue_sqe().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 637818cc3e5b..aa6c9fb8f640 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3059,10 +3059,6 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 
 		INIT_LIST_HEAD(&req->link_list);
 		*link = req;
-	} else if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
-		/* Only valid as a linked SQE */
-		ret = -EINVAL;
-		goto err_req;
 	} else {
 		io_queue_sqe(req);
 	}
-- 
2.24.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 4/4] io_uring: Fix leaking linked timeouts
  2019-11-19 20:32 [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts Pavel Begunkov
                   ` (2 preceding siblings ...)
  2019-11-19 20:32 ` [PATCH 3/4] io_uring: remove redundant check Pavel Begunkov
@ 2019-11-19 20:32 ` Pavel Begunkov
  2019-11-19 22:10 ` [PATCHSET 0/4][io_uring-post] cleanup after " Jens Axboe
  4 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2019-11-19 20:32 UTC (permalink / raw)
  To: Jens Axboe, io-uring

let have a dependant link: REQ -> LINK_TIMEOUT -> LINK_TIMEOUT

1. submission stage: submission references for REQ and LINK_TIMEOUT
are dropped. So, references respectively (1,1,2)

2. io_put(REQ) + FAIL_LINKS stage: calls io_fail_links(), which for all
linked timeouts will call cancel_timeout() and drop 1 reference.
So, references after: (0,0,1). That's a leak.

Make it treat only the first linked timeout as such, and pass others
through __io_double_put_req().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index aa6c9fb8f640..4de7d0192666 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -934,6 +934,7 @@ static void io_fail_links(struct io_kiocb *req)
 		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
 		    link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
 			io_link_cancel_timeout(link);
+			req->flags &= ~REQ_F_LINK_TIMEOUT;
 		} else {
 			io_cqring_fill_event(link, -ECANCELED);
 			__io_double_put_req(link);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe
  2019-11-19 20:32 ` [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe Pavel Begunkov
@ 2019-11-19 22:10   ` Jens Axboe
  2019-11-20  9:03     ` Pavel Begunkov
  0 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2019-11-19 22:10 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/19/19 1:32 PM, Pavel Begunkov wrote:
> Always mark requests with allocated sqe and deallocate it in
> __io_free_req(). It's easier to follow and doesn't add edge cases.

I did consider doing this, and it is tempting as a cleanup, but it
also moves it into the fastpath instead of keeping it in the slow
link fail path. Any allocated sqe is normally freed when the work
is processed, which is generally also a better path to do it in as
we know we're not under locks/irq disabled.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts
  2019-11-19 20:32 [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts Pavel Begunkov
                   ` (3 preceding siblings ...)
  2019-11-19 20:32 ` [PATCH 4/4] io_uring: Fix leaking linked timeouts Pavel Begunkov
@ 2019-11-19 22:10 ` " Jens Axboe
  4 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2019-11-19 22:10 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/19/19 1:32 PM, Pavel Begunkov wrote:
> The second fixes mild misbehaving for a very rare case.
> The last one fixes a nasty bug, which leaks request with a uring
> reference, so hunging a process.
> 
> Pavel Begunkov (4):
>    io_uring: Always REQ_F_FREE_SQE for allocated sqe
>    io_uring: break links for failed defer
>    io_uring: remove redundant check
>    io_uring: Fix leaking linked timeouts
> 
>   fs/io_uring.c | 58 ++++++++++++++++++++++++---------------------------
>   1 file changed, 27 insertions(+), 31 deletions(-)

Thanks, added 2-4 for now.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe
  2019-11-19 22:10   ` Jens Axboe
@ 2019-11-20  9:03     ` Pavel Begunkov
  2019-11-20 14:25       ` Jens Axboe
  0 siblings, 1 reply; 9+ messages in thread
From: Pavel Begunkov @ 2019-11-20  9:03 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/20/2019 1:10 AM, Jens Axboe wrote:
> and it is tempting as a cleanup, but it
> also moves it into the fastpath instead of keeping it in the slow
> link fail path. Any allocated sqe is normally freed when the work
> is processed, which is generally also a better path to do it in as
> we know we're not under locks/irq disabled.

Good point with locks/irq, but io_put_req() is usually called out of
locking. Isn't it? E.g. @completion_lock is taken mainly by *add_cq_event().

SQE needs to be freed in any case, and there is only one extra "if",
what we really should not worry about. There are much heavier parts.
e.g. excessive atomics/locking, I'll send an RFC for that, if needed.

I find the complexity of much more concern here.

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe
  2019-11-20  9:03     ` Pavel Begunkov
@ 2019-11-20 14:25       ` Jens Axboe
  0 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2019-11-20 14:25 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/20/19 2:03 AM, Pavel Begunkov wrote:
> On 11/20/2019 1:10 AM, Jens Axboe wrote:
>> and it is tempting as a cleanup, but it
>> also moves it into the fastpath instead of keeping it in the slow
>> link fail path. Any allocated sqe is normally freed when the work
>> is processed, which is generally also a better path to do it in as
>> we know we're not under locks/irq disabled.
> 
> Good point with locks/irq, but io_put_req() is usually called out of
> locking. Isn't it? E.g. @completion_lock is taken mainly by *add_cq_event().

It is, yes.

> SQE needs to be freed in any case, and there is only one extra "if",
> what we really should not worry about. There are much heavier parts.
> e.g. excessive atomics/locking, I'll send an RFC for that, if needed.
> 
> I find the complexity of much more concern here.

Fair enough, it's not a big addition and it does help clean it up.
I'll apply it.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, back to index

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-19 20:32 [PATCHSET 0/4][io_uring-post] cleanup after linked timeouts Pavel Begunkov
2019-11-19 20:32 ` [PATCH 1/4] io_uring: Always REQ_F_FREE_SQE for allocated sqe Pavel Begunkov
2019-11-19 22:10   ` Jens Axboe
2019-11-20  9:03     ` Pavel Begunkov
2019-11-20 14:25       ` Jens Axboe
2019-11-19 20:32 ` [PATCH 2/4] io_uring: break links for failed defer Pavel Begunkov
2019-11-19 20:32 ` [PATCH 3/4] io_uring: remove redundant check Pavel Begunkov
2019-11-19 20:32 ` [PATCH 4/4] io_uring: Fix leaking linked timeouts Pavel Begunkov
2019-11-19 22:10 ` [PATCHSET 0/4][io_uring-post] cleanup after " Jens Axboe

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git