All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: Jens Axboe <axboe@kernel.dk>, io-uring@vger.kernel.org
Cc: Joakim Hassila <joj@mac.com>, stable@vger.kernel.org
Subject: [PATCH 2/2] io_uring: fix shared sqpoll cancellation hangs
Date: Sun, 18 Apr 2021 14:52:09 +0100	[thread overview]
Message-ID: <1bded7e6c6b32e0bae25fce36be2868e46b116a0.1618752958.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1618752958.git.asml.silence@gmail.com>

[  736.982891] INFO: task iou-sqp-4294:4295 blocked for more than 122 seconds.
[  736.982897] Call Trace:
[  736.982901]  schedule+0x68/0xe0
[  736.982903]  io_uring_cancel_sqpoll+0xdb/0x110
[  736.982908]  io_sqpoll_cancel_cb+0x24/0x30
[  736.982911]  io_run_task_work_head+0x28/0x50
[  736.982913]  io_sq_thread+0x4e3/0x720

We call io_uring_cancel_sqpoll() one by one for each ctx either in
sq_thread() itself or via task works, and it's intended to cancel all
requests of a specified context. However the function uses per-task
counters to track the number of inflight requests, so it counts more
requests than available via currect io_uring ctx and goes to sleep for
them to appear (e.g. from IRQ), that will never happen.

Cancel a bit more than before, i.e. all ctxs that share sqpoll
and continue to use shared counters. Don't forget that we should not
remove ctx from the list before running that task_work sqpoll-cancel,
otherwise the function wouldn't be able to find the context and will
hang.

Reported-by: Joakim Hassila <joj@mac.com>
Reported-by: Jens Axboe <axboe@kernel.dk>
Fixes: 37d1e2e3642e2 ("io_uring: move SQPOLL thread io-wq forked worker")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fb41725204f0..99e55f7f6c34 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1022,7 +1022,7 @@ static void io_uring_del_task_file(unsigned long index);
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
 					 struct files_struct *files);
-static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
+static void io_uring_cancel_sqpoll(struct io_sq_data *sqd);
 static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
 
 static bool io_cqring_fill_event(struct io_kiocb *req, long res, unsigned cflags);
@@ -6867,15 +6867,14 @@ static int io_sq_thread(void *data)
 		timeout = jiffies + sqd->sq_thread_idle;
 	}
 
-	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-		io_uring_cancel_sqpoll(ctx);
+	io_uring_cancel_sqpoll(sqd);
 	sqd->thread = NULL;
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 		io_ring_set_wakeup_flag(ctx);
-	mutex_unlock(&sqd->lock);
-
 	io_run_task_work();
 	io_run_task_work_head(&sqd->park_task_work);
+	mutex_unlock(&sqd->lock);
+
 	complete(&sqd->exited);
 	do_exit(0);
 }
@@ -8867,11 +8866,11 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
 static void io_sqpoll_cancel_cb(struct callback_head *cb)
 {
 	struct io_tctx_exit *work = container_of(cb, struct io_tctx_exit, task_work);
-	struct io_ring_ctx *ctx = work->ctx;
-	struct io_sq_data *sqd = ctx->sq_data;
+	struct io_sq_data *sqd = work->ctx->sq_data;
 
 	if (sqd->thread)
-		io_uring_cancel_sqpoll(ctx);
+		io_uring_cancel_sqpoll(sqd);
+	list_del_init(&work->ctx->sqd_list);
 	complete(&work->completion);
 }
 
@@ -8882,7 +8881,6 @@ static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx)
 	struct task_struct *task;
 
 	io_sq_thread_park(sqd);
-	list_del_init(&ctx->sqd_list);
 	io_sqd_update_thread_idle(sqd);
 	task = sqd->thread;
 	if (task) {
@@ -8890,6 +8888,8 @@ static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx)
 		init_task_work(&work.task_work, io_sqpoll_cancel_cb);
 		io_task_work_add_head(&sqd->park_task_work, &work.task_work);
 		wake_up_process(task);
+	} else {
+		list_del_init(&ctx->sqd_list);
 	}
 	io_sq_thread_unpark(sqd);
 
@@ -8915,14 +8915,14 @@ static void io_uring_try_cancel(struct files_struct *files)
 }
 
 /* should only be called by SQPOLL task */
-static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
+static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
 {
-	struct io_sq_data *sqd = ctx->sq_data;
 	struct io_uring_task *tctx = current->io_uring;
+	struct io_ring_ctx *ctx;
 	s64 inflight;
 	DEFINE_WAIT(wait);
 
-	WARN_ON_ONCE(!sqd || ctx->sq_data->thread != current);
+	WARN_ON_ONCE(!sqd || sqd->thread != current);
 
 	atomic_inc(&tctx->in_idle);
 	do {
@@ -8930,7 +8930,8 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 		inflight = tctx_inflight(tctx, false);
 		if (!inflight)
 			break;
-		io_uring_try_cancel_requests(ctx, current, NULL);
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+			io_uring_try_cancel_requests(ctx, current, NULL);
 
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 		/*
-- 
2.31.1


  parent reply	other threads:[~2021-04-18 13:52 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-18 13:52 [PATCH v2 0/2] fix hangs with shared sqpoll Pavel Begunkov
2021-04-18 13:52 ` [PATCH 1/2] io_uring: remove extra sqpoll submission halting Pavel Begunkov
2021-04-18 13:52 ` Pavel Begunkov [this message]
2021-04-19 17:34 ` [PATCH v2 0/2] fix hangs with shared sqpoll Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2021-04-16  0:22 [PATCH " Pavel Begunkov
2021-04-16  0:22 ` [PATCH 2/2] io_uring: fix shared sqpoll cancellation hangs Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1bded7e6c6b32e0bae25fce36be2868e46b116a0.1618752958.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=joj@mac.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.