* [PATCH 0/4] sqpoll fixes
@ 2021-03-11 23:29 Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 1/4] io_uring: cancel deferred requests in try_cancel Pavel Begunkov
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-11 23:29 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Patches 1-3 are relatively easy. 2/4 is more of a cleanup, but it's
better to take it out of the equation; it makes life easier.

4/4 removes the park/unpark dance for cancellations and fixes a couple
of issues (a test will follow later). Signals in io_sq_thread() add
trouble by introducing more cases to think about and more races between
a dying SQPOLL task, dying ctxs and userspace tasks going away.

Pavel Begunkov (4):
  io_uring: cancel deferred requests in try_cancel
  io_uring: remove useless ->startup completion
  io_uring: prevent racy sqd->thread checks
  io_uring: cancel sqpoll via task_work

 fs/io_uring.c | 195 +++++++++++++++++++++++++-------------------------
 1 file changed, 99 insertions(+), 96 deletions(-)

-- 
2.24.0



* [PATCH 1/4] io_uring: cancel deferred requests in try_cancel
  2021-03-11 23:29 [PATCH 0/4] sqpoll fixes Pavel Begunkov
@ 2021-03-11 23:29 ` Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 2/4] io_uring: remove useless ->startup completion Pavel Begunkov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-11 23:29 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As io_uring_cancel_files() and others let the SQO task run between
calls to io_uring_try_cancel_requests(), the SQO task may generate new
deferred requests, so it's safer to try to cancel them there as well.
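
To make the shape of the fix easier to see, here is a minimal userspace
sketch (illustrative only, not part of the patch; the helper names and
pending-work counters are invented): each cancellation helper reports
whether it made progress, the results are OR-ed together, and the
caller loops until nothing is left, so requests deferred in the
meantime get another pass.

#include <stdbool.h>
#include <stdio.h>

static int deferred = 3, polled = 2;	/* pretend there is pending work */

static bool cancel_defer_files(void)
{
	if (!deferred)
		return false;
	deferred--;			/* "cancel" one deferred request */
	return true;
}

static bool poll_remove_all(void)
{
	if (!polled)
		return false;
	polled--;			/* "cancel" one poll request */
	return true;
}

int main(void)
{
	while (1) {
		bool ret = false;

		/* same shape as io_uring_try_cancel_requests() */
		ret |= cancel_defer_files();
		ret |= poll_remove_all();
		if (!ret)
			break;		/* no helper made progress: done */
	}
	printf("left: deferred=%d polled=%d\n", deferred, polled);
	return 0;
}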

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 49f85f49e1c3..56f3d8f408c9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8577,11 +8577,11 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
 	return ret;
 }
 
-static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
 				  struct task_struct *task,
 				  struct files_struct *files)
 {
-	struct io_defer_entry *de = NULL;
+	struct io_defer_entry *de;
 	LIST_HEAD(list);
 
 	spin_lock_irq(&ctx->completion_lock);
@@ -8592,6 +8592,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
 		}
 	}
 	spin_unlock_irq(&ctx->completion_lock);
+	if (list_empty(&list))
+		return false;
 
 	while (!list_empty(&list)) {
 		de = list_first_entry(&list, struct io_defer_entry, list);
@@ -8601,6 +8603,7 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
 		io_req_complete(de->req, -ECANCELED);
 		kfree(de);
 	}
+	return true;
 }
 
 static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
@@ -8666,6 +8669,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 			}
 		}
 
+		ret |= io_cancel_defer_files(ctx, task, files);
 		ret |= io_poll_remove_all(ctx, task, files);
 		ret |= io_kill_timeouts(ctx, task, files);
 		ret |= io_run_task_work();
@@ -8734,8 +8738,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 			atomic_inc(&task->io_uring->in_idle);
 	}
 
-	io_cancel_defer_files(ctx, task, files);
-
 	io_uring_cancel_files(ctx, task, files);
 	if (!files)
 		io_uring_try_cancel_requests(ctx, task, NULL);
-- 
2.24.0



* [PATCH 2/4] io_uring: remove useless ->startup completion
  2021-03-11 23:29 [PATCH 0/4] sqpoll fixes Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 1/4] io_uring: cancel deferred requests in try_cancel Pavel Begunkov
@ 2021-03-11 23:29 ` Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 3/4] io_uring: prevent racy sqd->thread checks Pavel Begunkov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-11 23:29 UTC (permalink / raw)
  To: Jens Axboe, io-uring

We always do complete(&sqd->startup) almost immediately after creating
sqd->thread, either in the success path or in io_sq_thread_finish().
The thread is deliberately created but not started so that we can set
things up, such as sqd->thread and io_uring_alloc_task_context(),
before waking it with wake_up_new_task(), which follows right after.
The ->startup completion therefore synchronizes nothing and can be
removed.
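
For readers unfamiliar with the created-but-not-started idiom, a rough
userspace analogue follows (pthreads, invented names; the real code
relies on create_io_thread()/wake_up_new_task(), not on a condition
variable): because the worker cannot run until it is explicitly
started, the parent can publish sqd->thread and finish its setup first,
and the start signal itself is the only synchronization needed.

#include <pthread.h>
#include <stdio.h>

struct sq_data {
	pthread_mutex_t lock;
	pthread_cond_t  started;	/* plays the role of wake_up_new_task() */
	int             run;
	pthread_t       thread;		/* published by the parent before "waking" */
};

static void *sq_thread(void *arg)
{
	struct sq_data *sqd = arg;

	pthread_mutex_lock(&sqd->lock);
	while (!sqd->run)		/* "not started" until the parent says so */
		pthread_cond_wait(&sqd->started, &sqd->lock);
	pthread_mutex_unlock(&sqd->lock);

	/* sqd->thread is guaranteed to be set by now */
	printf("sq thread running, self == sqd->thread: %d\n",
	       pthread_equal(pthread_self(), sqd->thread));
	return NULL;
}

int main(void)
{
	struct sq_data sqd = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.started = PTHREAD_COND_INITIALIZER,
	};
	pthread_t tid;

	pthread_create(&tid, NULL, sq_thread, &sqd);

	pthread_mutex_lock(&sqd.lock);
	sqd.thread = tid;		/* setup done before the thread runs */
	sqd.run = 1;
	pthread_cond_broadcast(&sqd.started);	/* "wake_up_new_task()" */
	pthread_mutex_unlock(&sqd.lock);

	pthread_join(tid, NULL);
	return 0;
}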

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 56f3d8f408c9..6349374d715d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -272,7 +272,6 @@ struct io_sq_data {
 	pid_t			task_tgid;
 
 	unsigned long		state;
-	struct completion	startup;
 	struct completion	exited;
 };
 
@@ -6656,8 +6655,6 @@ static int io_sq_thread(void *data)
 		set_cpus_allowed_ptr(current, cpu_online_mask);
 	current->flags |= PF_NO_SETAFFINITY;
 
-	wait_for_completion(&sqd->startup);
-
 	down_read(&sqd->rw_lock);
 
 	while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) {
@@ -7080,7 +7077,6 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx)
 	struct io_sq_data *sqd = ctx->sq_data;
 
 	if (sqd) {
-		complete(&sqd->startup);
 		io_sq_thread_park(sqd);
 		list_del(&ctx->sqd_list);
 		io_sqd_update_thread_idle(sqd);
@@ -7144,7 +7140,6 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
 	INIT_LIST_HEAD(&sqd->ctx_list);
 	init_rwsem(&sqd->rw_lock);
 	init_waitqueue_head(&sqd->wait);
-	init_completion(&sqd->startup);
 	init_completion(&sqd->exited);
 	return sqd;
 }
@@ -7856,7 +7851,6 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		wake_up_new_task(tsk);
 		if (ret)
 			goto err;
-		complete(&sqd->startup);
 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
 		/* Can't have SQ_AFF without SQPOLL */
 		ret = -EINVAL;
-- 
2.24.0



* [PATCH 3/4] io_uring: prevent racy sqd->thread checks
  2021-03-11 23:29 [PATCH 0/4] sqpoll fixes Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 1/4] io_uring: cancel deferred requests in try_cancel Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 2/4] io_uring: remove useless ->startup completion Pavel Begunkov
@ 2021-03-11 23:29 ` Pavel Begunkov
  2021-03-11 23:29 ` [PATCH 4/4] io_uring: cancel sqpoll via task_work Pavel Begunkov
  2021-03-12 18:19 ` [PATCH 0/4] sqpoll fixes Jens Axboe
  4 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-11 23:29 UTC (permalink / raw)
  To: Jens Axboe, io-uring

The SQPOLL thread we're trying to attach to may be going away. That is
not nice in itself, but a more serious problem is io_sq_offload_create()
seeing sqd->thread == NULL and trying to initialise it with a new
thread. There are plenty of ways that can be exploited or fail.
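
A minimal sketch of the shape of the fix (userspace C with invented
names, only meant to illustrate the check): liveness of the attach
target is decided only while holding the lock that the exiting thread
also takes before clearing its ->thread pointer, and a dead target is
reported as -ENXIO rather than replaced with a new thread.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct sq_data {
	pthread_mutex_t lock;		/* stands in for park/unpark */
	int             thread_alive;
};

static int attach_ctx(struct sq_data *sqd, int attached)
{
	int ret = 0;

	pthread_mutex_lock(&sqd->lock);		/* io_sq_thread_park() */
	if (attached && !sqd->thread_alive)
		ret = -ENXIO;			/* don't attach to a dying thread */
	/* else: add ctx to the sqd list, update idle, etc. */
	pthread_mutex_unlock(&sqd->lock);	/* io_sq_thread_unpark() */
	return ret;
}

int main(void)
{
	struct sq_data sqd = { .lock = PTHREAD_MUTEX_INITIALIZER };

	printf("attach to dead sqd: %d\n", attach_ctx(&sqd, 1));	/* -ENXIO */
	sqd.thread_alive = 1;
	printf("attach to live sqd: %d\n", attach_ctx(&sqd, 1));	/* 0 */
	return 0;
}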

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6349374d715d..cdec59510433 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7119,14 +7119,18 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
 	return sqd;
 }
 
-static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
+static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
+					 bool *attached)
 {
 	struct io_sq_data *sqd;
 
+	*attached = false;
 	if (p->flags & IORING_SETUP_ATTACH_WQ) {
 		sqd = io_attach_sq_data(p);
-		if (!IS_ERR(sqd))
+		if (!IS_ERR(sqd)) {
+			*attached = true;
 			return sqd;
+		}
 		/* fall through for EPERM case, setup new sqd/task */
 		if (PTR_ERR(sqd) != -EPERM)
 			return sqd;
@@ -7799,12 +7803,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		struct task_struct *tsk;
 		struct io_sq_data *sqd;
+		bool attached;
 
 		ret = -EPERM;
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
 			goto err;
 
-		sqd = io_get_sq_data(p);
+		sqd = io_get_sq_data(p, &attached);
 		if (IS_ERR(sqd)) {
 			ret = PTR_ERR(sqd);
 			goto err;
@@ -7816,13 +7821,24 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		if (!ctx->sq_thread_idle)
 			ctx->sq_thread_idle = HZ;
 
+		ret = 0;
 		io_sq_thread_park(sqd);
-		list_add(&ctx->sqd_list, &sqd->ctx_list);
-		io_sqd_update_thread_idle(sqd);
+		/* don't attach to a dying SQPOLL thread, would be racy */
+		if (attached && !sqd->thread) {
+			ret = -ENXIO;
+		} else {
+			list_add(&ctx->sqd_list, &sqd->ctx_list);
+			io_sqd_update_thread_idle(sqd);
+		}
 		io_sq_thread_unpark(sqd);
 
-		if (sqd->thread)
+		if (ret < 0) {
+			io_put_sq_data(sqd);
+			ctx->sq_data = NULL;
+			return ret;
+		} else if (attached) {
 			return 0;
+		}
 
 		if (p->flags & IORING_SETUP_SQ_AFF) {
 			int cpu = p->sq_thread_cpu;
-- 
2.24.0



* [PATCH 4/4] io_uring: cancel sqpoll via task_work
  2021-03-11 23:29 [PATCH 0/4] sqpoll fixes Pavel Begunkov
                   ` (2 preceding siblings ...)
  2021-03-11 23:29 ` [PATCH 3/4] io_uring: prevent racy sqd->thread checks Pavel Begunkov
@ 2021-03-11 23:29 ` Pavel Begunkov
  2021-03-12 19:35   ` Pavel Begunkov
  2021-03-12 18:19 ` [PATCH 0/4] sqpoll fixes Jens Axboe
  4 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-11 23:29 UTC (permalink / raw)
  To: Jens Axboe, io-uring

1) The first problem is io_uring_cancel_sqpoll() ->
io_uring_cancel_task_requests() basically doing park(); park(); and so
hanging.

2) Another one is more subtle: while the master task is doing
cancellations, the SQPOLL task submits, between the end of the
cancellation and finish(), requests that take a ref to the ctx, and so
locks it up forever.

3) Yet another is a dying SQPOLL task doing io_uring_cancel_sqpoll()
while the owner task runs the same io_uring_cancel_sqpoll(); they race
on tctx->wait events. And there are probably more of them.

Instead, do SQPOLL cancellations from within the SQPOLL task context via
task_work, see io_sqpoll_cancel_sync(). With that we don't need the
temporary park()/unpark() during cancellation, which is ugly, subtle and
in any case doesn't allow io_run_task_work() to be run properly.

io_uring_cancel_sqpoll() is now called only from SQPOLL task context and
under sqd locking, so all parking is removed from there. As a result,
io_sq_thread_[un]park() and io_sq_thread_stop() are no longer used by
the SQPOLL task, which spares us some headaches.

Also remove the ctx from the sqd list early to avoid 2). And kill
tctx->sqpoll, which is not used anymore.
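
The synchronous task_work handoff at the heart of
io_sqpoll_cancel_sync() can be pictured with this userspace sketch
(pthreads, invented names; the kernel uses task_work_add(),
wake_up_process() and struct completion instead): the caller queues a
callback for the target thread, wakes it, and blocks on a completion
that the callback signals once it has run in the target's own context.

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

struct work {
	void (*fn)(struct work *);
	sem_t done;			/* stands in for struct completion */
};

static struct work *pending;		/* single-slot "task_work" queue */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static int stop;

static void cancel_cb(struct work *w)
{
	printf("cancelling in the worker's own context\n");
	sem_post(&w->done);		/* complete(&work->completion) */
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!stop) {
		while (pending) {	/* "run task_work" */
			struct work *w = pending;

			pending = NULL;
			pthread_mutex_unlock(&lock);
			w->fn(w);
			pthread_mutex_lock(&lock);
		}
		pthread_cond_wait(&kick, &lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;
	struct work w = { .fn = cancel_cb };

	sem_init(&w.done, 0, 0);
	pthread_create(&tid, NULL, worker, NULL);

	pthread_mutex_lock(&lock);	/* task_work_add() + wake_up_process() */
	pending = &w;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&lock);

	sem_wait(&w.done);		/* wait_for_completion() */

	pthread_mutex_lock(&lock);	/* shut the worker down */
	stop = 1;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&lock);
	pthread_join(tid, NULL);
	return 0;
}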

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 151 ++++++++++++++++++++++++--------------------------
 1 file changed, 71 insertions(+), 80 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cdec59510433..70286b393c0e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6665,6 +6665,7 @@ static int io_sq_thread(void *data)
 			up_read(&sqd->rw_lock);
 			cond_resched();
 			down_read(&sqd->rw_lock);
+			io_run_task_work();
 			timeout = jiffies + sqd->sq_thread_idle;
 			continue;
 		}
@@ -6720,18 +6721,22 @@ static int io_sq_thread(void *data)
 		finish_wait(&sqd->wait, &wait);
 		timeout = jiffies + sqd->sq_thread_idle;
 	}
-
-	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-		io_uring_cancel_sqpoll(ctx);
 	up_read(&sqd->rw_lock);
-
+	down_write(&sqd->rw_lock);
+	/*
+	 * someone may have parked and added a cancellation task_work, run
+	 * it first because we don't want it in io_uring_cancel_sqpoll()
+	 */
 	io_run_task_work();
 
-	down_write(&sqd->rw_lock);
+	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+		io_uring_cancel_sqpoll(ctx);
 	sqd->thread = NULL;
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 		io_ring_set_wakeup_flag(ctx);
 	up_write(&sqd->rw_lock);
+
+	io_run_task_work();
 	complete(&sqd->exited);
 	do_exit(0);
 }
@@ -7033,8 +7038,8 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_unpark(struct io_sq_data *sqd)
 	__releases(&sqd->rw_lock)
 {
-	if (sqd->thread == current)
-		return;
+	WARN_ON_ONCE(sqd->thread == current);
+
 	clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
 	up_write(&sqd->rw_lock);
 }
@@ -7042,8 +7047,8 @@ static void io_sq_thread_unpark(struct io_sq_data *sqd)
 static void io_sq_thread_park(struct io_sq_data *sqd)
 	__acquires(&sqd->rw_lock)
 {
-	if (sqd->thread == current)
-		return;
+	WARN_ON_ONCE(sqd->thread == current);
+
 	set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
 	down_write(&sqd->rw_lock);
 	/* set again for consistency, in case concurrent parks are happening */
@@ -7054,8 +7059,8 @@ static void io_sq_thread_park(struct io_sq_data *sqd)
 
 static void io_sq_thread_stop(struct io_sq_data *sqd)
 {
-	if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state))
-		return;
+	WARN_ON_ONCE(sqd->thread == current);
+
 	down_write(&sqd->rw_lock);
 	set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 	if (sqd->thread)
@@ -7078,7 +7083,7 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx)
 
 	if (sqd) {
 		io_sq_thread_park(sqd);
-		list_del(&ctx->sqd_list);
+		list_del_init(&ctx->sqd_list);
 		io_sqd_update_thread_idle(sqd);
 		io_sq_thread_unpark(sqd);
 
@@ -7760,7 +7765,6 @@ static int io_uring_alloc_task_context(struct task_struct *task,
 	init_waitqueue_head(&tctx->wait);
 	tctx->last = NULL;
 	atomic_set(&tctx->in_idle, 0);
-	tctx->sqpoll = false;
 	task->io_uring = tctx;
 	spin_lock_init(&tctx->task_lock);
 	INIT_WQ_LIST(&tctx->task_list);
@@ -8719,43 +8723,12 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 
 		io_uring_try_cancel_requests(ctx, task, files);
 
-		if (ctx->sq_data)
-			io_sq_thread_unpark(ctx->sq_data);
 		prepare_to_wait(&task->io_uring->wait, &wait,
 				TASK_UNINTERRUPTIBLE);
 		if (inflight == io_uring_count_inflight(ctx, task, files))
 			schedule();
 		finish_wait(&task->io_uring->wait, &wait);
-		if (ctx->sq_data)
-			io_sq_thread_park(ctx->sq_data);
-	}
-}
-
-/*
- * We need to iteratively cancel requests, in case a request has dependent
- * hard links. These persist even for failure of cancelations, hence keep
- * looping until none are found.
- */
-static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
-					  struct files_struct *files)
-{
-	struct task_struct *task = current;
-
-	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
-		io_sq_thread_park(ctx->sq_data);
-		task = ctx->sq_data->thread;
-		if (task)
-			atomic_inc(&task->io_uring->in_idle);
 	}
-
-	io_uring_cancel_files(ctx, task, files);
-	if (!files)
-		io_uring_try_cancel_requests(ctx, task, NULL);
-
-	if (task)
-		atomic_dec(&task->io_uring->in_idle);
-	if (ctx->sq_data)
-		io_sq_thread_unpark(ctx->sq_data);
 }
 
 /*
@@ -8796,15 +8769,6 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx)
 		}
 		tctx->last = ctx;
 	}
-
-	/*
-	 * This is race safe in that the task itself is doing this, hence it
-	 * cannot be going through the exit/cancel paths at the same time.
-	 * This cannot be modified while exit/cancel is running.
-	 */
-	if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
-		tctx->sqpoll = true;
-
 	return 0;
 }
 
@@ -8847,6 +8811,44 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
 	}
 }
 
+static s64 tctx_inflight(struct io_uring_task *tctx)
+{
+	return percpu_counter_sum(&tctx->inflight);
+}
+
+static void io_sqpoll_cancel_cb(struct callback_head *cb)
+{
+	struct io_tctx_exit *work = container_of(cb, struct io_tctx_exit, task_work);
+	struct io_ring_ctx *ctx = work->ctx;
+	struct io_sq_data *sqd = ctx->sq_data;
+
+	if (sqd->thread)
+		io_uring_cancel_sqpoll(ctx);
+	complete(&work->completion);
+}
+
+static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx)
+{
+	struct io_sq_data *sqd = ctx->sq_data;
+	struct io_tctx_exit work = { .ctx = ctx, };
+	struct task_struct *task;
+
+	io_sq_thread_park(sqd);
+	list_del_init(&ctx->sqd_list);
+	io_sqd_update_thread_idle(sqd);
+	task = sqd->thread;
+	if (task) {
+		init_completion(&work.completion);
+		init_task_work(&work.task_work, io_sqpoll_cancel_cb);
+		WARN_ON_ONCE(task_work_add(task, &work.task_work, TWA_SIGNAL));
+		wake_up_process(task);
+	}
+	io_sq_thread_unpark(sqd);
+
+	if (task)
+		wait_for_completion(&work.completion);
+}
+
 void __io_uring_files_cancel(struct files_struct *files)
 {
 	struct io_uring_task *tctx = current->io_uring;
@@ -8855,41 +8857,40 @@ void __io_uring_files_cancel(struct files_struct *files)
 
 	/* make sure overflow events are dropped */
 	atomic_inc(&tctx->in_idle);
-	xa_for_each(&tctx->xa, index, node)
-		io_uring_cancel_task_requests(node->ctx, files);
+	xa_for_each(&tctx->xa, index, node) {
+		struct io_ring_ctx *ctx = node->ctx;
+
+		if (ctx->sq_data) {
+			io_sqpoll_cancel_sync(ctx);
+			continue;
+		}
+		io_uring_cancel_files(ctx, current, files);
+		if (!files)
+			io_uring_try_cancel_requests(ctx, current, NULL);
+	}
 	atomic_dec(&tctx->in_idle);
 
 	if (files)
 		io_uring_clean_tctx(tctx);
 }
 
-static s64 tctx_inflight(struct io_uring_task *tctx)
-{
-	return percpu_counter_sum(&tctx->inflight);
-}
-
+/* should only be called by SQPOLL task */
 static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 {
 	struct io_sq_data *sqd = ctx->sq_data;
-	struct io_uring_task *tctx;
+	struct io_uring_task *tctx = current->io_uring;
 	s64 inflight;
 	DEFINE_WAIT(wait);
 
-	if (!sqd)
-		return;
-	io_sq_thread_park(sqd);
-	if (!sqd->thread || !sqd->thread->io_uring) {
-		io_sq_thread_unpark(sqd);
-		return;
-	}
-	tctx = ctx->sq_data->thread->io_uring;
+	WARN_ON_ONCE(!sqd || ctx->sq_data->thread != current);
+
 	atomic_inc(&tctx->in_idle);
 	do {
 		/* read completions before cancelations */
 		inflight = tctx_inflight(tctx);
 		if (!inflight)
 			break;
-		io_uring_cancel_task_requests(ctx, NULL);
+		io_uring_try_cancel_requests(ctx, current, NULL);
 
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 		/*
@@ -8902,7 +8903,6 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
 	atomic_dec(&tctx->in_idle);
-	io_sq_thread_unpark(sqd);
 }
 
 /*
@@ -8917,15 +8917,6 @@ void __io_uring_task_cancel(void)
 
 	/* make sure overflow events are dropped */
 	atomic_inc(&tctx->in_idle);
-
-	if (tctx->sqpoll) {
-		struct io_tctx_node *node;
-		unsigned long index;
-
-		xa_for_each(&tctx->xa, index, node)
-			io_uring_cancel_sqpoll(node->ctx);
-	}
-
 	do {
 		/* read completions before cancelations */
 		inflight = tctx_inflight(tctx);
-- 
2.24.0



* Re: [PATCH 0/4] sqpoll fixes
  2021-03-11 23:29 [PATCH 0/4] sqpoll fixes Pavel Begunkov
                   ` (3 preceding siblings ...)
  2021-03-11 23:29 ` [PATCH 4/4] io_uring: cancel sqpoll via task_work Pavel Begunkov
@ 2021-03-12 18:19 ` Jens Axboe
  4 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2021-03-12 18:19 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 3/11/21 4:29 PM, Pavel Begunkov wrote:
> Patches 1-3 are relatively easy. 2/4 is more of a cleanup, but it's
> better to take it out of the equation; it makes life easier.
> 
> 4/4 removes the park/unpark dance for cancellations and fixes a couple
> of issues (a test will follow later). Signals in io_sq_thread() add
> trouble by introducing more cases to think about and more races between
> a dying SQPOLL task, dying ctxs and userspace tasks going away.
> 
> Pavel Begunkov (4):
>   io_uring: cancel deferred requests in try_cancel
>   io_uring: remove useless ->startup completion
>   io_uring: prevent racy sqd->thread checks
>   io_uring: cancel sqpoll via task_work
> 
>  fs/io_uring.c | 195 +++++++++++++++++++++++++-------------------------
>  1 file changed, 99 insertions(+), 96 deletions(-)

Added for 5.12, thanks.

-- 
Jens Axboe



* Re: [PATCH 4/4] io_uring: cancel sqpoll via task_work
  2021-03-11 23:29 ` [PATCH 4/4] io_uring: cancel sqpoll via task_work Pavel Begunkov
@ 2021-03-12 19:35   ` Pavel Begunkov
  2021-03-12 19:40     ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-12 19:35 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/03/2021 23:29, Pavel Begunkov wrote:
> 1) The first problem is io_uring_cancel_sqpoll() ->
> io_uring_cancel_task_requests() basically doing park(); park(); and so
> hanging.
> 
> 2) Another one is more subtle: while the master task is doing
> cancellations, the SQPOLL task submits, between the end of the
> cancellation and finish(), requests that take a ref to the ctx, and so
> locks it up forever.
> 
> 3) Yet another is a dying SQPOLL task doing io_uring_cancel_sqpoll()
> while the owner task runs the same io_uring_cancel_sqpoll(); they race
> on tctx->wait events. And there are probably more of them.
> 
> Instead, do SQPOLL cancellations from within the SQPOLL task context via
> task_work, see io_sqpoll_cancel_sync(). With that we don't need the
> temporary park()/unpark() during cancellation, which is ugly, subtle and
> in any case doesn't allow io_run_task_work() to be run properly.
> 
> io_uring_cancel_sqpoll() is now called only from SQPOLL task context and
> under sqd locking, so all parking is removed from there. As a result,
> io_sq_thread_[un]park() and io_sq_thread_stop() are no longer used by
> the SQPOLL task, which spares us some headaches.
> 
> Also remove the ctx from the sqd list early to avoid 2). And kill
> tctx->sqpoll, which is not used anymore.


Looks like the chunk below somehow slipped out of the patch. Not
important for 5.12, but it can be folded in anyway.

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 9761a0ec9f95..c24c62b47745 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -22,7 +22,6 @@ struct io_uring_task {
 	void			*io_wq;
 	struct percpu_counter	inflight;
 	atomic_t		in_idle;
-	bool			sqpoll;
 
 	spinlock_t		task_lock;
 	struct io_wq_work_list	task_list;


-- 
Pavel Begunkov


* Re: [PATCH 4/4] io_uring: cancel sqpoll via task_work
  2021-03-12 19:35   ` Pavel Begunkov
@ 2021-03-12 19:40     ` Jens Axboe
  2021-03-12 21:41       ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2021-03-12 19:40 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 3/12/21 12:35 PM, Pavel Begunkov wrote:
> On 11/03/2021 23:29, Pavel Begunkov wrote:
>> 1) The first problem is io_uring_cancel_sqpoll() ->
>> io_uring_cancel_task_requests() basically doing park(); park(); and so
>> hanging.
>>
>> 2) Another one is more subtle: while the master task is doing
>> cancellations, the SQPOLL task submits, between the end of the
>> cancellation and finish(), requests that take a ref to the ctx, and so
>> locks it up forever.
>>
>> 3) Yet another is a dying SQPOLL task doing io_uring_cancel_sqpoll()
>> while the owner task runs the same io_uring_cancel_sqpoll(); they race
>> on tctx->wait events. And there are probably more of them.
>>
>> Instead, do SQPOLL cancellations from within the SQPOLL task context via
>> task_work, see io_sqpoll_cancel_sync(). With that we don't need the
>> temporary park()/unpark() during cancellation, which is ugly, subtle and
>> in any case doesn't allow io_run_task_work() to be run properly.
>>
>> io_uring_cancel_sqpoll() is now called only from SQPOLL task context and
>> under sqd locking, so all parking is removed from there. As a result,
>> io_sq_thread_[un]park() and io_sq_thread_stop() are no longer used by
>> the SQPOLL task, which spares us some headaches.
>>
>> Also remove the ctx from the sqd list early to avoid 2). And kill
>> tctx->sqpoll, which is not used anymore.
> 
> 
> Looks like the chunk below somehow slipped out of the patch. Not
> important for 5.12, but it can be folded in anyway.
> 
> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> index 9761a0ec9f95..c24c62b47745 100644
> --- a/include/linux/io_uring.h
> +++ b/include/linux/io_uring.h
> @@ -22,7 +22,6 @@ struct io_uring_task {
>  	void			*io_wq;
>  	struct percpu_counter	inflight;
>  	atomic_t		in_idle;
> -	bool			sqpoll;
>  
>  	spinlock_t		task_lock;
>  	struct io_wq_work_list	task_list;

Let's do it as a separate patch instead.

-- 
Jens Axboe



* Re: [PATCH 4/4] io_uring: cancel sqpoll via task_work
  2021-03-12 19:40     ` Jens Axboe
@ 2021-03-12 21:41       ` Pavel Begunkov
  2021-03-13  2:44         ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2021-03-12 21:41 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 12/03/2021 19:40, Jens Axboe wrote:
> On 3/12/21 12:35 PM, Pavel Begunkov wrote:
>> On 11/03/2021 23:29, Pavel Begunkov wrote:
>>> 1) The first problem is io_uring_cancel_sqpoll() ->
>>> io_uring_cancel_task_requests() basically doing park(); park(); and so
>>> hanging.
>>>
>>> 2) Another one is more subtle: while the master task is doing
>>> cancellations, the SQPOLL task submits, between the end of the
>>> cancellation and finish(), requests that take a ref to the ctx, and so
>>> locks it up forever.
>>>
>>> 3) Yet another is a dying SQPOLL task doing io_uring_cancel_sqpoll()
>>> while the owner task runs the same io_uring_cancel_sqpoll(); they race
>>> on tctx->wait events. And there are probably more of them.
>>>
>>> Instead, do SQPOLL cancellations from within the SQPOLL task context via
>>> task_work, see io_sqpoll_cancel_sync(). With that we don't need the
>>> temporary park()/unpark() during cancellation, which is ugly, subtle and
>>> in any case doesn't allow io_run_task_work() to be run properly.
>>>
>>> io_uring_cancel_sqpoll() is now called only from SQPOLL task context and
>>> under sqd locking, so all parking is removed from there. As a result,
>>> io_sq_thread_[un]park() and io_sq_thread_stop() are no longer used by
>>> the SQPOLL task, which spares us some headaches.
>>>
>>> Also remove the ctx from the sqd list early to avoid 2). And kill
>>> tctx->sqpoll, which is not used anymore.
>>
>>
>> Looks like the chunk below somehow slipped out of the patch. Not
>> important for 5.12, but it can be folded in anyway.
>>
>> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
>> index 9761a0ec9f95..c24c62b47745 100644
>> --- a/include/linux/io_uring.h
>> +++ b/include/linux/io_uring.h
>> @@ -22,7 +22,6 @@ struct io_uring_task {
>>  	void			*io_wq;
>>  	struct percpu_counter	inflight;
>>  	atomic_t		in_idle;
>> -	bool			sqpoll;
>>  
>>  	spinlock_t		task_lock;
>>  	struct io_wq_work_list	task_list;
> 
> Let's do it as a separate patch instead.

Ok, I'll send it for-5.13 when it's appropriate.

-- 
Pavel Begunkov


* Re: [PATCH 4/4] io_uring: cancel sqpoll via task_work
  2021-03-12 21:41       ` Pavel Begunkov
@ 2021-03-13  2:44         ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2021-03-13  2:44 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 3/12/21 2:41 PM, Pavel Begunkov wrote:
> On 12/03/2021 19:40, Jens Axboe wrote:
>> On 3/12/21 12:35 PM, Pavel Begunkov wrote:
>>> On 11/03/2021 23:29, Pavel Begunkov wrote:
>>>> 1) The first problem is io_uring_cancel_sqpoll() ->
>>>> io_uring_cancel_task_requests() basically doing park(); park(); and so
>>>> hanging.
>>>>
>>>> 2) Another one is more subtle: while the master task is doing
>>>> cancellations, the SQPOLL task submits, between the end of the
>>>> cancellation and finish(), requests that take a ref to the ctx, and so
>>>> locks it up forever.
>>>>
>>>> 3) Yet another is a dying SQPOLL task doing io_uring_cancel_sqpoll()
>>>> while the owner task runs the same io_uring_cancel_sqpoll(); they race
>>>> on tctx->wait events. And there are probably more of them.
>>>>
>>>> Instead, do SQPOLL cancellations from within the SQPOLL task context via
>>>> task_work, see io_sqpoll_cancel_sync(). With that we don't need the
>>>> temporary park()/unpark() during cancellation, which is ugly, subtle and
>>>> in any case doesn't allow io_run_task_work() to be run properly.
>>>>
>>>> io_uring_cancel_sqpoll() is now called only from SQPOLL task context and
>>>> under sqd locking, so all parking is removed from there. As a result,
>>>> io_sq_thread_[un]park() and io_sq_thread_stop() are no longer used by
>>>> the SQPOLL task, which spares us some headaches.
>>>>
>>>> Also remove the ctx from the sqd list early to avoid 2). And kill
>>>> tctx->sqpoll, which is not used anymore.
>>>
>>>
>>> Looks like the chunk below somehow slipped out of the patch. Not
>>> important for 5.12, but it can be folded in anyway.
>>>
>>> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
>>> index 9761a0ec9f95..c24c62b47745 100644
>>> --- a/include/linux/io_uring.h
>>> +++ b/include/linux/io_uring.h
>>> @@ -22,7 +22,6 @@ struct io_uring_task {
>>>  	void			*io_wq;
>>>  	struct percpu_counter	inflight;
>>>  	atomic_t		in_idle;
>>> -	bool			sqpoll;
>>>  
>>>  	spinlock_t		task_lock;
>>>  	struct io_wq_work_list	task_list;
>>
>> Let's do it as a separate patch instead.
> 
> Ok, I'll send it for-5.13 when it's appropriate.

Yeah that's fine, obviously no rush. I'll rebase for-5.13/io_uring
when -rc3 is out.


-- 
Jens Axboe

