From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>, "Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH 2/2] io-wq: ensure task is valid before sending it a signal
Date: Wed, 13 Nov 2019 12:43:55 -0700
Message-ID: <20191113194355.12107-3-axboe@kernel.dk>
In-Reply-To: <20191113194355.12107-1-axboe@kernel.dk>

While we're always under RCU read protection when finding the worker
to signal, that only protects the worker from being freed. The task
could very well be exiting, if we get unlucky enough.

The same is true for ->cur_work, which is currently protected by the
lock of the wqe that this worker belongs to. Add a worker-specific lock
that protects both whether the task is exiting and the current work
item. Then we can guarantee that the task we're sending a signal to is:

1) Currently processing the exact work item we think it is
2) Not currently exiting
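
Concretely, every signal/wake path then follows the same pattern (a
condensed sketch of the hunks below):

	spin_lock_irq(&worker->lock);
	if (!worker->exiting)
		send_sig(SIGINT, worker->task, 1);
	spin_unlock_irq(&worker->lock);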

It's important not to use the wqe->lock for ->cur_work, as that leads
to lock ordering issues: if we're polling a signalfd, io_poll_wake() is
called under the signal lock, and it then needs to call io_wq_enqueue(),
which grabs the wqe->lock. For the cancel path, the ordering is exactly
the opposite.
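
Spelled out, the two acquisition orders described above are roughly:

	signalfd poll path:  signal lock -> io_poll_wake()
	                       -> io_wq_enqueue() -> wqe->lock
	cancel path, if ->cur_work stayed under wqe->lock:
	                       wqe->lock -> send_sig() -> signal lock

an ABBA inversion, which the separate per-worker lock avoids.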

Reported-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 66 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 22 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 26d81540c1fc..f035460b9776 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -49,7 +49,10 @@ struct io_worker {
 	struct task_struct *task;
 	wait_queue_head_t wait;
 	struct io_wqe *wqe;
+
 	struct io_wq_work *cur_work;
+	spinlock_t lock;
+	int exiting;
 
 	struct rcu_head rcu;
 	struct mm_struct *mm;
@@ -223,6 +226,10 @@ static void io_worker_exit(struct io_worker *worker)
 	if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
 		complete(&wqe->wq->done);
 
+	spin_lock_irq(&worker->lock);
+	worker->exiting = true;
+	spin_unlock_irq(&worker->lock);
+
 	kfree_rcu(worker, rcu);
 }
 
@@ -323,7 +330,6 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 		hlist_nulls_add_head_rcu(&worker->nulls_node,
 						&wqe->busy_list.head);
 	}
-	worker->cur_work = work;
 
 	/*
 	 * If worker is moving from bound to unbound (or vice versa), then
@@ -402,17 +408,6 @@ static void io_worker_handle_work(struct io_worker *worker)
 	do {
 		unsigned hash = -1U;
 
-		/*
-		 * Signals are either sent to cancel specific work, or to just
-		 * cancel all work items. For the former, ->cur_work must
-		 * match. ->cur_work is NULL at this point, since we haven't
-		 * assigned any work, so it's safe to flush signals for that
-		 * case. For the latter case of cancelling all work, the caller
-		 * wil have set IO_WQ_BIT_CANCEL.
-		 */
-		if (signal_pending(current))
-			flush_signals(current);
-
 		/*
 		 * If we got some work, mark us as busy. If we didn't, but
 		 * the list isn't empty, it means we stalled on hashed work.
@@ -432,6 +427,14 @@ static void io_worker_handle_work(struct io_worker *worker)
 		if (!work)
 			break;
 next:
+		/* flush any pending signals before assigning new work */
+		if (signal_pending(current))
+			flush_signals(current);
+
+		spin_lock_irq(&worker->lock);
+		worker->cur_work = work;
+		spin_unlock_irq(&worker->lock);
+
 		if ((work->flags & IO_WQ_WORK_NEEDS_FILES) &&
 		    current->files != work->files) {
 			task_lock(current);
@@ -457,8 +460,12 @@ static void io_worker_handle_work(struct io_worker *worker)
 		old_work = work;
 		work->func(&work);
 
-		spin_lock_irq(&wqe->lock);
+		spin_lock_irq(&worker->lock);
 		worker->cur_work = NULL;
+		spin_unlock_irq(&worker->lock);
+
+		spin_lock_irq(&wqe->lock);
+
 		if (hash != -1U) {
 			wqe->hash_map &= ~BIT_ULL(hash);
 			wqe->flags &= ~IO_WQE_FLAG_STALLED;
@@ -577,6 +584,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 	worker->nulls_node.pprev = NULL;
 	init_waitqueue_head(&worker->wait);
 	worker->wqe = wqe;
+	spin_lock_init(&worker->lock);
 
 	worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
 				"io_wqe_worker-%d/%d", index, wqe->node);
@@ -721,7 +729,10 @@ void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val)
 
 static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
 {
-	send_sig(SIGINT, worker->task, 1);
+	spin_lock_irq(&worker->lock);
+	if (!worker->exiting)
+		send_sig(SIGINT, worker->task, 1);
+	spin_unlock_irq(&worker->lock);
 	return false;
 }
 
@@ -783,7 +794,6 @@ struct io_cb_cancel_data {
 static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
 {
 	struct io_cb_cancel_data *data = cancel_data;
-	struct io_wqe *wqe = data->wqe;
 	unsigned long flags;
 	bool ret = false;
 
@@ -791,13 +801,14 @@ static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
 	 * Hold the lock to avoid ->cur_work going out of scope, caller
 	 * may deference the passed in work.
 	 */
-	spin_lock_irqsave(&wqe->lock, flags);
+	spin_lock_irqsave(&worker->lock, flags);
 	if (worker->cur_work &&
 	    data->cancel(worker->cur_work, data->caller_data)) {
-		send_sig(SIGINT, worker->task, 1);
+		if (!worker->exiting)
+			send_sig(SIGINT, worker->task, 1);
 		ret = true;
 	}
-	spin_unlock_irqrestore(&wqe->lock, flags);
+	spin_unlock_irqrestore(&worker->lock, flags);
 
 	return ret;
 }
@@ -864,13 +875,21 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
 	struct io_wq_work *work = data;
+	unsigned long flags;
+	bool ret = false;
+
+	if (worker->cur_work != work)
+		return false;
 
+	spin_lock_irqsave(&worker->lock, flags);
 	if (worker->cur_work == work) {
-		send_sig(SIGINT, worker->task, 1);
-		return true;
+		if (!worker->exiting)
+			send_sig(SIGINT, worker->task, 1);
+		ret = true;
 	}
+	spin_unlock_irqrestore(&worker->lock, flags);
 
-	return false;
+	return ret;
 }
 
 static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
@@ -1049,7 +1068,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
 
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
-	wake_up_process(worker->task);
+	spin_lock_irq(&worker->lock);
+	if (!worker->exiting)
+		wake_up_process(worker->task);
+	spin_unlock_irq(&worker->lock);
 	return false;
 }
 
-- 
2.24.0


