All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yu Kuai <yukuai3@huawei.com>
To: <axboe@kernel.dk>, <ming.lei@redhat.com>
Cc: <jack@suse.cz>, <linux-block@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <yukuai3@huawei.com>,
	<yi.zhang@huawei.com>
Subject: [PATCH RFC -next] sbitmap: fix possible io hung due to lost wakeups
Date: Fri, 17 Jun 2022 22:11:25 +0800	[thread overview]
Message-ID: <20220617141125.3024491-1-yukuai3@huawei.com> (raw)

Currently, same waitqueue might be woken up continuously:

__sbq_wake_up		__sbq_wake_up
 sbq_wake_ptr -> assume	0
			 sbq_wake_ptr -> 0
 atomic_dec_return
			atomic_dec_return
 atomic_cmpxchg -> succeed
			 atomic_cmpxchg -> failed
			  return true

			__sbq_wake_up
			 sbq_wake_ptr
			  atomic_read(&sbq->wake_index) -> still 0
 sbq_index_atomic_inc -> inc to 1
			  if (waitqueue_active(&ws->wait))
			   if (wake_index != atomic_read(&sbq->wake_index))
			    atomic_set -> reset from 1 to 0
 wake_up_nr -> wake up first waitqueue
			    // continue to wake up in first waitqueue

What's worse, io hung is possible in theory because wakeups might be
missed. For example, 2 * wake_batch tags are put, while only wake_batch
threads are worken:

__sbq_wake_up
 atomic_cmpxchg -> reset wait_cnt
			__sbq_wake_up -> decrease wait_cnt
			...
			__sbq_wake_up -> wait_cnt is decreased to 0 again
			 atomic_cmpxchg
			 sbq_index_atomic_inc -> increase wake_index
			 wake_up_nr -> wake up and waitqueue might be empty
 sbq_index_atomic_inc -> increase again, one waitqueue is skipped
 wake_up_nr -> invalid wake up because old wakequeue might be empty

To fix the problem, refactor to make sure waitqueues will be woken up
one by one,

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 lib/sbitmap.c | 83 ++++++++++++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 40 deletions(-)

diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index ae4fd4de9ebe..dc2959cb188c 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -574,66 +574,69 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
 
-static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
+static void sbq_update_wake_index(struct sbitmap_queue *sbq,
+				  int old_wake_index)
 {
 	int i, wake_index;
-
-	if (!atomic_read(&sbq->ws_active))
-		return NULL;
+	struct sbq_wait_state *ws;
 
 	wake_index = atomic_read(&sbq->wake_index);
-	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
-		struct sbq_wait_state *ws = &sbq->ws[wake_index];
+	if (old_wake_index != wake_index)
+		return;
 
+	for (i = 1; i < SBQ_WAIT_QUEUES; i++) {
+		wake_index = sbq_index_inc(wake_index);
+		ws = &sbq->ws[wake_index];
+		/* Find the next active waitqueue in round robin manner */
 		if (waitqueue_active(&ws->wait)) {
-			if (wake_index != atomic_read(&sbq->wake_index))
-				atomic_set(&sbq->wake_index, wake_index);
-			return ws;
+			atomic_cmpxchg(&sbq->wake_index, old_wake_index,
+				       wake_index);
+			return;
 		}
-
-		wake_index = sbq_index_inc(wake_index);
 	}
-
-	return NULL;
 }
 
 static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
 	unsigned int wake_batch;
-	int wait_cnt;
+	int wait_cnt, wake_index;
 
-	ws = sbq_wake_ptr(sbq);
-	if (!ws)
+	if (!atomic_read(&sbq->ws_active))
 		return false;
 
-	wait_cnt = atomic_dec_return(&ws->wait_cnt);
-	if (wait_cnt <= 0) {
-		int ret;
-
-		wake_batch = READ_ONCE(sbq->wake_batch);
-
-		/*
-		 * Pairs with the memory barrier in sbitmap_queue_resize() to
-		 * ensure that we see the batch size update before the wait
-		 * count is reset.
-		 */
-		smp_mb__before_atomic();
+	wake_index = atomic_read(&sbq->wake_index);
+	ws = &sbq->ws[wake_index];
+	/*
+	 * This can only happen in the first wakeup when sbitmap waitqueues
+	 * are no longer idle.
+	 */
+	if (!waitqueue_active(&ws->wait)) {
+		sbq_update_wake_index(sbq, wake_index);
+		return true;
+	}
 
-		/*
-		 * For concurrent callers of this, the one that failed the
-		 * atomic_cmpxhcg() race should call this function again
-		 * to wakeup a new batch on a different 'ws'.
-		 */
-		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
-		if (ret == wait_cnt) {
-			sbq_index_atomic_inc(&sbq->wake_index);
-			wake_up_nr(&ws->wait, wake_batch);
-			return false;
-		}
+	wait_cnt = atomic_dec_return(&ws->wait_cnt);
+	if (wait_cnt > 0)
+		return false;
 
+	sbq_update_wake_index(sbq, wake_index);
+	/*
+	 * Concurrent callers should call this function again
+	 * to wakeup a new batch on a different 'ws'.
+	 */
+	if (wait_cnt < 0)
 		return true;
-	}
+
+	wake_batch = READ_ONCE(sbq->wake_batch);
+	/*
+	 * Pairs with the memory barrier in sbitmap_queue_resize() to
+	 * ensure that we see the batch size update before the wait
+	 * count is reset.
+	 */
+	smp_mb__before_atomic();
+	atomic_set(&ws->wait_cnt, wake_batch);
+	wake_up_nr(&ws->wait, wake_batch);
 
 	return false;
 }
-- 
2.31.1


             reply	other threads:[~2022-06-17 13:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-17 14:11 Yu Kuai [this message]
2022-06-17 14:04 ` [PATCH RFC -next] sbitmap: fix possible io hung due to lost wakeups Keith Busch
2022-06-20 12:24 ` Jan Kara
2022-06-20 12:48   ` Jan Kara
2022-06-20 13:44     ` Yu Kuai
2022-06-20 17:02       ` Jan Kara
2022-06-22  3:58         ` Yu Kuai
2022-06-22  6:41           ` Yu Kuai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220617141125.3024491-1-yukuai3@huawei.com \
    --to=yukuai3@huawei.com \
    --cc=axboe@kernel.dk \
    --cc=jack@suse.cz \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=yi.zhang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.