All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bob Liu <bob.liu@oracle.com>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, hare@suse.com, ming.lei@redhat.com,
	bvanassche@acm.org, hch@lst.de, martin.petersen@oracle.com,
	jinpuwang@gmail.com, rpenyaev@suse.de,
	Bob Liu <bob.liu@oracle.com>
Subject: [PATCH v4] blk-mq: fix hang caused by freeze/unfreeze sequence
Date: Thu, 25 Apr 2019 03:28:46 -0700 (PDT)	[thread overview]
Message-ID: <20190425102846.28911-1-bob.liu@oracle.com> (raw)

The following is a description of a hang in blk_mq_freeze_queue_wait().
The hang happens on attempting to freeze a queue while another task does
queue unfreeze.

The root cause is an incorrect sequence of percpu_ref_resurrect() and
percpu_ref_kill() and as a result those two can be swapped:

 CPU#0                         CPU#1
 ----------------              -----------------
 q1 = blk_mq_init_queue(shared_tags)

                                q2 = blk_mq_init_queue(shared_tags)
                                  blk_mq_add_queue_tag_set(shared_tags)
                                    blk_mq_update_tag_set_depth(shared_tags)
				     list_for_each_entry()
                                      blk_mq_freeze_queue(q1)
                                       > percpu_ref_kill()
                                       > blk_mq_freeze_queue_wait()

 blk_cleanup_queue(q1)
  blk_mq_freeze_queue(q1)
   > percpu_ref_kill()
                 ^^^^^^ freeze_depth can't guarantee the order

                                      blk_mq_unfreeze_queue()
                                        > percpu_ref_resurrect()

   > blk_mq_freeze_queue_wait()
                 ^^^^^^ Hang here!!!!

This wrong sequence raises kernel warning:
percpu_ref_kill_and_confirm called more than once on blk_queue_usage_counter_release!
WARNING: CPU: 0 PID: 11854 at lib/percpu-refcount.c:336 percpu_ref_kill_and_confirm+0x99/0xb0

But the most unpleasant effect is a hang of a blk_mq_freeze_queue_wait(),
which waits for a zero of a q_usage_counter, which never happens
because percpu-ref was reinited (instead of being killed) and stays in
PERCPU state forever.

How to reproduce:
 - "insmod null_blk.ko shared_tags=1 nr_devices=0 queue_mode=2"
 - cpu0: python Script.py 0; taskset the corresponding process running on cpu0
 - cpu1: python Script.py 1; taskset the corresponding process running on cpu1

 Script.py:
 ------
 #!/usr/bin/python3

import os
import sys

while True:
    on = "echo 1 > /sys/kernel/config/nullb/%s/power" % sys.argv[1]
    off = "echo 0 > /sys/kernel/config/nullb/%s/power" % sys.argv[1]
    os.system(on)
    os.system(off)
------

This bug was first reported and fixed by Roman, previous discussion:
[1] Message id: 1443287365-4244-7-git-send-email-akinobu.mita@gmail.com
[2] Message id: 1443563240-29306-6-git-send-email-tj@kernel.org
[3] https://patchwork.kernel.org/patch/9268199/

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 v4:
   - Update commit log
 v3:
   - rebase to v5.1
 v2:
   - forgotten hunk from local repo
   - minor tweaks in the commit message
---
 block/blk-core.c       |  3 ++-
 block/blk-mq.c         | 19 ++++++++++---------
 include/linux/blkdev.h |  7 ++++++-
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a55389b..fb97497 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -433,7 +433,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 		smp_rmb();

 		wait_event(q->mq_freeze_wq,
-			   (atomic_read(&q->mq_freeze_depth) == 0 &&
+			   (!q->mq_freeze_depth &&
 			    (pm || (blk_pm_request_resume(q),
 				    !blk_queue_pm_only(q)))) ||
 			   blk_queue_dying(q));
@@ -523,6 +523,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	spin_lock_init(&q->queue_lock);

 	init_waitqueue_head(&q->mq_freeze_wq);
+	mutex_init(&q->mq_freeze_lock);

 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a935483..373af60 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -143,13 +143,14 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,

 void blk_freeze_queue_start(struct request_queue *q)
 {
-	int freeze_depth;
-
-	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
-	if (freeze_depth == 1) {
+	mutex_lock(&q->mq_freeze_lock);
+	if (++q->mq_freeze_depth == 1) {
 		percpu_ref_kill(&q->q_usage_counter);
+		mutex_unlock(&q->mq_freeze_lock);
 		if (queue_is_mq(q))
 			blk_mq_run_hw_queues(q, false);
+	} else {
+		mutex_unlock(&q->mq_freeze_lock);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
@@ -198,14 +199,14 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

 void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-	int freeze_depth;
-
-	freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
-	WARN_ON_ONCE(freeze_depth < 0);
-	if (!freeze_depth) {
+	mutex_lock(&q->mq_freeze_lock);
+	q->mq_freeze_depth--;
+	WARN_ON_ONCE(q->mq_freeze_depth < 0);
+	if (!q->mq_freeze_depth) {
 		percpu_ref_resurrect(&q->q_usage_counter);
 		wake_up_all(&q->mq_freeze_wq);
 	}
+	mutex_unlock(&q->mq_freeze_lock);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c58a3b..64f7683 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -535,7 +535,7 @@ struct request_queue {

 	struct mutex		sysfs_lock;

-	atomic_t		mq_freeze_depth;
+	int			mq_freeze_depth;

 #if defined(CONFIG_BLK_DEV_BSG)
 	struct bsg_class_device bsg_dev;
@@ -547,6 +547,11 @@ struct request_queue {
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
+	/*
+	 * Protect concurrent access to q_usage_counter by
+	 * percpu_ref_kill() and percpu_ref_reinit().
+	 */
+	struct mutex		mq_freeze_lock;
 	struct percpu_ref	q_usage_counter;
 	struct list_head	all_q_node;

--
2.9.5


             reply	other threads:[~2019-04-25 10:32 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-25 10:28 Bob Liu [this message]
2019-04-29  3:42 ` [PATCH v4] blk-mq: fix hang caused by freeze/unfreeze sequence Ming Lei
2019-04-29 18:04 ` Bart Van Assche
2019-05-08  9:13 ` Bob Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190425102846.28911-1-bob.liu@oracle.com \
    --to=bob.liu@oracle.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=hare@suse.com \
    --cc=hch@lst.de \
    --cc=jinpuwang@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=ming.lei@redhat.com \
    --cc=rpenyaev@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.