All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhang Wensheng <zhangwensheng@huaweicloud.com>
To: stable@vger.kernel.org
Cc: axboe@kernel.dk, ast@kernel.org, daniel@iogearbox.net,
	andrii@kernel.org, kafai@fb.com, songliubraving@fb.com,
	yhs@fb.com, john.fastabend@gmail.com, kpsingh@kernel.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	zhangwensheng5@huawei.com, yukuai3@huawei.com
Subject: [PATCH 5.10] block: fix null-deref in percpu_ref_put
Date: Fri, 29 Jul 2022 14:52:43 +0800	[thread overview]
Message-ID: <20220729065243.1786222-1-zhangwensheng@huaweicloud.com> (raw)

From: Zhang Wensheng <zhangwensheng5@huawei.com>

In the use of q_usage_counter of request_queue, blk_cleanup_queue using
"wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter))"
to wait q_usage_counter becoming zero. however, if the q_usage_counter
becoming zero quickly, and percpu_ref_exit will execute and ref->data
will be freed, maybe another process will cause a null-defef problem
like below:

	CPU0                             CPU1
blk_cleanup_queue
 blk_freeze_queue
  blk_mq_freeze_queue_wait
				scsi_end_request
				 percpu_ref_get
				 ...
				 percpu_ref_put
				  atomic_long_sub_and_test
  percpu_ref_exit
   ref->data -> NULL
   				   ref->data->release(ref) -> null-deref

Fix it by setting flag(QUEUE_FLAG_USAGE_COUNT_SYNC) to add synchronization
mechanism, when ref->data->release is called, the flag will be setted,
and the "wait_event" in blk_mq_freeze_queue_wait must wait flag becoming
true as well, which will limit percpu_ref_exit to execute ahead of time.

Signed-off-by: Zhang Wensheng <zhangwensheng5@huawei.com>
---
 block/blk-core.c       | 4 +++-
 block/blk-mq.c         | 7 +++++++
 include/linux/blk-mq.h | 1 +
 include/linux/blkdev.h | 2 ++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 26664f2a139e..238d0f3cd279 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -385,7 +385,8 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * prevent that blk_mq_run_hw_queues() accesses the hardware queues
 	 * after draining finished.
 	 */
-	blk_freeze_queue(q);
+	blk_freeze_queue_start(q);
+	blk_mq_freeze_queue_wait_sync(q);
 
 	rq_qos_exit(q);
 
@@ -500,6 +501,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 	struct request_queue *q =
 		container_of(ref, struct request_queue, q_usage_counter);
 
+	blk_queue_flag_set(QUEUE_FLAG_USAGE_COUNT_SYNC, q);
 	wake_up_all(&q->mq_freeze_wq);
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c5d82b21a1cc..15c4e4530c87 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -134,6 +134,7 @@ void blk_freeze_queue_start(struct request_queue *q)
 {
 	mutex_lock(&q->mq_freeze_lock);
 	if (++q->mq_freeze_depth == 1) {
+		blk_queue_flag_clear(QUEUE_FLAG_USAGE_COUNT_SYNC, q);
 		percpu_ref_kill(&q->q_usage_counter);
 		mutex_unlock(&q->mq_freeze_lock);
 		if (queue_is_mq(q))
@@ -144,6 +145,12 @@ void blk_freeze_queue_start(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
 
+void blk_mq_freeze_queue_wait_sync(struct request_queue *q)
+{
+	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter) &&
+			test_bit(QUEUE_FLAG_USAGE_COUNT_SYNC, &q->queue_flags));
+}
+
 void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
 	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f8ea27423d1d..95b2c904375f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -522,6 +522,7 @@ void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_freeze_queue_start(struct request_queue *q);
 void blk_mq_freeze_queue_wait(struct request_queue *q);
+void blk_mq_freeze_queue_wait_sync(struct request_queue *q);
 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 				     unsigned long timeout);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 98fdf5a31fd6..b61461e3a734 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -629,6 +629,8 @@ struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+/* sync for q_usage_counter */
+#define QUEUE_FLAG_USAGE_COUNT_SYNC    30
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
-- 
2.31.1


             reply	other threads:[~2022-07-29  6:59 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-29  6:52 Zhang Wensheng [this message]
2022-07-29  7:48 ` [PATCH 5.10] block: fix null-deref in percpu_ref_put Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220729065243.1786222-1-zhangwensheng@huaweicloud.com \
    --to=zhangwensheng@huaweicloud.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=daniel@iogearbox.net \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=stable@vger.kernel.org \
    --cc=yhs@fb.com \
    --cc=yukuai3@huawei.com \
    --cc=zhangwensheng5@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.