Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] block: revert pushing the final release of request_queue to a workqueue.
@ 2020-02-06 11:10 yu kuai
  2020-02-07  4:09 ` Bart Van Assche
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: yu kuai @ 2020-02-06 11:10 UTC (permalink / raw)
  To: axboe, ming.lei, chaitanya.kulkarni, damien.lemoal, bvanassche,
	dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yukuai3, yi.zhang, zhangxiaoxu5, luoshijie1

syzbot is reporting use after free bug in debugfs_remove[1].

This is because in request_queue, 'q->debugfs_dir' and
'q->blk_trace->dir' could be the same dir. And in __blk_release_queue(),
blk_mq_debugfs_unregister() will remove everything inside the dir.

With futher investigation of the reporduce repro, the problem can be
reporduced by following procedure:

1. LOOP_CTL_ADD, create a request_queue q1, blk_mq_debugfs_register() will
create the dir.
2. LOOP_CTL_REMOVE, blk_release_queue() will add q1 to release queue.
3. LOOP_CTL_ADD, create another request_queue q2,blk_mq_debugfs_register()
will fail because the dir aready exist.
4. BLKTRACESETUP, create two files(msg and dropped) inside the dir.
5. call __blk_release_queue() for q1, debugfs_remove_recursive() will
delete the files created in step 4.
6. LOOP_CTL_REMOVE, blk_release_queue() will add q2 to release queue.
And when __blk_release_queue() is called for q2, blk_trace_shutdown() will
try to release the two files created in step 4, wich are aready released
in step 5.

|thread1		  |kworker	             |thread2               |
| ----------------------- | ------------------------ | -------------------- |
|loop_control_ioctl       |                          |                      |
| loop_add                |                          |                      |
|  blk_mq_debugfs_register|                          |                      |
|   debugfs_create_dir    |                          |                      |
|loop_control_ioctl       |                          |                      |
| loop_remove		  |                          |                      |
|  blk_release_queue      |                          |                      |
|   schedule_work         |                          |                      |
|			  |			     |loop_control_ioctl    |
|			  |			     | loop_add             |
|			  |			     |  ...                 |
|			  |			     |blk_trace_ioctl       |
|			  |			     | __blk_trace_setup    |
|			  |			     |   debugfs_create_file|
|			  |__blk_release_queue       |                      |
|			  | blk_mq_debugfs_unregister|                      |
|			  |  debugfs_remove_recursive|                      |
|			  |			     |loop_control_ioctl    |
|			  |			     | loop_remove          |
|			  |			     |  ...                 |
|			  |__blk_release_queue       |                      |
|			  | blk_trace_shutdown       |                      |
|			  |  debugfs_remove          |                      |

commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") pushed the
final release of request_queue to a workqueue, witch is not necessary
since commit 1e9364283764 ("blk-sysfs: Rework documention of
__blk_release_queue").

[1] https://syzkaller.appspot.com/bug?extid=903b72a010ad6b7a40f2
References: CVE-2019-19770
Fixes: commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression")
Reported-by: syzbot <syz...@syzkaller.appspotmail.com>
Signed-off-by: yu kuai <yukuai3@huawei.com>
---
 block/blk-sysfs.c      | 18 +++++-------------
 include/linux/blkdev.h |  2 --
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fca9b158f4a0..3f448292099d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -862,8 +862,8 @@ static void blk_exit_queue(struct request_queue *q)
 
 
 /**
- * __blk_release_queue - release a request queue
- * @work: pointer to the release_work member of the request queue to be released
+ * blk_release_queue - release a request queue
+ * @@kobj:    the kobj belonging to the request queue to be released
  *
  * Description:
  *     This function is called when a block device is being unregistered. The
@@ -873,9 +873,10 @@ static void blk_exit_queue(struct request_queue *q)
  *     of the request queue reaches zero, blk_release_queue is called to release
  *     all allocated resources of the request queue.
  */
-static void __blk_release_queue(struct work_struct *work)
+static void blk_release_queue(struct kobject *kobj)
 {
-	struct request_queue *q = container_of(work, typeof(*q), release_work);
+	struct request_queue *q =
+		container_of(kobj, struct request_queue, kobj);
 
 	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
 		blk_stat_remove_callback(q, q->poll_cb);
@@ -904,15 +905,6 @@ static void __blk_release_queue(struct work_struct *work)
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
-static void blk_release_queue(struct kobject *kobj)
-{
-	struct request_queue *q =
-		container_of(kobj, struct request_queue, kobj);
-
-	INIT_WORK(&q->release_work, __blk_release_queue);
-	schedule_work(&q->release_work);
-}
-
 static const struct sysfs_ops queue_sysfs_ops = {
 	.show	= queue_attr_show,
 	.store	= queue_attr_store,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 04cfa798a365..dff4d032c78a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -580,8 +580,6 @@ struct request_queue {
 
 	size_t			cmd_size;
 
-	struct work_struct	release_work;
-
 #define BLK_MAX_WRITE_HINTS	5
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
 };
-- 
2.17.2


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-06 11:10 [PATCH] block: revert pushing the final release of request_queue to a workqueue yu kuai
@ 2020-02-07  4:09 ` Bart Van Assche
  2020-02-07  6:02   ` yukuai (C)
  2020-02-07  9:30 ` Ming Lei
  2020-02-10  1:00 ` Bart Van Assche
  2 siblings, 1 reply; 14+ messages in thread
From: Bart Van Assche @ 2020-02-07  4:09 UTC (permalink / raw)
  To: yu kuai, axboe, ming.lei, chaitanya.kulkarni, damien.lemoal,
	dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1

On 2020-02-06 03:10, yu kuai wrote:
> commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") pushed the
> final release of request_queue to a workqueue, witch is not necessary
> since commit 1e9364283764 ("blk-sysfs: Rework documention of
> __blk_release_queue").

I think the second commit reference is wrong. Did you perhaps want to
refer to commit 7b36a7189fc3 ("block: don't call ioc_exit_icq() with the
queue lock held for blk-mq")? That is the commit that removed the
locking from blk_release_queue() and that makes it safe to revert commit
dc9edc44de6c.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-07  4:09 ` Bart Van Assche
@ 2020-02-07  6:02   ` yukuai (C)
  2020-02-07  7:10     ` yukuai (C)
  0 siblings, 1 reply; 14+ messages in thread
From: yukuai (C) @ 2020-02-07  6:02 UTC (permalink / raw)
  To: Bart Van Assche, axboe, ming.lei, chaitanya.kulkarni,
	damien.lemoal, dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1

On 2020/2/7 12:09, Bart Van Assche wrote:
> I think the second commit reference is wrong. Did you perhaps want to
> refer to commit 7b36a7189fc3 ("block: don't call ioc_exit_icq() with the
> queue lock held for blk-mq")? That is the commit that removed the
> locking from blk_release_queue() and that makes it safe to revert commit
> dc9edc44de6c.

Thank you for your response.

Commit 1e9364283764 just fix some comments, real funtional modification
should before that. And I do agree that commit 7b36a7189fc3 is the right
one.

By the way, do you agree the way I fix the CVE? I can send a V2 patch if
you do.

Thanks!
Yu Kuai


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-07  6:02   ` yukuai (C)
@ 2020-02-07  7:10     ` yukuai (C)
  0 siblings, 0 replies; 14+ messages in thread
From: yukuai (C) @ 2020-02-07  7:10 UTC (permalink / raw)
  To: Bart Van Assche, axboe, ming.lei, chaitanya.kulkarni,
	damien.lemoal, dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1

On 2020/2/7 14:02, yukuai (C) wrote:
> And I do agree that commit 7b36a7189fc3 is the right
> one.

My apologize that this is a mistake. Commit db6d99523560 is the one that
makes it safe to revert commit dc9edc44de6c. Because Commit db6d99523560
remove blk_exit_rl() from blkg_free().

blkg_release
   call_rcu --> can't sleep inside this
     __blkg_release
       blkg_free
         blk_exit_rl
	  blk_put_queue


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-06 11:10 [PATCH] block: revert pushing the final release of request_queue to a workqueue yu kuai
  2020-02-07  4:09 ` Bart Van Assche
@ 2020-02-07  9:30 ` Ming Lei
  2020-02-07 10:26   ` yukuai (C)
  2020-02-10  1:00 ` Bart Van Assche
  2 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2020-02-07  9:30 UTC (permalink / raw)
  To: yu kuai
  Cc: axboe, chaitanya.kulkarni, damien.lemoal, bvanassche, dhowells,
	asml.silence, ajay.joshi, linux-block, linux-kernel, yi.zhang,
	zhangxiaoxu5, luoshijie1

On Thu, Feb 06, 2020 at 07:10:52PM +0800, yu kuai wrote:
> syzbot is reporting use after free bug in debugfs_remove[1].
> 
> This is because in request_queue, 'q->debugfs_dir' and
> 'q->blk_trace->dir' could be the same dir. And in __blk_release_queue(),
> blk_mq_debugfs_unregister() will remove everything inside the dir.
> 
> With futher investigation of the reporduce repro, the problem can be
> reporduced by following procedure:
> 
> 1. LOOP_CTL_ADD, create a request_queue q1, blk_mq_debugfs_register() will
> create the dir.
> 2. LOOP_CTL_REMOVE, blk_release_queue() will add q1 to release queue.
> 3. LOOP_CTL_ADD, create another request_queue q2,blk_mq_debugfs_register()
> will fail because the dir aready exist.

Looks we should have called blk_mq_debugfs_unregister() from
blk_unregister_queue() because blk-mq debugfs uses disk name as debugfs
dir. Not sure why blk_mq_debugfs_unregister() is called from queue's
release handler.


> 4. BLKTRACESETUP, create two files(msg and dropped) inside the dir.
> 5. call __blk_release_queue() for q1, debugfs_remove_recursive() will
> delete the files created in step 4.
> 6. LOOP_CTL_REMOVE, blk_release_queue() will add q2 to release queue.
> And when __blk_release_queue() is called for q2, blk_trace_shutdown() will
> try to release the two files created in step 4, wich are aready released
> in step 5.
> 
> |thread1		  |kworker	             |thread2               |
> | ----------------------- | ------------------------ | -------------------- |
> |loop_control_ioctl       |                          |                      |
> | loop_add                |                          |                      |
> |  blk_mq_debugfs_register|                          |                      |
> |   debugfs_create_dir    |                          |                      |
> |loop_control_ioctl       |                          |                      |
> | loop_remove		  |                          |                      |
> |  blk_release_queue      |                          |                      |
> |   schedule_work         |                          |                      |
> |			  |			     |loop_control_ioctl    |
> |			  |			     | loop_add             |
> |			  |			     |  ...                 |
> |			  |			     |blk_trace_ioctl       |
> |			  |			     | __blk_trace_setup    |
> |			  |			     |   debugfs_create_file|
> |			  |__blk_release_queue       |                      |
> |			  | blk_mq_debugfs_unregister|                      |
> |			  |  debugfs_remove_recursive|                      |
> |			  |			     |loop_control_ioctl    |
> |			  |			     | loop_remove          |
> |			  |			     |  ...                 |
> |			  |__blk_release_queue       |                      |
> |			  | blk_trace_shutdown       |                      |
> |			  |  debugfs_remove          |                      |
> 
> commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") pushed the
> final release of request_queue to a workqueue, witch is not necessary
> since commit 1e9364283764 ("blk-sysfs: Rework documention of
> __blk_release_queue").
> 
> [1] https://syzkaller.appspot.com/bug?extid=903b72a010ad6b7a40f2
> References: CVE-2019-19770

I guess your test case is more complicated than the above CVE, which
should be triggered in single queue case.

> Fixes: commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression")

As Bart mentioned, the above tag is wrong.

> Reported-by: syzbot <syz...@syzkaller.appspotmail.com>
> Signed-off-by: yu kuai <yukuai3@huawei.com>
> ---
>  block/blk-sysfs.c      | 18 +++++-------------
>  include/linux/blkdev.h |  2 --
>  2 files changed, 5 insertions(+), 15 deletions(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index fca9b158f4a0..3f448292099d 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -862,8 +862,8 @@ static void blk_exit_queue(struct request_queue *q)
>  
>  
>  /**
> - * __blk_release_queue - release a request queue
> - * @work: pointer to the release_work member of the request queue to be released
> + * blk_release_queue - release a request queue
> + * @@kobj:    the kobj belonging to the request queue to be released
>   *
>   * Description:
>   *     This function is called when a block device is being unregistered. The
> @@ -873,9 +873,10 @@ static void blk_exit_queue(struct request_queue *q)
>   *     of the request queue reaches zero, blk_release_queue is called to release
>   *     all allocated resources of the request queue.
>   */
> -static void __blk_release_queue(struct work_struct *work)
> +static void blk_release_queue(struct kobject *kobj)
>  {
> -	struct request_queue *q = container_of(work, typeof(*q), release_work);
> +	struct request_queue *q =
> +		container_of(kobj, struct request_queue, kobj);
>  
>  	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
>  		blk_stat_remove_callback(q, q->poll_cb);
> @@ -904,15 +905,6 @@ static void __blk_release_queue(struct work_struct *work)
>  	call_rcu(&q->rcu_head, blk_free_queue_rcu);
>  }
>  
> -static void blk_release_queue(struct kobject *kobj)
> -{
> -	struct request_queue *q =
> -		container_of(kobj, struct request_queue, kobj);
> -
> -	INIT_WORK(&q->release_work, __blk_release_queue);
> -	schedule_work(&q->release_work);
> -}
> -
>  static const struct sysfs_ops queue_sysfs_ops = {
>  	.show	= queue_attr_show,
>  	.store	= queue_attr_store,
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 04cfa798a365..dff4d032c78a 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -580,8 +580,6 @@ struct request_queue {
>  
>  	size_t			cmd_size;
>  
> -	struct work_struct	release_work;
> -

Looks this approach isn't correct:

1) there are other sleepers in __blk_release_queue(), such blk-mq sysfs
kobject_put(), or cancel_delayed_work_sync(), ...

2) wrt. loop, the request queue's release handler may not be called yet
after loop_remove() returns, so this patch may not avoid the issue in
your step 3 in which blk_mq_debugfs_register fails when adding new loop
device. So release not by wq just reduces the chance, instead of fixing
it completely.

Thanks,
Ming


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-07  9:30 ` Ming Lei
@ 2020-02-07 10:26   ` yukuai (C)
  2020-02-07 12:24     ` yukuai (C)
  0 siblings, 1 reply; 14+ messages in thread
From: yukuai (C) @ 2020-02-07 10:26 UTC (permalink / raw)
  To: Ming Lei
  Cc: axboe, chaitanya.kulkarni, damien.lemoal, bvanassche, dhowells,
	asml.silence, ajay.joshi, linux-block, linux-kernel, yi.zhang,
	zhangxiaoxu5, luoshijie1

On 2020/2/7 17:30, Ming Lei wrote:

> I guess your test case is more complicated than the above CVE, which
> should be triggered in single queue case.

No, the test case is from Syzkaller, you can get it from [1]
> Looks this approach isn't correct:
> 
> 1) there are other sleepers in __blk_release_queue(), such blk-mq sysfs
> kobject_put(), or cancel_delayed_work_sync(), ...
> 
commit dc9edc44de6c pushing the final release of request_queue to a
workqueue because sleepers are not allowed. However, since since
commit db6d99523560, sleeper is ok because blk_exit_rl() is removed
form blkg_free().

> 2) wrt. loop, the request queue's release handler may not be called yet
> after loop_remove() returns, so this patch may not avoid the issue in
> your step 3 in which blk_mq_debugfs_register fails when adding new loop
> device. So release not by wq just reduces the chance, instead of fixing
> it completely.
> 
The reason of the problem is because the final release of request_queue
may be called after loop_remove() returns.
And I think it will be fixed if we revert commit db6d99523560.

Thanks
Yu Kuai
> 
> 
> .
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-07 10:26   ` yukuai (C)
@ 2020-02-07 12:24     ` yukuai (C)
  2020-02-07 13:04       ` Ming Lei
  0 siblings, 1 reply; 14+ messages in thread
From: yukuai (C) @ 2020-02-07 12:24 UTC (permalink / raw)
  To: Ming Lei
  Cc: axboe, chaitanya.kulkarni, damien.lemoal, bvanassche, dhowells,
	asml.silence, ajay.joshi, linux-block, linux-kernel, yi.zhang,
	zhangxiaoxu5, luoshijie1

On 2020/2/7 18:26, yukuai (C) wrote:
> The reason of the problem is because the final release of request_queue
> may be called after loop_remove() returns.

The description is not accurate. The reason of the problem is that
__blk_trace_setup() called before the final release of request_queue
returns.(step 4 before step 5)

Thanks!
Yu Kuai


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-07 12:24     ` yukuai (C)
@ 2020-02-07 13:04       ` Ming Lei
  2020-02-08  6:12         ` yukuai (C)
  0 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2020-02-07 13:04 UTC (permalink / raw)
  To: yukuai (C)
  Cc: axboe, chaitanya.kulkarni, damien.lemoal, bvanassche, dhowells,
	asml.silence, ajay.joshi, linux-block, linux-kernel, yi.zhang,
	zhangxiaoxu5, luoshijie1

On Fri, Feb 07, 2020 at 08:24:59PM +0800, yukuai (C) wrote:
> On 2020/2/7 18:26, yukuai (C) wrote:
> > The reason of the problem is because the final release of request_queue
> > may be called after loop_remove() returns.
> 
> The description is not accurate. The reason of the problem is that
> __blk_trace_setup() called before the final release of request_queue
> returns.(step 4 before step 5)

But blk_mq_debugfs_register() in your step 3 for adding loop still may
fail, that is why I suggest to consider to move
blk_mq_debugfs_register() into blk_unregister_queue().


Thanks,
Ming


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-07 13:04       ` Ming Lei
@ 2020-02-08  6:12         ` yukuai (C)
  0 siblings, 0 replies; 14+ messages in thread
From: yukuai (C) @ 2020-02-08  6:12 UTC (permalink / raw)
  To: Ming Lei
  Cc: axboe, chaitanya.kulkarni, damien.lemoal, bvanassche, dhowells,
	asml.silence, ajay.joshi, linux-block, linux-kernel, yi.zhang,
	zhangxiaoxu5, luoshijie1

On 2020/2/7 21:04, Ming Lei wrote:
> But blk_mq_debugfs_register() in your step 3 for adding loop still may
> fail, that is why I suggest to consider to move
> blk_mq_debugfs_register() into blk_unregister_queue().

I think therer might be a problem.

static void loop_remove(struct loop_device *lo)
{
         del_gendisk(lo->lo_disk);
         blk_cleanup_queue(lo->lo_queue);
         blk_mq_free_tag_set(&lo->tag_set);
         put_disk(lo->lo_disk);
         kfree(lo);
}

blk_unregister_queue() is called in del_gendisk(), while
blk_cleanup_queue() remove other files or dirs. And
blk_mq_debugfs_register() should be called at last since it
will remove everything.

Thanks
Yu Kuai


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-06 11:10 [PATCH] block: revert pushing the final release of request_queue to a workqueue yu kuai
  2020-02-07  4:09 ` Bart Van Assche
  2020-02-07  9:30 ` Ming Lei
@ 2020-02-10  1:00 ` Bart Van Assche
  2020-02-10  2:13   ` yukuai (C)
  2 siblings, 1 reply; 14+ messages in thread
From: Bart Van Assche @ 2020-02-10  1:00 UTC (permalink / raw)
  To: yu kuai, axboe, ming.lei, chaitanya.kulkarni, damien.lemoal,
	dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1, jan kara

On 2020-02-06 03:10, yu kuai wrote:
> syzbot is reporting use after free bug in debugfs_remove[1].
> 
> This is because in request_queue, 'q->debugfs_dir' and
> 'q->blk_trace->dir' could be the same dir. And in __blk_release_queue(),
> blk_mq_debugfs_unregister() will remove everything inside the dir.

Hi Yu,

Have you already noticed patch "[PATCH] blktrace: Protect q->blk_trace
with RCU"? If not, have you already tried to verify whether that patch
fixes the use-after-free detected by syzbot? See also
https://lore.kernel.org/linux-block/BYAPR04MB57492F689BA17786A24F08EE86190@BYAPR04MB5749.namprd04.prod.outlook.com/T/#mce8ffe534d93716f678d52178b4e34d4d1c3c597

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-10  1:00 ` Bart Van Assche
@ 2020-02-10  2:13   ` yukuai (C)
  2020-02-10  2:32     ` Ming Lei
  2020-02-10  3:14     ` Bart Van Assche
  0 siblings, 2 replies; 14+ messages in thread
From: yukuai (C) @ 2020-02-10  2:13 UTC (permalink / raw)
  To: Bart Van Assche, axboe, ming.lei, chaitanya.kulkarni,
	damien.lemoal, dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1, jan kara

On 2020/2/10 9:00, Bart Van Assche wrote:
> Have you already noticed patch "[PATCH] blktrace: Protect q->blk_trace
> with RCU"? If not, have you already tried to verify whether that patch
> fixes the use-after-free detected by syzbot?

I just tested and confirmed the patch didn't fix the problem.

By the way, I think Ming is right about "So release not by wq just
reduces the chance, instead of fixing it completely.", and "move
blk_mq_debugfs_unregister() into blk_unregister_queue()" is a good
choice. However, blk_trace_shutdown() and blk_mq_exit_queue() also
remove some files or dirs, and they may need to move to
blk_unregister_queue().

I tested following patch fixes the problem, however I'm not sure if
move blk_trace_shutdown() and blk_mu_exit_queue() is ok or we should
just move debgfs-related part.

---
  block/blk-core.c  |  3 ---
  block/blk-sysfs.c | 12 ++++++------
  2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 50a5de025d5e..1ab9808e73c7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -395,9 +395,6 @@ void blk_cleanup_queue(struct request_queue *q)
         del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
         blk_sync_queue(q);

-       if (queue_is_mq(q))
-               blk_mq_exit_queue(q);
-
         /*
         ┊* In theory, request pool of sched_tags belongs to request queue.
         ┊* However, the current implementation requires tag_set for freeing
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fca9b158f4a0..a0f64d641cb0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -893,11 +893,6 @@ static void __blk_release_queue(struct work_struct 
*work)
         if (queue_is_mq(q))
                 blk_mq_release(q);

-       blk_trace_shutdown(q);
-
-       if (queue_is_mq(q))
-               blk_mq_debugfs_unregister(q);
-
         bioset_exit(&q->bio_split);

         ida_simple_remove(&blk_queue_ida, q->id);
@@ -1043,8 +1038,13 @@ void blk_unregister_queue(struct gendisk *disk)
         ┊* Remove the sysfs attributes before unregistering the queue data
         ┊* structures that can be modified through sysfs.
         ┊*/
-       if (queue_is_mq(q))
+
+       blk_trace_shutdown(q);
+       if (queue_is_mq(q)) {
                 blk_mq_unregister_dev(disk_to_dev(disk), q);
+               blk_mq_exit_queue(q);
+               blk_mq_debugfs_unregister(q);
+       }

         kobject_uevent(&q->kobj, KOBJ_REMOVE);
         kobject_del(&q->kobj);
--
2.17.2



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-10  2:13   ` yukuai (C)
@ 2020-02-10  2:32     ` Ming Lei
  2020-02-10  3:14     ` Bart Van Assche
  1 sibling, 0 replies; 14+ messages in thread
From: Ming Lei @ 2020-02-10  2:32 UTC (permalink / raw)
  To: yukuai (C)
  Cc: Bart Van Assche, axboe, chaitanya.kulkarni, damien.lemoal,
	dhowells, asml.silence, ajay.joshi, linux-block, linux-kernel,
	yi.zhang, zhangxiaoxu5, luoshijie1, jan kara

On Mon, Feb 10, 2020 at 10:13:22AM +0800, yukuai (C) wrote:
> On 2020/2/10 9:00, Bart Van Assche wrote:
> > Have you already noticed patch "[PATCH] blktrace: Protect q->blk_trace
> > with RCU"? If not, have you already tried to verify whether that patch
> > fixes the use-after-free detected by syzbot?
> 
> I just tested and confirmed the patch didn't fix the problem.

Right, the two are for fixing different issue.

> 
> By the way, I think Ming is right about "So release not by wq just
> reduces the chance, instead of fixing it completely.", and "move
> blk_mq_debugfs_unregister() into blk_unregister_queue()" is a good
> choice. However, blk_trace_shutdown() and blk_mq_exit_queue() also
> remove some files or dirs, and they may need to move to
> blk_unregister_queue().

Right.

Fortunately, we hold sysfs_dir_lock in blk_unregister_queue(), so
the issue can be fixed by check & remove with holding the same lock
in blk_trace_free().


Thanks, 
Ming


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-10  2:13   ` yukuai (C)
  2020-02-10  2:32     ` Ming Lei
@ 2020-02-10  3:14     ` Bart Van Assche
  2020-02-10  8:49       ` yukuai (C)
  1 sibling, 1 reply; 14+ messages in thread
From: Bart Van Assche @ 2020-02-10  3:14 UTC (permalink / raw)
  To: yukuai (C),
	axboe, ming.lei, chaitanya.kulkarni, damien.lemoal, dhowells,
	asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1, jan kara

On 2020-02-09 18:13, yukuai (C) wrote:
> I tested following patch fixes the problem, however I'm not sure if
> move blk_trace_shutdown() and blk_mu_exit_queue() is ok or we should
> just move debgfs-related part.

From blk-mq.c:

/* tags can _not_ be used after returning from blk_mq_exit_queue */
void blk_mq_exit_queue(struct request_queue *q)
{
	struct blk_mq_tag_set	*set = q->tag_set;

	blk_mq_del_queue_tag_set(q);
	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
}

I think that calling blk_mq_exit_queue() from blk_unregister_queue()
would break at least the sd driver. The sd driver can issue I/O after
having called del_gendisk(). See also the sd_sync_cache() call in
sd_shutdown().

Bart.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] block: revert pushing the final release of request_queue to a workqueue.
  2020-02-10  3:14     ` Bart Van Assche
@ 2020-02-10  8:49       ` yukuai (C)
  0 siblings, 0 replies; 14+ messages in thread
From: yukuai (C) @ 2020-02-10  8:49 UTC (permalink / raw)
  To: Bart Van Assche, axboe, ming.lei, chaitanya.kulkarni,
	damien.lemoal, dhowells, asml.silence, ajay.joshi
  Cc: linux-block, linux-kernel, yi.zhang, zhangxiaoxu5, luoshijie1, jan kara

On 2020/2/10 11:14, Bart Van Assche wrote:
> I think that calling blk_mq_exit_queue() from blk_unregister_queue()
> would break at least the sd driver. The sd driver can issue I/O after
> having called del_gendisk(). See also the sd_sync_cache() call in
> sd_shutdown().

If blk_mq_exit_queue() can't move to blk_unregister_queue(),
neither can blk_mq_debugfs_unregister(). It'a dead end.

The purpose is that when __blk_trace_setup() is called, the cleanup of
last loop_device(__blk_release_queue()) should finish aready.

I wonder if we can test that if the dir still exist in loop_add():

static int loop_add(struct loop_device **l, int i)
{
...
           char disk_name[DISK_NAME_LEN];
           struct dentry *dir, *root;

           sprintf(disk_name, "loop%d", i);
           root = debugfs_lookup("block", NULL);
           if (root) {
                   dir = debugfs_lookup(disk_name, root);
                   if (dir) {
                           dput(dir);
                           dput(root);
                          pr_err("Directory '%s' with parent 'block' 
already present!\n",disk_name);
                           return -EBUSY;
                   }
                   dput(root);
           }
...

Thanks!
Yu Kuai


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-06 11:10 [PATCH] block: revert pushing the final release of request_queue to a workqueue yu kuai
2020-02-07  4:09 ` Bart Van Assche
2020-02-07  6:02   ` yukuai (C)
2020-02-07  7:10     ` yukuai (C)
2020-02-07  9:30 ` Ming Lei
2020-02-07 10:26   ` yukuai (C)
2020-02-07 12:24     ` yukuai (C)
2020-02-07 13:04       ` Ming Lei
2020-02-08  6:12         ` yukuai (C)
2020-02-10  1:00 ` Bart Van Assche
2020-02-10  2:13   ` yukuai (C)
2020-02-10  2:32     ` Ming Lei
2020-02-10  3:14     ` Bart Van Assche
2020-02-10  8:49       ` yukuai (C)

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git