* [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
@ 2022-02-16 15:09 Christoph Hellwig
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
` (3 more replies)
0 siblings, 4 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 15:09 UTC (permalink / raw)
To: axboe; +Cc: kbusch, markus.bloechl, linux-nvme, linux-block
Various block drivers call blk_set_queue_dying to mark a disk as dead due
to surprise removal events, but since commit 8e141f9eb803 that doesn't
work given that the GD_DEAD flag needs to be set to stop I/O.
Replace the driver calls to blk_set_queue_dying with a new (and properly
documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the
only remaining caller.
Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk")
Reported-by: Markus Blöchl <markus.bloechl@ipetronik.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-core.c | 18 +++++++++++++-----
drivers/block/mtip32xx/mtip32xx.c | 2 +-
drivers/block/rbd.c | 2 +-
drivers/block/xen-blkfront.c | 2 +-
drivers/md/dm.c | 2 +-
drivers/nvme/host/core.c | 2 +-
drivers/nvme/host/multipath.c | 2 +-
include/linux/blkdev.h | 3 ++-
8 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d93e3bb9a769b..15d5c5ba5bbe5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -284,12 +284,19 @@ void blk_queue_start_drain(struct request_queue *q)
wake_up_all(&q->mq_freeze_wq);
}
-void blk_set_queue_dying(struct request_queue *q)
+/**
+ * blk_set_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
{
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- blk_queue_start_drain(q);
+ set_bit(GD_DEAD, &disk->state);
+ blk_queue_start_drain(disk->queue);
}
-EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
/**
* blk_cleanup_queue - shutdown a request queue
@@ -308,7 +315,8 @@ void blk_cleanup_queue(struct request_queue *q)
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
- blk_set_queue_dying(q);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+ blk_queue_start_drain(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e6005c2323281..2b588b62cbbb2 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
"Completion workers still active!\n");
}
- blk_set_queue_dying(dd->queue);
+ blk_mark_disk_dead(dd->disk);
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
/* Clean up the block layer. */
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4203cdab8abfd..b844432bad20b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
* IO to complete/fail.
*/
blk_mq_freeze_queue(rbd_dev->disk->queue);
- blk_set_queue_dying(rbd_dev->disk->queue);
+ blk_mark_disk_dead(rbd_dev->disk);
}
del_gendisk(rbd_dev->disk);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ccd0dd0c6b83c..ca71a0585333f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2126,7 +2126,7 @@ static void blkfront_closing(struct blkfront_info *info)
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
- blk_set_queue_dying(info->rq);
+ blk_mark_disk_dead(info->gd);
set_capacity(info->gd, 0);
for_each_rinfo(info, rinfo, i) {
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dcbd6d201619d..997ace47bbd54 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2077,7 +2077,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
- blk_set_queue_dying(md->queue);
+ blk_mark_disk_dead(md->disk);
/*
* Take suspend_lock so that presuspend and postsuspend methods
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 79005ea1a33e3..469f23186159c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4574,7 +4574,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
- blk_set_queue_dying(ns->queue);
+ blk_mark_disk_dead(ns->disk);
nvme_start_ns_queue(ns);
set_capacity_and_notify(ns->disk, 0);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f8bf6606eb2fc..ff775235534cf 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- blk_set_queue_dying(head->disk->queue);
+ blk_mark_disk_dead(head->disk);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f35aea98bc351..16b47035e4b06 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
bool __must_check blk_get_queue(struct request_queue *);
extern void blk_put_queue(struct request_queue *);
-extern void blk_set_queue_dying(struct request_queue *);
+
+void blk_mark_disk_dead(struct gendisk *disk);
#ifdef CONFIG_BLOCK
/*
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:09 [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Christoph Hellwig
@ 2022-02-16 15:09 ` Christoph Hellwig
2022-02-16 15:18 ` Hannes Reinecke
` (2 more replies)
2022-02-16 15:32 ` [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Hannes Reinecke
` (2 subsequent siblings)
3 siblings, 3 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 15:09 UTC (permalink / raw)
To: axboe; +Cc: kbusch, markus.bloechl, linux-nvme, linux-block
For surprise removals that have already marked the disk dead, there is
no point in calling fsync_bdev as all I/O will fail anyway, so skip it.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/genhd.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/block/genhd.c b/block/genhd.c
index 626c8406f21a6..f68bdfe4f883b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -584,7 +584,14 @@ void del_gendisk(struct gendisk *disk)
blk_drop_partitions(disk);
mutex_unlock(&disk->open_mutex);
- fsync_bdev(disk->part0);
+ /*
+ * If this is not a surprise removal see if there is a file system
+ * mounted on this device and sync it (although this won't work for
+ * partitions). For surprise removals that have already marked the
+ * disk dead skip this call as no I/O is possible anyway.
+ */
+ if (!test_bit(GD_DEAD, &disk->state))
+ fsync_bdev(disk->part0);
__invalidate_device(disk->part0, true);
/*
--
2.30.2
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
@ 2022-02-16 15:18 ` Hannes Reinecke
2022-02-16 15:20 ` Christoph Hellwig
2022-02-16 15:32 ` Markus Blöchl
2022-02-16 15:37 ` Keith Busch
2 siblings, 1 reply; 16+ messages in thread
From: Hannes Reinecke @ 2022-02-16 15:18 UTC (permalink / raw)
To: Christoph Hellwig, axboe; +Cc: kbusch, markus.bloechl, linux-nvme, linux-block
On 2/16/22 16:09, Christoph Hellwig wrote:
> For surprise removals that have already marked the disk dead, there is
> no point in calling fsync_bdev as all I/O will fail anyway, so skip it.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> block/genhd.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index 626c8406f21a6..f68bdfe4f883b 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -584,7 +584,14 @@ void del_gendisk(struct gendisk *disk)
> blk_drop_partitions(disk);
> mutex_unlock(&disk->open_mutex);
>
> - fsync_bdev(disk->part0);
> + /*
> + * If this is not a surprise removal see if there is a file system
> + * mounted on this device and sync it (although this won't work for
> + * partitions). For surprise removals that have already marked the
> + * disk dead skip this call as no I/O is possible anyway.
> + */
> + if (!test_bit(GD_DEAD, &disk->state))
> + fsync_bdev(disk->part0);
> __invalidate_device(disk->part0, true);
>
> /*
My turn to be picky:
In the previous patch you use 'set_bit()' for GD_DEAD, which to my
knowledge doesn't imply a memory barrier.
Yet here you rely on that for the 'test_bit()' to return the
correct/most recent value.
Don't we need a memory barrier here somewhere?
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:18 ` Hannes Reinecke
@ 2022-02-16 15:20 ` Christoph Hellwig
2022-02-16 15:31 ` Hannes Reinecke
0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 15:20 UTC (permalink / raw)
To: Hannes Reinecke
Cc: Christoph Hellwig, axboe, kbusch, markus.bloechl, linux-nvme,
linux-block
On Wed, Feb 16, 2022 at 04:18:43PM +0100, Hannes Reinecke wrote:
> My turn to be picky:
> In the previous patch you use 'set_bit()' for GD_DEAD, which to my
> knowledge doesn't imply a memory barrier.
> Yet here you rely on that for the 'test_bit()' to return the correct/most
> recent value.
> Don't we need a memory barrier here somewhere?
Well, we only do the test to skip useless work. A race is not a
problem here.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:20 ` Christoph Hellwig
@ 2022-02-16 15:31 ` Hannes Reinecke
0 siblings, 0 replies; 16+ messages in thread
From: Hannes Reinecke @ 2022-02-16 15:31 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, kbusch, markus.bloechl, linux-nvme, linux-block
On 2/16/22 16:20, Christoph Hellwig wrote:
> On Wed, Feb 16, 2022 at 04:18:43PM +0100, Hannes Reinecke wrote:
>> My turn to be picky:
>> In the previous patch you use 'set_bit()' for GD_DEAD, which to my
>> knowledge doesn't imply a memory barrier.
>> Yet here you rely on that for the 'test_bit()' to return the correct/most
>> recent value.
>> Don't we need a memory barrier here somewhere?
>
> Well, we only do the test to skip useless work. A race is not a
> problem here.
Ok.
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
2022-02-16 15:09 [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Christoph Hellwig
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
@ 2022-02-16 15:32 ` Hannes Reinecke
2022-02-16 15:49 ` Markus Blöchl
2022-02-16 17:31 ` kernel test robot
3 siblings, 0 replies; 16+ messages in thread
From: Hannes Reinecke @ 2022-02-16 15:32 UTC (permalink / raw)
To: Christoph Hellwig, axboe; +Cc: kbusch, markus.bloechl, linux-nvme, linux-block
On 2/16/22 16:09, Christoph Hellwig wrote:
> Various block drivers call blk_set_queue_dying to mark a disk as dead due
> to surprise removal events, but since commit 8e141f9eb803 that doesn't
> work given that the GD_DEAD flag needs to be set to stop I/O.
>
> Replace the driver calls to blk_set_queue_dying with a new (and properly
> documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the
> only remaining caller.
>
> Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk")
> Reported-by: Markus Blöchl <markus.bloechl@ipetronik.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> block/blk-core.c | 18 +++++++++++++-----
> drivers/block/mtip32xx/mtip32xx.c | 2 +-
> drivers/block/rbd.c | 2 +-
> drivers/block/xen-blkfront.c | 2 +-
> drivers/md/dm.c | 2 +-
> drivers/nvme/host/core.c | 2 +-
> drivers/nvme/host/multipath.c | 2 +-
> include/linux/blkdev.h | 3 ++-
> 8 files changed, 21 insertions(+), 12 deletions(-)
>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
2022-02-16 15:18 ` Hannes Reinecke
@ 2022-02-16 15:32 ` Markus Blöchl
2022-02-16 17:09 ` Christoph Hellwig
2022-02-16 15:37 ` Keith Busch
2 siblings, 1 reply; 16+ messages in thread
From: Markus Blöchl @ 2022-02-16 15:32 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, kbusch, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 04:09:01PM +0100, Christoph Hellwig wrote:
> For surprise removals that have already marked the disk dead, there is
> no point in calling fsync_bdev as all I/O will fail anyway, so skip it.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> block/genhd.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index 626c8406f21a6..f68bdfe4f883b 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -584,7 +584,14 @@ void del_gendisk(struct gendisk *disk)
> blk_drop_partitions(disk);
blk_drop_partitions() also invokes fsync_bdev() via delete_partition().
So why treat them differently?
> mutex_unlock(&disk->open_mutex);
>
> - fsync_bdev(disk->part0);
> + /*
> + * If this is not a surprise removal see if there is a file system
> + * mounted on this device and sync it (although this won't work for
> + * partitions). For surprise removals that have already marked the
> + * disk dead skip this call as no I/O is possible anyway.
> + */
> + if (!test_bit(GD_DEAD, &disk->state))
> + fsync_bdev(disk->part0);
> __invalidate_device(disk->part0, true);
>
> /*
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
2022-02-16 15:18 ` Hannes Reinecke
2022-02-16 15:32 ` Markus Blöchl
@ 2022-02-16 15:37 ` Keith Busch
2022-02-16 15:45 ` Christoph Hellwig
2 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2022-02-16 15:37 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, markus.bloechl, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 04:09:01PM +0100, Christoph Hellwig wrote:
> - fsync_bdev(disk->part0);
> + /*
> + * If this is not a surprise removal see if there is a file system
> + * mounted on this device and sync it (although this won't work for
> + * partitions). For surprise removals that have already marked the
> + * disk dead skip this call as no I/O is possible anyway.
> + */
> + if (!test_bit(GD_DEAD, &disk->state))
> + fsync_bdev(disk->part0);
> __invalidate_device(disk->part0, true);
It used to be that any dirty pages would attempt to write, and get an
error on a surprise removal. Now that you're skipping the fsync_bdev(),
is something else taking responsibility to error those pages?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:37 ` Keith Busch
@ 2022-02-16 15:45 ` Christoph Hellwig
0 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 15:45 UTC (permalink / raw)
To: Keith Busch
Cc: Christoph Hellwig, axboe, markus.bloechl, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 07:37:22AM -0800, Keith Busch wrote:
> On Wed, Feb 16, 2022 at 04:09:01PM +0100, Christoph Hellwig wrote:
> > - fsync_bdev(disk->part0);
> > + /*
> > + * If this is not a surprise removal see if there is a file system
> > + * mounted on this device and sync it (although this won't work for
> > + * partitions). For surprise removals that have already marked the
> > + * disk dead skip this call as no I/O is possible anyway.
> > + */
> > + if (!test_bit(GD_DEAD, &disk->state))
> > + fsync_bdev(disk->part0);
> > __invalidate_device(disk->part0, true);
>
> It used to be that any dirty pages would attempt to write, and get an
> error on a surprise removal. Now that you're skipping the fsync_bdev(),
> is something else taking responsibility to error those pages?
truncate_inode_pages when closing the device.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
2022-02-16 15:09 [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Christoph Hellwig
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
2022-02-16 15:32 ` [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Hannes Reinecke
@ 2022-02-16 15:49 ` Markus Blöchl
2022-02-16 17:09 ` Christoph Hellwig
2022-02-16 17:31 ` kernel test robot
3 siblings, 1 reply; 16+ messages in thread
From: Markus Blöchl @ 2022-02-16 15:49 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, kbusch, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 04:09:00PM +0100, Christoph Hellwig wrote:
> Various block drivers call blk_set_queue_dying to mark a disk as dead due
> to surprise removal events, but since commit 8e141f9eb803 that doesn't
> work given that the GD_DEAD flag needs to be set to stop I/O.
>
> Replace the driver calls to blk_set_queue_dying with a new (and properly
> documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the
> only remaining caller.
>
> Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk")
> Reported-by: Markus Blöchl <markus.bloechl@ipetronik.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> block/blk-core.c | 18 +++++++++++++-----
> drivers/block/mtip32xx/mtip32xx.c | 2 +-
> drivers/block/rbd.c | 2 +-
> drivers/block/xen-blkfront.c | 2 +-
> drivers/md/dm.c | 2 +-
> drivers/nvme/host/core.c | 2 +-
> drivers/nvme/host/multipath.c | 2 +-
> include/linux/blkdev.h | 3 ++-
> 8 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index d93e3bb9a769b..15d5c5ba5bbe5 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -284,12 +284,19 @@ void blk_queue_start_drain(struct request_queue *q)
> wake_up_all(&q->mq_freeze_wq);
> }
>
> -void blk_set_queue_dying(struct request_queue *q)
> +/**
> + * blk_set_disk_dead - mark a disk as dead
> + * @disk: disk to mark as dead
> + *
> + * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
> + * to this disk.
> + */
> +void blk_mark_disk_dead(struct gendisk *disk)
> {
> - blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> - blk_queue_start_drain(q);
> + set_bit(GD_DEAD, &disk->state);
> + blk_queue_start_drain(disk->queue);
> }
> -EXPORT_SYMBOL_GPL(blk_set_queue_dying);
> +EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
I might have missed something here, but assuming I am a driver which
employs multiple different queues, some with a disk attached to them,
some without (Is that possible? The admin queue e.g.?)
and I just lost my connection and want to notify everything below me
that their connection is dead.
Would I really want to kill disk queues differently from non-disk
queues?
How is the admin queue killed? Is it even?
>
> /**
> * blk_cleanup_queue - shutdown a request queue
> @@ -308,7 +315,8 @@ void blk_cleanup_queue(struct request_queue *q)
> WARN_ON_ONCE(blk_queue_registered(q));
>
> /* mark @q DYING, no new request or merges will be allowed afterwards */
> - blk_set_queue_dying(q);
> + blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> + blk_queue_start_drain(q);
>
> blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
> blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
> diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
> index e6005c2323281..2b588b62cbbb2 100644
> --- a/drivers/block/mtip32xx/mtip32xx.c
> +++ b/drivers/block/mtip32xx/mtip32xx.c
> @@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
> "Completion workers still active!\n");
> }
>
> - blk_set_queue_dying(dd->queue);
> + blk_mark_disk_dead(dd->disk);
This driver is weird, I did find are reliably hint that dd->disk always
exists here. At least mtip_block_remove() has an extra check for that.
It also only set QUEUE_FLAG_DEAD if it detects a surprise removal and
not QUEUE_FLAG_DYING.
> set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
>
> /* Clean up the block layer. */
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 4203cdab8abfd..b844432bad20b 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
> * IO to complete/fail.
> */
> blk_mq_freeze_queue(rbd_dev->disk->queue);
> - blk_set_queue_dying(rbd_dev->disk->queue);
> + blk_mark_disk_dead(rbd_dev->disk);
> }
>
> del_gendisk(rbd_dev->disk);
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index ccd0dd0c6b83c..ca71a0585333f 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -2126,7 +2126,7 @@ static void blkfront_closing(struct blkfront_info *info)
>
> /* No more blkif_request(). */
> blk_mq_stop_hw_queues(info->rq);
> - blk_set_queue_dying(info->rq);
> + blk_mark_disk_dead(info->gd);
> set_capacity(info->gd, 0);
>
> for_each_rinfo(info, rinfo, i) {
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index dcbd6d201619d..997ace47bbd54 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -2077,7 +2077,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
> set_bit(DMF_FREEING, &md->flags);
> spin_unlock(&_minor_lock);
>
> - blk_set_queue_dying(md->queue);
> + blk_mark_disk_dead(md->disk);
>
> /*
> * Take suspend_lock so that presuspend and postsuspend methods
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 79005ea1a33e3..469f23186159c 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4574,7 +4574,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
> if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
> return;
>
> - blk_set_queue_dying(ns->queue);
> + blk_mark_disk_dead(ns->disk);
> nvme_start_ns_queue(ns);
>
> set_capacity_and_notify(ns->disk, 0);
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index f8bf6606eb2fc..ff775235534cf 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
> {
> if (!head->disk)
> return;
> - blk_set_queue_dying(head->disk->queue);
> + blk_mark_disk_dead(head->disk);
> /* make sure all pending bios are cleaned up */
> kblockd_schedule_work(&head->requeue_work);
> flush_work(&head->requeue_work);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index f35aea98bc351..16b47035e4b06 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
>
> bool __must_check blk_get_queue(struct request_queue *);
> extern void blk_put_queue(struct request_queue *);
> -extern void blk_set_queue_dying(struct request_queue *);
> +
> +void blk_mark_disk_dead(struct gendisk *disk);
>
> #ifdef CONFIG_BLOCK
> /*
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
2022-02-16 15:49 ` Markus Blöchl
@ 2022-02-16 17:09 ` Christoph Hellwig
2022-02-16 18:08 ` Markus Blöchl
0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 17:09 UTC (permalink / raw)
To: Markus Blöchl
Cc: Christoph Hellwig, axboe, kbusch, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 04:49:50PM +0100, Markus Blöchl wrote:
> > - blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> > - blk_queue_start_drain(q);
> > + set_bit(GD_DEAD, &disk->state);
> > + blk_queue_start_drain(disk->queue);
> > }
> > -EXPORT_SYMBOL_GPL(blk_set_queue_dying);
> > +EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
>
> I might have missed something here, but assuming I am a driver which
> employs multiple different queues, some with a disk attached to them,
> some without (Is that possible? The admin queue e.g.?)
> and I just lost my connection and want to notify everything below me
> that their connection is dead.
> Would I really want to kill disk queues differently from non-disk
> queues?
Yes. Things like the admin queue in nvme are under full control of
the driver. While the "disk" queues just get I/O from the file system
and thus need to be cut off.
> How is the admin queue killed? Is it even?
It isn't. We just stop submitting to it.
> > --- a/drivers/block/mtip32xx/mtip32xx.c
> > +++ b/drivers/block/mtip32xx/mtip32xx.c
> > @@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
> > "Completion workers still active!\n");
> > }
> >
> > - blk_set_queue_dying(dd->queue);
> > + blk_mark_disk_dead(dd->disk);
>
> This driver is weird, I did find are reliably hint that dd->disk always
> exists here. At least mtip_block_remove() has an extra check for that.
The driver is a bit of a mess indeed, but the disk and queue will be
non-NULL if ->probe returns successfully so this is fine. It is more
that some of the checks are not required.
> It also only set QUEUE_FLAG_DEAD if it detects a surprise removal and
> not QUEUE_FLAG_DYING.
Yes, this driver will need further work.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals
2022-02-16 15:32 ` Markus Blöchl
@ 2022-02-16 17:09 ` Christoph Hellwig
0 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 17:09 UTC (permalink / raw)
To: Markus Blöchl
Cc: Christoph Hellwig, axboe, kbusch, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 04:32:26PM +0100, Markus Blöchl wrote:
> On Wed, Feb 16, 2022 at 04:09:01PM +0100, Christoph Hellwig wrote:
> > For surprise removals that have already marked the disk dead, there is
> > no point in calling fsync_bdev as all I/O will fail anyway, so skip it.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> > block/genhd.c | 9 ++++++++-
> > 1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/block/genhd.c b/block/genhd.c
> > index 626c8406f21a6..f68bdfe4f883b 100644
> > --- a/block/genhd.c
> > +++ b/block/genhd.c
> > @@ -584,7 +584,14 @@ void del_gendisk(struct gendisk *disk)
> > blk_drop_partitions(disk);
>
> blk_drop_partitions() also invokes fsync_bdev() via delete_partition().
> So why treat them differently?
Yeah. I guess we should just skip this patch for now.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
2022-02-16 15:09 [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Christoph Hellwig
@ 2022-02-16 17:31 ` kernel test robot
2022-02-16 15:32 ` [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Hannes Reinecke
` (2 subsequent siblings)
3 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2022-02-16 17:31 UTC (permalink / raw)
To: Christoph Hellwig, axboe
Cc: kbuild-all, kbusch, markus.bloechl, linux-nvme, linux-block
Hi Christoph,
I love your patch! Perhaps something to improve:
[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on ceph-client/for-linus xen-tip/linux-next device-mapper-dm/for-next linus/master v5.17-rc4 next-20220216]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christoph-Hellwig/block-fix-surprise-removal-for-drivers-calling-blk_set_queue_dying/20220216-231057
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: nds32-allnoconfig (https://download.01.org/0day-ci/archive/20220217/202202170106.0gjJs3bN-lkp@intel.com/config)
compiler: nds32le-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/97d6b5e4c96fcbf1dc26120c918dd1baca987188
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christoph-Hellwig/block-fix-surprise-removal-for-drivers-calling-blk_set_queue_dying/20220216-231057
git checkout 97d6b5e4c96fcbf1dc26120c918dd1baca987188
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=nds32 SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> block/blk-core.c:296: warning: expecting prototype for blk_set_disk_dead(). Prototype was for blk_mark_disk_dead() instead
vim +296 block/blk-core.c
8e141f9eb803e20 Christoph Hellwig 2021-09-29 287
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 288 /**
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 289 * blk_set_disk_dead - mark a disk as dead
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 290 * @disk: disk to mark as dead
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 291 *
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 292 * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 293 * to this disk.
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 294 */
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 295 void blk_mark_disk_dead(struct gendisk *disk)
8e141f9eb803e20 Christoph Hellwig 2021-09-29 @296 {
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 297 set_bit(GD_DEAD, &disk->state);
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 298 blk_queue_start_drain(disk->queue);
8e141f9eb803e20 Christoph Hellwig 2021-09-29 299 }
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 300 EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
aed3ea94bdd2ac0 Jens Axboe 2014-12-22 301
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
@ 2022-02-16 17:31 ` kernel test robot
0 siblings, 0 replies; 16+ messages in thread
From: kernel test robot @ 2022-02-16 17:31 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3138 bytes --]
Hi Christoph,
I love your patch! Perhaps something to improve:
[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on ceph-client/for-linus xen-tip/linux-next device-mapper-dm/for-next linus/master v5.17-rc4 next-20220216]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christoph-Hellwig/block-fix-surprise-removal-for-drivers-calling-blk_set_queue_dying/20220216-231057
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: nds32-allnoconfig (https://download.01.org/0day-ci/archive/20220217/202202170106.0gjJs3bN-lkp(a)intel.com/config)
compiler: nds32le-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/97d6b5e4c96fcbf1dc26120c918dd1baca987188
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christoph-Hellwig/block-fix-surprise-removal-for-drivers-calling-blk_set_queue_dying/20220216-231057
git checkout 97d6b5e4c96fcbf1dc26120c918dd1baca987188
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=nds32 SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> block/blk-core.c:296: warning: expecting prototype for blk_set_disk_dead(). Prototype was for blk_mark_disk_dead() instead
vim +296 block/blk-core.c
8e141f9eb803e20 Christoph Hellwig 2021-09-29 287
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 288 /**
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 289 * blk_set_disk_dead - mark a disk as dead
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 290 * @disk: disk to mark as dead
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 291 *
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 292 * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 293 * to this disk.
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 294 */
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 295 void blk_mark_disk_dead(struct gendisk *disk)
8e141f9eb803e20 Christoph Hellwig 2021-09-29 @296 {
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 297 set_bit(GD_DEAD, &disk->state);
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 298 blk_queue_start_drain(disk->queue);
8e141f9eb803e20 Christoph Hellwig 2021-09-29 299 }
97d6b5e4c96fcbf Christoph Hellwig 2022-02-16 300 EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
aed3ea94bdd2ac0 Jens Axboe 2014-12-22 301
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
2022-02-16 17:09 ` Christoph Hellwig
@ 2022-02-16 18:08 ` Markus Blöchl
2022-02-16 18:10 ` Christoph Hellwig
0 siblings, 1 reply; 16+ messages in thread
From: Markus Blöchl @ 2022-02-16 18:08 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: axboe, kbusch, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 06:09:19PM +0100, Christoph Hellwig wrote:
> > I might have missed something here, but assuming I am a driver which
> > employs multiple different queues, some with a disk attached to them,
> > some without (Is that possible? The admin queue e.g.?)
> > and I just lost my connection and want to notify everything below me
> > that their connection is dead.
> > Would I really want to kill disk queues differently from non-disk
> > queues?
>
> Yes. Things like the admin queue in nvme are under full control of
> the driver. While the "disk" queues just get I/O from the file system
> and thus need to be cut off.
>
> > How is the admin queue killed? Is it even?
>
> It isn't. We just stop submitting to it.
Ah, It is in nvme_dev_remove_admin() so as long as we don't get stuck
ourselves before we get there, things should be fine since other tasks
waiting for blk_queue_enter() only wait until nvme_remove() is done.
>
> > > --- a/drivers/block/mtip32xx/mtip32xx.c
> > > +++ b/drivers/block/mtip32xx/mtip32xx.c
> > > @@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
> > > "Completion workers still active!\n");
> > > }
> > >
> > > - blk_set_queue_dying(dd->queue);
> > > + blk_mark_disk_dead(dd->disk);
> >
> > This driver is weird, I did find are reliably hint that dd->disk always
> > exists here. At least mtip_block_remove() has an extra check for that.
>
> The driver is a bit of a mess indeed, but the disk and queue will be
> non-NULL if ->probe returns successfully so this is fine. It is more
> that some of the checks are not required.
>
> > It also only set QUEUE_FLAG_DEAD if it detects a surprise removal and
> > not QUEUE_FLAG_DYING.
>
> Yes, this driver will need further work.
Alright, I more or less ignore this one for now, then.
I noticed that set_capacity() is also called most of the time when
a disk is killed. Should we also move that into blk_mark_disk_dead()?
Any reasons not to?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying
2022-02-16 18:08 ` Markus Blöchl
@ 2022-02-16 18:10 ` Christoph Hellwig
0 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2022-02-16 18:10 UTC (permalink / raw)
To: Markus Blöchl
Cc: Christoph Hellwig, axboe, kbusch, linux-nvme, linux-block
On Wed, Feb 16, 2022 at 07:08:50PM +0100, Markus Blöchl wrote:
> I noticed that set_capacity() is also called most of the time when
> a disk is killed. Should we also move that into blk_mark_disk_dead()?
> Any reasons not to?
I thought about that and I think it is a good idea. But for now I'd
keep the regression fix minimal and then do that as a next step.
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2022-02-16 18:10 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-16 15:09 [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Christoph Hellwig
2022-02-16 15:09 ` [PATCH 2/2] block: skip the fsync_bdev call in del_gendisk for surprise removals Christoph Hellwig
2022-02-16 15:18 ` Hannes Reinecke
2022-02-16 15:20 ` Christoph Hellwig
2022-02-16 15:31 ` Hannes Reinecke
2022-02-16 15:32 ` Markus Blöchl
2022-02-16 17:09 ` Christoph Hellwig
2022-02-16 15:37 ` Keith Busch
2022-02-16 15:45 ` Christoph Hellwig
2022-02-16 15:32 ` [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying Hannes Reinecke
2022-02-16 15:49 ` Markus Blöchl
2022-02-16 17:09 ` Christoph Hellwig
2022-02-16 18:08 ` Markus Blöchl
2022-02-16 18:10 ` Christoph Hellwig
2022-02-16 17:31 ` kernel test robot
2022-02-16 17:31 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.