linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@suse.de>
To: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>
Cc: Tejun Heo <tj@kernel.org>,
	linux-block@vger.kernel.org, Ming Lei <ming.lei@redhat.com>
Subject: Re: [PATCH 3/4] block: drain file system I/O on del_gendisk
Date: Thu, 23 Sep 2021 08:21:34 +0200	[thread overview]
Message-ID: <ce23ed6e-fc5c-b209-0b6c-70163572fb67@suse.de> (raw)
In-Reply-To: <20210922172222.2453343-4-hch@lst.de>

On 9/22/21 7:22 PM, Christoph Hellwig wrote:
> Instead of delaying draining of file system I/O related items like the
> blk-qos queues, the integrity read workqueue and timeouts only when the
> request_queue is removed, do that when del_gendisk is called.  This is
> important for SCSI where the upper level drivers that control the gendisk
> are separate entities, and the disk can be freed much earlier than the
> request_queue, or can even be unbound without tearing down the queue.
> 
> Fixes: edb0872f44ec ("block: move the bdi from the request_queue to the gendisk")
> Reported-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/blk-core.c      | 27 ++++++++++++---------------
>   block/blk.h           |  1 +
>   block/genhd.c         | 21 +++++++++++++++++++++
>   include/linux/genhd.h |  1 +
>   4 files changed, 35 insertions(+), 15 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index e951378855a02..d150d829a53c4 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -49,7 +49,6 @@
>   #include "blk-mq.h"
>   #include "blk-mq-sched.h"
>   #include "blk-pm.h"
> -#include "blk-rq-qos.h"
>   
>   struct dentry *blk_debugfs_root;
>   
> @@ -337,23 +336,25 @@ void blk_put_queue(struct request_queue *q)
>   }
>   EXPORT_SYMBOL(blk_put_queue);
>   
> -void blk_set_queue_dying(struct request_queue *q)
> +void blk_queue_start_drain(struct request_queue *q)
>   {
> -	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> -
>   	/*
>   	 * When queue DYING flag is set, we need to block new req
>   	 * entering queue, so we call blk_freeze_queue_start() to
>   	 * prevent I/O from crossing blk_queue_enter().
>   	 */
>   	blk_freeze_queue_start(q);
> -
>   	if (queue_is_mq(q))
>   		blk_mq_wake_waiters(q);
> -
>   	/* Make blk_queue_enter() reexamine the DYING flag. */
>   	wake_up_all(&q->mq_freeze_wq);
>   }
> +
> +void blk_set_queue_dying(struct request_queue *q)
> +{
> +	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> +	blk_queue_start_drain(q);
> +}
>   EXPORT_SYMBOL_GPL(blk_set_queue_dying);
>   
>   /**
> @@ -385,13 +386,8 @@ void blk_cleanup_queue(struct request_queue *q)
>   	 */
>   	blk_freeze_queue(q);
>   
> -	rq_qos_exit(q);
> -
>   	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
>   
> -	/* for synchronous bio-based driver finish in-flight integrity i/o */
> -	blk_flush_integrity();
> -
>   	blk_sync_queue(q);
>   	if (queue_is_mq(q))
>   		blk_mq_exit_queue(q);
> @@ -474,11 +470,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
>   
>   static inline int bio_queue_enter(struct bio *bio)
>   {
> -	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
> +	struct gendisk *disk = bio->bi_bdev->bd_disk;
> +	struct request_queue *q = disk->queue;
>   
>   	while (!blk_try_enter_queue(q, false)) {
>   		if (bio->bi_opf & REQ_NOWAIT) {
> -			if (blk_queue_dying(q))
> +			if (test_bit(GD_DEAD, &disk->state))
>   				goto dead;
>   			bio_wouldblock_error(bio);
>   			return -EBUSY;
> @@ -495,8 +492,8 @@ static inline int bio_queue_enter(struct bio *bio)
>   		wait_event(q->mq_freeze_wq,
>   			   (!q->mq_freeze_depth &&
>   			    blk_pm_resume_queue(false, q)) ||
> -			   blk_queue_dying(q));
> -		if (blk_queue_dying(q))
> +			   test_bit(GD_DEAD, &disk->state));
> +		if (test_bit(GD_DEAD, &disk->state))
>   			goto dead;
>   	}
>   
> diff --git a/block/blk.h b/block/blk.h
> index 7d2a0ba7ed21d..e2ed2257709ae 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -51,6 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
>   void blk_free_flush_queue(struct blk_flush_queue *q);
>   
>   void blk_freeze_queue(struct request_queue *q);
> +void blk_queue_start_drain(struct request_queue *q);
>   
>   #define BIO_INLINE_VECS 4
>   struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
> diff --git a/block/genhd.c b/block/genhd.c
> index 7b6e5e1cf9564..b3c33495d7208 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -26,6 +26,7 @@
>   #include <linux/badblocks.h>
>   
>   #include "blk.h"
> +#include "blk-rq-qos.h"
>   
>   static struct kobject *block_depr;
>   
> @@ -559,6 +560,8 @@ EXPORT_SYMBOL(device_add_disk);
>    */
>   void del_gendisk(struct gendisk *disk)
>   {
> +	struct request_queue *q = disk->queue;
> +
>   	might_sleep();
>   
>   	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
> @@ -575,8 +578,26 @@ void del_gendisk(struct gendisk *disk)
>   	fsync_bdev(disk->part0);
>   	__invalidate_device(disk->part0, true);
>   
> +	/*
> +	 * Fail any new I/O.
> +	 */
> +	set_bit(GD_DEAD, &disk->state);
>   	set_capacity(disk, 0);
>   

I always tend to become a bit nervous about simple 'set_bit' statements.
On the one hand, it _might_ already be set (unlikely, I know).
But more importantly, 'set_bit' implies no memory barrier, so we have an 
inherent race condition.
So don't you need a barrier here to synchronize the 'state' variable 
update to all CPUs?
Or can't we just make it a test_and_set_bit(), which is fully ordered
and would also catch the bit already being set:

if (test_and_set_bit(GD_DEAD, &disk->state))
	WARN()

Hmm?

> +	/*
> +	 * Prevent new I/O from crossing bio_queue_enter().
> +	 */
> +	blk_queue_start_drain(q);
> +	blk_mq_freeze_queue_wait(q);
> +
> +	rq_qos_exit(q);
> +	blk_sync_queue(q);
> +	blk_flush_integrity();
> +	/*
> +	 * Allow using passthrough request again after the queue is torn down.
> +	 */
> +	blk_mq_unfreeze_queue(q);
> +
>   	if (!(disk->flags & GENHD_FL_HIDDEN)) {
>   		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
>   
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index c68d83c87f83f..0f5315c2b5a34 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -149,6 +149,7 @@ struct gendisk {
>   	unsigned long state;
>   #define GD_NEED_PART_SCAN		0
>   #define GD_READ_ONLY			1
> +#define GD_DEAD				2
>   
>   	struct mutex open_mutex;	/* open/close mutex */
>   	unsigned open_partitions;	/* number of open partitions */
> 

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

  parent reply	other threads:[~2021-09-23  6:21 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-22 17:22 tear down file system I/O in del_gendisk v2 Christoph Hellwig
2021-09-22 17:22 ` [PATCH 1/4] block: factor out a blk_try_enter_queue helper Christoph Hellwig
2021-09-22 17:22 ` [PATCH 2/4] block: split bio_queue_enter from blk_queue_enter Christoph Hellwig
2021-09-22 17:22 ` [PATCH 3/4] block: drain file system I/O on del_gendisk Christoph Hellwig
2021-09-23  1:34   ` Ming Lei
2021-09-23  5:27     ` Christoph Hellwig
2021-09-23  6:39       ` Ming Lei
2021-09-27 12:04         ` Christoph Hellwig
2021-09-27 14:32           ` Ming Lei
2021-09-23  6:21   ` Hannes Reinecke [this message]
2021-09-22 17:22 ` [PATCH 4/4] block: keep q_usage_counter in atomic mode after del_gendisk Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2021-09-20 11:24 tear down file system I/O in del_gendisk Christoph Hellwig
2021-09-20 11:24 ` [PATCH 3/4] block: drain file system I/O on del_gendisk Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ce23ed6e-fc5c-b209-0b6c-70163572fb67@suse.de \
    --to=hare@suse.de \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=tj@kernel.org \
    --subject='Re: [PATCH 3/4] block: drain file system I/O on del_gendisk' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).