All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>, Tejun Heo <tj@kernel.org>,
	linux-block@vger.kernel.org,
	"Darrick J . Wong" <djwong@kernel.org>
Subject: Re: [PATCH 4/5] block: drain file system I/O on del_gendisk
Date: Wed, 29 Sep 2021 16:17:01 +0800	[thread overview]
Message-ID: <YVQg/a6GnELfPV1S@T590> (raw)
In-Reply-To: <20210929071241.934472-5-hch@lst.de>

On Wed, Sep 29, 2021 at 09:12:40AM +0200, Christoph Hellwig wrote:
> Instead of delaying draining of file system I/O related items like the
> blk-qos queues, the integrity read workqueue and timeouts only when the
> request_queue is removed, do that when del_gendisk is called.  This is
> important for SCSI where the upper level drivers that control the gendisk
> are separate entities, and the disk can be freed much earlier than the
> request_queue, or can even be unbound without tearing down the queue.
> 
> Fixes: edb0872f44ec ("block: move the bdi from the request_queue to the gendisk")
> Reported-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  block/blk-core.c      | 27 ++++++++++++---------------
>  block/blk.h           |  1 +
>  block/genhd.c         | 21 +++++++++++++++++++++
>  include/linux/genhd.h |  1 +
>  4 files changed, 35 insertions(+), 15 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 43f5da707d8e3..4d8f5fe915887 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -49,7 +49,6 @@
>  #include "blk-mq.h"
>  #include "blk-mq-sched.h"
>  #include "blk-pm.h"
> -#include "blk-rq-qos.h"
>  
>  struct dentry *blk_debugfs_root;
>  
> @@ -337,23 +336,25 @@ void blk_put_queue(struct request_queue *q)
>  }
>  EXPORT_SYMBOL(blk_put_queue);
>  
> -void blk_set_queue_dying(struct request_queue *q)
> +void blk_queue_start_drain(struct request_queue *q)
>  {
> -	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> -
>  	/*
>  	 * When queue DYING flag is set, we need to block new req
>  	 * entering queue, so we call blk_freeze_queue_start() to
>  	 * prevent I/O from crossing blk_queue_enter().
>  	 */
>  	blk_freeze_queue_start(q);
> -
>  	if (queue_is_mq(q))
>  		blk_mq_wake_waiters(q);
> -
>  	/* Make blk_queue_enter() reexamine the DYING flag. */
>  	wake_up_all(&q->mq_freeze_wq);
>  }
> +
> +void blk_set_queue_dying(struct request_queue *q)
> +{
> +	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> +	blk_queue_start_drain(q);
> +}
>  EXPORT_SYMBOL_GPL(blk_set_queue_dying);
>  
>  /**
> @@ -385,13 +386,8 @@ void blk_cleanup_queue(struct request_queue *q)
>  	 */
>  	blk_freeze_queue(q);
>  
> -	rq_qos_exit(q);
> -
>  	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
>  
> -	/* for synchronous bio-based driver finish in-flight integrity i/o */
> -	blk_flush_integrity();
> -
>  	blk_sync_queue(q);
>  	if (queue_is_mq(q))
>  		blk_mq_exit_queue(q);
> @@ -474,11 +470,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
>  
>  static inline int bio_queue_enter(struct bio *bio)
>  {
> -	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
> +	struct gendisk *disk = bio->bi_bdev->bd_disk;
> +	struct request_queue *q = disk->queue;
>  
>  	while (!blk_try_enter_queue(q, false)) {
>  		if (bio->bi_opf & REQ_NOWAIT) {
> -			if (blk_queue_dying(q))
> +			if (test_bit(GD_DEAD, &disk->state))
>  				goto dead;
>  			bio_wouldblock_error(bio);
>  			return -EBUSY;
> @@ -495,8 +492,8 @@ static inline int bio_queue_enter(struct bio *bio)
>  		wait_event(q->mq_freeze_wq,
>  			   (!q->mq_freeze_depth &&
>  			    blk_pm_resume_queue(false, q)) ||
> -			   blk_queue_dying(q));
> -		if (blk_queue_dying(q))
> +			   test_bit(GD_DEAD, &disk->state));
> +		if (test_bit(GD_DEAD, &disk->state))
>  			goto dead;
>  	}
>  
> diff --git a/block/blk.h b/block/blk.h
> index 7d2a0ba7ed21d..e2ed2257709ae 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -51,6 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
>  void blk_free_flush_queue(struct blk_flush_queue *q);
>  
>  void blk_freeze_queue(struct request_queue *q);
> +void blk_queue_start_drain(struct request_queue *q);
>  
>  #define BIO_INLINE_VECS 4
>  struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
> diff --git a/block/genhd.c b/block/genhd.c
> index 7b6e5e1cf9564..b3c33495d7208 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -26,6 +26,7 @@
>  #include <linux/badblocks.h>
>  
>  #include "blk.h"
> +#include "blk-rq-qos.h"
>  
>  static struct kobject *block_depr;
>  
> @@ -559,6 +560,8 @@ EXPORT_SYMBOL(device_add_disk);
>   */
>  void del_gendisk(struct gendisk *disk)
>  {
> +	struct request_queue *q = disk->queue;
> +
>  	might_sleep();
>  
>  	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
> @@ -575,8 +578,26 @@ void del_gendisk(struct gendisk *disk)
>  	fsync_bdev(disk->part0);
>  	__invalidate_device(disk->part0, true);
>  
> +	/*
> +	 * Fail any new I/O.
> +	 */
> +	set_bit(GD_DEAD, &disk->state);
>  	set_capacity(disk, 0);
>  
> +	/*
> +	 * Prevent new I/O from crossing bio_queue_enter().
> +	 */
> +	blk_queue_start_drain(q);
> +	blk_mq_freeze_queue_wait(q);

Draining request won't fix the problem completely:

1) blk-mq dispatch code may still be in-progress after q_usage_counter
becomes zero, see the story in 662156641bc4 ("block: don't drain in-progress dispatch in
blk_cleanup_queue()")

2) elevator code / blkcg code may still be called after blk_cleanup_queue(), such
as kyber, trace_kyber_latency()(q->disk is referred) is called in kyber's timer
handler, and the timer is deleted via del_timer_sync() via kyber_exit_sched()
from blk_release_queue().

> +
> +	rq_qos_exit(q);
> +	blk_sync_queue(q);
> +	blk_flush_integrity();
> +	/*
> +	 * Allow using passthrough request again after the queue is torn down.
> +	 */
> +	blk_mq_unfreeze_queue(q);

Again, one FS bio is still possible to enter queue now: submit_bio_checks()
is done before set_capacity(0), and submitted after blk_mq_unfreeze_queue()
returns.


Thanks,
Ming


  reply	other threads:[~2021-09-29  8:17 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-29  7:12 tear down file system I/O in del_gendisk v3 Christoph Hellwig
2021-09-29  7:12 ` [PATCH 1/5] block: call submit_bio_checks under q_usage_counter Christoph Hellwig
2021-10-17  1:14   ` Jens Axboe
2021-09-29  7:12 ` [PATCH 2/5] block: factor out a blk_try_enter_queue helper Christoph Hellwig
2021-09-29  7:12 ` [PATCH 3/5] block: split bio_queue_enter from blk_queue_enter Christoph Hellwig
2021-09-29  7:12 ` [PATCH 4/5] block: drain file system I/O on del_gendisk Christoph Hellwig
2021-09-29  8:17   ` Ming Lei [this message]
2021-10-01  4:13     ` Christoph Hellwig
2021-10-05  2:15       ` Ming Lei
2021-09-29  7:12 ` [PATCH 5/5] block: keep q_usage_counter in atomic mode after del_gendisk Christoph Hellwig
2021-10-12  9:33 ` [PATCH 6/5] kyber: avoid q->disk dereferences in trace points Christoph Hellwig
2021-10-14 12:57   ` Ming Lei
2021-10-14  9:23 ` tear down file system I/O in del_gendisk v3 Yi Zhang
2021-10-17  1:14 ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YVQg/a6GnELfPV1S@T590 \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.