All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	"Martin K . Petersen" <martin.petersen@oracle.com>
Cc: linux-block@vger.kernel.org, linux-nvme@lists.infradead.org,
	linux-scsi@vger.kernel.org, Ming Lei <ming.lei@redhat.com>
Subject: [PATCH V2 13/13] block: don't drain file system I/O on del_gendisk
Date: Sat, 22 Jan 2022 19:10:54 +0800	[thread overview]
Message-ID: <20220122111054.1126146-14-ming.lei@redhat.com> (raw)
In-Reply-To: <20220122111054.1126146-1-ming.lei@redhat.com>

Commit 8e141f9eb803 ("block: drain file system I/O on del_gendisk") is
added for avoiding to reference of q->disk after disk is released, since
disk release can be done before running into blk_cleanup_queue(),
but it can't avoid the issue completely, and the approach is very
fragile:

1) queue freezing can't drain FS I/O for bio based driver, so there is
still inflight bios after disk is deleted, so blk-cgroup shutdown
can't be moved to del_gendisk(); meantime elevator shutdown can't be
moved to del_gendisk() because freezing queue isn't enough to drain
io issue activities. So both block-cgroup and elevator code may still
refer to q->disk somewhere. Same with q->disk referring for bio based
driver.

2) the added flag of GD_DEAD may not be observed reliably in
__bio_queue_enter() because queue freezing might not imply rcu grace
period.

Now we have moved blkcg, rqos and elevator shutdown code into disk_release()
where it is guaranteed that no any FS IO is inflight. Meantime we only
account passthrough IO issued from userspace, where q->disk is always
valid. Then q->disk can be guaranteed to be referred before releasing
disk.

So not necessary to drain FS I/O in del_gendisk() any more.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-core.c      | 21 +++++++++------------
 block/blk.h           |  1 -
 block/genhd.c         | 21 ---------------------
 include/linux/genhd.h |  1 -
 4 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index bcb4d982cd80..2216360165c9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -270,8 +270,10 @@ void blk_put_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_put_queue);
 
-void blk_queue_start_drain(struct request_queue *q)
+void blk_set_queue_dying(struct request_queue *q)
 {
+	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+
 	/*
 	 * When queue DYING flag is set, we need to block new req
 	 * entering queue, so we call blk_freeze_queue_start() to
@@ -283,12 +285,6 @@ void blk_queue_start_drain(struct request_queue *q)
 	/* Make blk_queue_enter() reexamine the DYING flag. */
 	wake_up_all(&q->mq_freeze_wq);
 }
-
-void blk_set_queue_dying(struct request_queue *q)
-{
-	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-	blk_queue_start_drain(q);
-}
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
 /**
@@ -322,6 +318,9 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
+	/* for synchronous bio-based driver finish in-flight integrity i/o */
+	blk_flush_integrity();
+
 	blk_sync_queue(q);
 	if (queue_is_mq(q)) {
 		blk_mq_cancel_work_sync(q);
@@ -381,10 +380,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 int __bio_queue_enter(struct request_queue *q, struct bio *bio)
 {
 	while (!blk_try_enter_queue(q, false)) {
-		struct gendisk *disk = bio->bi_bdev->bd_disk;
-
 		if (bio->bi_opf & REQ_NOWAIT) {
-			if (test_bit(GD_DEAD, &disk->state))
+			if (blk_queue_dying(q))
 				goto dead;
 			bio_wouldblock_error(bio);
 			return -EBUSY;
@@ -401,8 +398,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
 		wait_event(q->mq_freeze_wq,
 			   (!q->mq_freeze_depth &&
 			    blk_pm_resume_queue(false, q)) ||
-			   test_bit(GD_DEAD, &disk->state));
-		if (test_bit(GD_DEAD, &disk->state))
+			   blk_queue_dying(q));
+		if (blk_queue_dying(q))
 			goto dead;
 	}
 
diff --git a/block/blk.h b/block/blk.h
index a038e25d8637..6adedf97cba2 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -43,7 +43,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 void blk_free_flush_queue(struct blk_flush_queue *q);
 
 void blk_freeze_queue(struct request_queue *q);
-void blk_queue_start_drain(struct request_queue *q);
 int __bio_queue_enter(struct request_queue *q, struct bio *bio);
 bool submit_bio_checks(struct bio *bio);
 
diff --git a/block/genhd.c b/block/genhd.c
index 2f0e92cdcf6d..90742679f949 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -569,8 +569,6 @@ EXPORT_SYMBOL(device_add_disk);
  */
 void del_gendisk(struct gendisk *disk)
 {
-	struct request_queue *q = disk->queue;
-
 	might_sleep();
 
 	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
@@ -587,17 +585,8 @@ void del_gendisk(struct gendisk *disk)
 	fsync_bdev(disk->part0);
 	__invalidate_device(disk->part0, true);
 
-	/*
-	 * Fail any new I/O.
-	 */
-	set_bit(GD_DEAD, &disk->state);
 	set_capacity(disk, 0);
 
-	/*
-	 * Prevent new I/O from crossing bio_queue_enter().
-	 */
-	blk_queue_start_drain(q);
-
 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 
@@ -619,16 +608,6 @@ void del_gendisk(struct gendisk *disk)
 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
 	device_del(disk_to_dev(disk));
-
-	blk_mq_freeze_queue_wait(q);
-
-	blk_sync_queue(q);
-	blk_flush_integrity();
-	/*
-	 * Allow using passthrough request again after the queue is torn down.
-	 */
-	__blk_mq_unfreeze_queue(q, true);
-
 }
 EXPORT_SYMBOL(del_gendisk);
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6906a45bc761..3e9f234495e4 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -108,7 +108,6 @@ struct gendisk {
 	unsigned long state;
 #define GD_NEED_PART_SCAN		0
 #define GD_READ_ONLY			1
-#define GD_DEAD				2
 #define GD_NATIVE_CAPACITY		3
 
 	struct mutex open_mutex;	/* open/close mutex */
-- 
2.31.1


  parent reply	other threads:[~2022-01-22 11:12 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-22 11:10 [PATCH V2 00/13] block: don't drain file system I/O on del_gendisk Ming Lei
2022-01-22 11:10 ` [PATCH V2 01/13] block: declare blkcg_[init|exit]_queue in private header Ming Lei
2022-01-24 12:59   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 02/13] block: move initialization of q->blkg_list into blkcg_init_queue Ming Lei
2022-01-24 13:00   ` Christoph Hellwig
2022-01-24 18:32   ` Bart Van Assche
2022-01-22 11:10 ` [PATCH V2 03/13] block: move blkcg initialization/destroy into disk allocation/release handler Ming Lei
2022-01-24 13:02   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 04/13] block/wbt: fix negative inflight counter when remove scsi device Ming Lei
2022-02-17  7:45   ` Christoph Hellwig
2022-02-17 14:53     ` Jens Axboe
2022-01-22 11:10 ` [PATCH V2 05/13] block: only account passthrough IO from userspace Ming Lei
2022-01-24 13:05   ` Christoph Hellwig
2022-01-24 23:09     ` Ming Lei
2022-01-25  6:16       ` Christoph Hellwig
2022-01-25  7:19         ` Christoph Hellwig
2022-01-25  8:35           ` Ming Lei
2022-01-25  9:09           ` Ming Lei
2022-01-26  5:50             ` Christoph Hellwig
2022-01-26  7:21               ` Ming Lei
2022-01-26  8:10                 ` Christoph Hellwig
2022-01-26  8:33                   ` Ming Lei
2022-01-26  8:49                     ` Christoph Hellwig
2022-01-26  9:59                       ` Ming Lei
2022-01-26 16:37                         ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 06/13] block: don't remove hctx debugfs dir from blk_mq_exit_queue Ming Lei
2022-01-24 13:06   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 07/13] block: move q_usage_counter release into blk_queue_release Ming Lei
2022-01-24 13:09   ` Christoph Hellwig
2022-01-24 19:06   ` Bart Van Assche
2022-01-22 11:10 ` [PATCH V2 08/13] block: export __blk_mq_unfreeze_queue Ming Lei
2022-01-24 13:11   ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 09/13] scsi: force unfreezing queue into atomic mode Ming Lei
2022-01-24 13:15   ` Christoph Hellwig
2022-01-24 23:21     ` Ming Lei
2022-01-25  7:27       ` Christoph Hellwig
2022-01-25  8:54         ` Ming Lei
2022-01-26  8:15           ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 10/13] block: add helper of disk_release_queue for release queue data for disk Ming Lei
2022-01-24 13:16   ` Christoph Hellwig
2022-01-24 23:27     ` Ming Lei
2022-01-25  6:17       ` Christoph Hellwig
2022-01-22 11:10 ` [PATCH V2 11/13] block: move blk_exit_queue into disk_release Ming Lei
2022-01-24 13:22   ` Christoph Hellwig
2022-01-24 23:38     ` Ming Lei
2022-01-22 11:10 ` [PATCH V2 12/13] block: move rq_qos_exit() into disk_release() Ming Lei
2022-01-24 13:28   ` Christoph Hellwig
2022-01-22 11:10 ` Ming Lei [this message]
2022-01-24 13:39   ` [PATCH V2 13/13] block: don't drain file system I/O on del_gendisk Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220122111054.1126146-14-ming.lei@redhat.com \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.