All of lore.kernel.org
 help / color / mirror / Atom feed
* fix debugfs name reuse for remove disks
@ 2022-05-30 13:45 Christoph Hellwig
  2022-05-30 13:45 ` [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Christoph Hellwig @ 2022-05-30 13:45 UTC (permalink / raw)
  To: axboe; +Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

Hi all,

this series aims to primarily fix the debugfs name reuse when a disk
is removed while there are outstanding references to it and then a
new one with the same name is created.

Diffstat:
 block/blk-mq-debugfs.c  |   23 ++++++++++++-----------
 block/blk-mq-debugfs.h  |   10 ----------
 block/blk-mq-sched.c    |   11 +++++++++++
 block/blk-rq-qos.c      |    2 --
 block/blk-rq-qos.h      |    7 ++++++-
 block/blk-sysfs.c       |   30 ++++++++++++++----------------
 block/genhd.c           |    3 +--
 include/linux/blkdev.h  |    3 ---
 kernel/trace/blktrace.c |    3 ---
 9 files changed, 44 insertions(+), 48 deletions(-)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex
  2022-05-30 13:45 fix debugfs name reuse for remove disks Christoph Hellwig
@ 2022-05-30 13:45 ` Christoph Hellwig
  2022-05-31  6:49   ` Hannes Reinecke
  2022-05-30 13:45 ` [PATCH 2/3] block: remove per-disk debugfs files in blk_unregister_queue Christoph Hellwig
  2022-05-30 13:45 ` [PATCH 3/3] block: freeze the queue earlier in del_gendisk Christoph Hellwig
  2 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2022-05-30 13:45 UTC (permalink / raw)
  To: axboe; +Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

Various places like I/O schedulers or the QOS infrastructure try to
register debugfs files on demans, which can race with creating and
removing the main queue debugfs directory.  Use the existing
debugfs_mutex to serialize all debugfs operations that rely on
q->debugfs_dir or the directories hanging off it.

To make the teardown code a little simpler declare all debugfs dentry
pointers and not just the main one uncoditionally in blkdev.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq-debugfs.c  | 19 ++++++++++++++-----
 block/blk-mq-debugfs.h  |  5 -----
 block/blk-mq-sched.c    | 11 +++++++++++
 block/blk-rq-qos.c      |  2 --
 block/blk-rq-qos.h      |  7 ++++++-
 block/blk-sysfs.c       | 20 +++++++++-----------
 block/genhd.c           |  4 ++++
 include/linux/blkdev.h  |  3 ---
 kernel/trace/blktrace.c |  3 ---
 9 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 7e4136a60e1cc..bee80fc562052 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -711,11 +711,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
 	}
 }
 
-void blk_mq_debugfs_unregister(struct request_queue *q)
-{
-	q->sched_debugfs_dir = NULL;
-}
-
 static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_ctx *ctx)
 {
@@ -773,6 +768,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
 {
 	struct elevator_type *e = q->elevator->type;
 
+	lockdep_assert_held(&q->debugfs_mutex);
+
 	/*
 	 * If the parent directory has not been created yet, return, we will be
 	 * called again later on and the directory/files will be created then.
@@ -790,6 +787,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
 
 void blk_mq_debugfs_unregister_sched(struct request_queue *q)
 {
+	lockdep_assert_held(&q->debugfs_mutex);
+
 	debugfs_remove_recursive(q->sched_debugfs_dir);
 	q->sched_debugfs_dir = NULL;
 }
@@ -811,6 +810,8 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
 
 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 {
+	lockdep_assert_held(&rqos->q->debugfs_mutex);
+
 	debugfs_remove_recursive(rqos->debugfs_dir);
 	rqos->debugfs_dir = NULL;
 }
@@ -820,6 +821,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 	struct request_queue *q = rqos->q;
 	const char *dir_name = rq_qos_id_to_name(rqos->id);
 
+	lockdep_assert_held(&q->debugfs_mutex);
+
 	if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
 		return;
 
@@ -835,6 +838,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 
 void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
 {
+	lockdep_assert_held(&q->debugfs_mutex);
+
 	debugfs_remove_recursive(q->rqos_debugfs_dir);
 	q->rqos_debugfs_dir = NULL;
 }
@@ -844,6 +849,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 {
 	struct elevator_type *e = q->elevator->type;
 
+	lockdep_assert_held(&q->debugfs_mutex);
+
 	/*
 	 * If the parent debugfs directory has not been created yet, return;
 	 * We will be called again later on with appropriate parent debugfs
@@ -863,6 +870,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 
 void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
 {
+	lockdep_assert_held(&hctx->queue->debugfs_mutex);
+
 	debugfs_remove_recursive(hctx->sched_debugfs_dir);
 	hctx->sched_debugfs_dir = NULL;
 }
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index 69918f4170d69..771d458328788 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -21,7 +21,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq);
 int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
 
 void blk_mq_debugfs_register(struct request_queue *q);
-void blk_mq_debugfs_unregister(struct request_queue *q);
 void blk_mq_debugfs_register_hctx(struct request_queue *q,
 				  struct blk_mq_hw_ctx *hctx);
 void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx);
@@ -42,10 +41,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q)
 {
 }
 
-static inline void blk_mq_debugfs_unregister(struct request_queue *q)
-{
-}
-
 static inline void blk_mq_debugfs_register_hctx(struct request_queue *q,
 						struct blk_mq_hw_ctx *hctx)
 {
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 9e56a69422b65..e84bec39fd3ad 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -593,7 +593,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	if (ret)
 		goto err_free_map_and_rqs;
 
+	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_register_sched(q);
+	mutex_unlock(&q->debugfs_mutex);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (e->ops.init_hctx) {
@@ -606,7 +608,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 				return ret;
 			}
 		}
+		mutex_lock(&q->debugfs_mutex);
 		blk_mq_debugfs_register_sched_hctx(q, hctx);
+		mutex_unlock(&q->debugfs_mutex);
 	}
 
 	return 0;
@@ -647,14 +651,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 	unsigned int flags = 0;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		mutex_lock(&q->debugfs_mutex);
 		blk_mq_debugfs_unregister_sched_hctx(hctx);
+		mutex_unlock(&q->debugfs_mutex);
+
 		if (e->type->ops.exit_hctx && hctx->sched_data) {
 			e->type->ops.exit_hctx(hctx, i);
 			hctx->sched_data = NULL;
 		}
 		flags = hctx->flags;
 	}
+
+	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_unregister_sched(q);
+	mutex_unlock(&q->debugfs_mutex);
+
 	if (e->type->ops.exit_sched)
 		e->type->ops.exit_sched(e);
 	blk_mq_sched_tags_teardown(q, flags);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index e83af7bc75919..d3a75693adbf4 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -294,8 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 
 void rq_qos_exit(struct request_queue *q)
 {
-	blk_mq_debugfs_unregister_queue_rqos(q);
-
 	while (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
 		q->rq_qos = rqos->next;
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 68267007da1c6..0e46052b018a4 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -104,8 +104,11 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 
 	blk_mq_unfreeze_queue(q);
 
-	if (rqos->ops->debugfs_attrs)
+	if (rqos->ops->debugfs_attrs) {
+		mutex_lock(&q->debugfs_mutex);
 		blk_mq_debugfs_register_rqos(rqos);
+		mutex_unlock(&q->debugfs_mutex);
+	}
 }
 
 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
@@ -129,7 +132,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 
 	blk_mq_unfreeze_queue(q);
 
+	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_unregister_rqos(rqos);
+	mutex_unlock(&q->debugfs_mutex);
 }
 
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 88bd41d4cb593..6e4801b217a79 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -779,14 +779,13 @@ static void blk_release_queue(struct kobject *kobj)
 	if (queue_is_mq(q))
 		blk_mq_release(q);
 
-	blk_trace_shutdown(q);
 	mutex_lock(&q->debugfs_mutex);
+	blk_trace_shutdown(q);
 	debugfs_remove_recursive(q->debugfs_dir);
+	q->debugfs_dir = NULL;
+	q->sched_debugfs_dir = NULL;
 	mutex_unlock(&q->debugfs_mutex);
 
-	if (queue_is_mq(q))
-		blk_mq_debugfs_unregister(q);
-
 	bioset_exit(&q->bio_split);
 
 	if (blk_queue_has_srcu(q))
@@ -836,17 +835,16 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
+	if (queue_is_mq(q))
+		__blk_mq_register_dev(dev, q);
+	mutex_lock(&q->sysfs_lock);
+
 	mutex_lock(&q->debugfs_mutex);
 	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
 					    blk_debugfs_root);
-	mutex_unlock(&q->debugfs_mutex);
-
-	if (queue_is_mq(q)) {
-		__blk_mq_register_dev(dev, q);
+	if (queue_is_mq(q))
 		blk_mq_debugfs_register(q);
-	}
-
-	mutex_lock(&q->sysfs_lock);
+	mutex_unlock(&q->debugfs_mutex);
 
 	ret = disk_register_independent_access_ranges(disk, NULL);
 	if (ret)
diff --git a/block/genhd.c b/block/genhd.c
index 36532b9318419..9f4eb7516df7e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1122,6 +1122,10 @@ static void disk_release_mq(struct request_queue *q)
 {
 	blk_mq_cancel_work_sync(q);
 
+	mutex_lock(&q->debugfs_mutex);
+	blk_mq_debugfs_unregister_queue_rqos(q);
+	mutex_unlock(&q->debugfs_mutex);
+
 	/*
 	 * There can't be any non non-passthrough bios in flight here, but
 	 * requests stay around longer, including passthrough ones so we
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1b24c1fb3bb1e..095a0d68c9b41 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -525,11 +525,8 @@ struct request_queue {
 	struct bio_set		bio_split;
 
 	struct dentry		*debugfs_dir;
-
-#ifdef CONFIG_BLK_DEBUG_FS
 	struct dentry		*sched_debugfs_dir;
 	struct dentry		*rqos_debugfs_dir;
-#endif
 
 	bool			mq_sysfs_init_done;
 
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 10a32b0f2deb6..fe04c6f96ca5d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -770,14 +770,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
  **/
 void blk_trace_shutdown(struct request_queue *q)
 {
-	mutex_lock(&q->debugfs_mutex);
 	if (rcu_dereference_protected(q->blk_trace,
 				      lockdep_is_held(&q->debugfs_mutex))) {
 		__blk_trace_startstop(q, 0);
 		__blk_trace_remove(q);
 	}
-
-	mutex_unlock(&q->debugfs_mutex);
 }
 
 #ifdef CONFIG_BLK_CGROUP
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] block: remove per-disk debugfs files in blk_unregister_queue
  2022-05-30 13:45 fix debugfs name reuse for remove disks Christoph Hellwig
  2022-05-30 13:45 ` [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex Christoph Hellwig
@ 2022-05-30 13:45 ` Christoph Hellwig
  2022-05-31  6:50   ` Hannes Reinecke
  2022-05-30 13:45 ` [PATCH 3/3] block: freeze the queue earlier in del_gendisk Christoph Hellwig
  2 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2022-05-30 13:45 UTC (permalink / raw)
  To: axboe; +Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

The block debugfs files are created in blk_register_queue, which is
called by add_disk and use a naming scheme based on the disk_name.
After del_gendisk returns that name can be reused and thus we must not
leave these debugfs files around, otherwise the kernel is unhappy
and spews messages like:

    Directory XXXXX with parent 'block' already present!

and the newly created devices will not have working debugfs files.

Move the unregistration to blk_unregister_queue instead (which matches
the sysfs unregistration) to make sure the debugfs life time rules match
those of the disk name.

As part of the move also make sure the whole debugfs unregistration is
inside a single debugfs_mutex critical section.

Note that this breaks blktests block/002, which checks that the debugfs
directory has not been removed while blktests is running, but that
particular check should simply be removed from the test case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq-debugfs.c |  8 --------
 block/blk-mq-debugfs.h |  5 -----
 block/blk-sysfs.c      | 16 ++++++++--------
 block/genhd.c          |  4 ----
 4 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index bee80fc562052..6bda8dd7a271f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -836,14 +836,6 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 	debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
 }
 
-void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
-{
-	lockdep_assert_held(&q->debugfs_mutex);
-
-	debugfs_remove_recursive(q->rqos_debugfs_dir);
-	q->rqos_debugfs_dir = NULL;
-}
-
 void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 					struct blk_mq_hw_ctx *hctx)
 {
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index 771d458328788..9c7d4b6117d41 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -35,7 +35,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
 
 void blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
-void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q);
 #else
 static inline void blk_mq_debugfs_register(struct request_queue *q)
 {
@@ -82,10 +81,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 {
 }
-
-static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
-{
-}
 #endif
 
 #ifdef CONFIG_BLK_DEBUG_FS_ZONED
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6e4801b217a79..9b905e9443e49 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -779,13 +779,6 @@ static void blk_release_queue(struct kobject *kobj)
 	if (queue_is_mq(q))
 		blk_mq_release(q);
 
-	mutex_lock(&q->debugfs_mutex);
-	blk_trace_shutdown(q);
-	debugfs_remove_recursive(q->debugfs_dir);
-	q->debugfs_dir = NULL;
-	q->sched_debugfs_dir = NULL;
-	mutex_unlock(&q->debugfs_mutex);
-
 	bioset_exit(&q->bio_split);
 
 	if (blk_queue_has_srcu(q))
@@ -946,8 +939,15 @@ void blk_unregister_queue(struct gendisk *disk)
 	/* Now that we've deleted all child objects, we can delete the queue. */
 	kobject_uevent(&q->kobj, KOBJ_REMOVE);
 	kobject_del(&q->kobj);
-
 	mutex_unlock(&q->sysfs_dir_lock);
 
+	mutex_lock(&q->debugfs_mutex);
+	blk_trace_shutdown(q);
+	debugfs_remove_recursive(q->debugfs_dir);
+	q->debugfs_dir = NULL;
+	q->sched_debugfs_dir = NULL;
+	q->rqos_debugfs_dir = NULL;
+	mutex_unlock(&q->debugfs_mutex);
+
 	kobject_put(&disk_to_dev(disk)->kobj);
 }
diff --git a/block/genhd.c b/block/genhd.c
index 9f4eb7516df7e..36532b9318419 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1122,10 +1122,6 @@ static void disk_release_mq(struct request_queue *q)
 {
 	blk_mq_cancel_work_sync(q);
 
-	mutex_lock(&q->debugfs_mutex);
-	blk_mq_debugfs_unregister_queue_rqos(q);
-	mutex_unlock(&q->debugfs_mutex);
-
 	/*
 	 * There can't be any non non-passthrough bios in flight here, but
 	 * requests stay around longer, including passthrough ones so we
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] block: freeze the queue earlier in del_gendisk
  2022-05-30 13:45 fix debugfs name reuse for remove disks Christoph Hellwig
  2022-05-30 13:45 ` [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex Christoph Hellwig
  2022-05-30 13:45 ` [PATCH 2/3] block: remove per-disk debugfs files in blk_unregister_queue Christoph Hellwig
@ 2022-05-30 13:45 ` Christoph Hellwig
  2022-05-31  6:51   ` Hannes Reinecke
  2 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2022-05-30 13:45 UTC (permalink / raw)
  To: axboe; +Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

Freeze the queue earlier in del_gendisk so that the state does not
change while we remove debugfs and sysfs files.

Ming mentioned that being able to observer request in debugfs might
be useful while the queue is being frozen in del_gendisk, which is
made possible by this change.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/genhd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 36532b9318419..8ff5b187791af 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -621,6 +621,7 @@ void del_gendisk(struct gendisk *disk)
 	 * Prevent new I/O from crossing bio_queue_enter().
 	 */
 	blk_queue_start_drain(q);
+	blk_mq_freeze_queue_wait(q);
 
 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -644,8 +645,6 @@ void del_gendisk(struct gendisk *disk)
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
 	device_del(disk_to_dev(disk));
 
-	blk_mq_freeze_queue_wait(q);
-
 	blk_throtl_cancel_bios(disk->queue);
 
 	blk_sync_queue(q);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex
  2022-05-30 13:45 ` [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex Christoph Hellwig
@ 2022-05-31  6:49   ` Hannes Reinecke
  0 siblings, 0 replies; 7+ messages in thread
From: Hannes Reinecke @ 2022-05-31  6:49 UTC (permalink / raw)
  To: Christoph Hellwig, axboe
  Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

On 5/30/22 15:45, Christoph Hellwig wrote:
> Various places like I/O schedulers or the QOS infrastructure try to
> register debugfs files on demans, which can race with creating and
> removing the main queue debugfs directory.  Use the existing
> debugfs_mutex to serialize all debugfs operations that rely on
> q->debugfs_dir or the directories hanging off it.
> 
> To make the teardown code a little simpler declare all debugfs dentry
> pointers and not just the main one uncoditionally in blkdev.h.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/blk-mq-debugfs.c  | 19 ++++++++++++++-----
>   block/blk-mq-debugfs.h  |  5 -----
>   block/blk-mq-sched.c    | 11 +++++++++++
>   block/blk-rq-qos.c      |  2 --
>   block/blk-rq-qos.h      |  7 ++++++-
>   block/blk-sysfs.c       | 20 +++++++++-----------
>   block/genhd.c           |  4 ++++
>   include/linux/blkdev.h  |  3 ---
>   kernel/trace/blktrace.c |  3 ---
>   9 files changed, 44 insertions(+), 30 deletions(-)
> 
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 7e4136a60e1cc..bee80fc562052 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -711,11 +711,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
>   	}
>   }
>   
> -void blk_mq_debugfs_unregister(struct request_queue *q)
> -{
> -	q->sched_debugfs_dir = NULL;
> -}
> -
>   static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
>   					struct blk_mq_ctx *ctx)
>   {
> @@ -773,6 +768,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
>   {
>   	struct elevator_type *e = q->elevator->type;
>   
> +	lockdep_assert_held(&q->debugfs_mutex);
> +
>   	/*
>   	 * If the parent directory has not been created yet, return, we will be
>   	 * called again later on and the directory/files will be created then.
> @@ -790,6 +787,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
>   
>   void blk_mq_debugfs_unregister_sched(struct request_queue *q)
>   {
> +	lockdep_assert_held(&q->debugfs_mutex);
> +
>   	debugfs_remove_recursive(q->sched_debugfs_dir);
>   	q->sched_debugfs_dir = NULL;
>   }
> @@ -811,6 +810,8 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
>   
>   void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
>   {
> +	lockdep_assert_held(&rqos->q->debugfs_mutex);
> +
>   	debugfs_remove_recursive(rqos->debugfs_dir);
>   	rqos->debugfs_dir = NULL;
>   }
> @@ -820,6 +821,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
>   	struct request_queue *q = rqos->q;
>   	const char *dir_name = rq_qos_id_to_name(rqos->id);
>   
> +	lockdep_assert_held(&q->debugfs_mutex);
> +
>   	if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
>   		return;
>   
> @@ -835,6 +838,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
>   
>   void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
>   {
> +	lockdep_assert_held(&q->debugfs_mutex);
> +
>   	debugfs_remove_recursive(q->rqos_debugfs_dir);
>   	q->rqos_debugfs_dir = NULL;
>   }
> @@ -844,6 +849,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
>   {
>   	struct elevator_type *e = q->elevator->type;
>   
> +	lockdep_assert_held(&q->debugfs_mutex);
> +
>   	/*
>   	 * If the parent debugfs directory has not been created yet, return;
>   	 * We will be called again later on with appropriate parent debugfs
> @@ -863,6 +870,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
>   
>   void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
>   {
> +	lockdep_assert_held(&hctx->queue->debugfs_mutex);
> +
>   	debugfs_remove_recursive(hctx->sched_debugfs_dir);
>   	hctx->sched_debugfs_dir = NULL;
>   }
> diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
> index 69918f4170d69..771d458328788 100644
> --- a/block/blk-mq-debugfs.h
> +++ b/block/blk-mq-debugfs.h
> @@ -21,7 +21,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq);
>   int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
>   
>   void blk_mq_debugfs_register(struct request_queue *q);
> -void blk_mq_debugfs_unregister(struct request_queue *q);
>   void blk_mq_debugfs_register_hctx(struct request_queue *q,
>   				  struct blk_mq_hw_ctx *hctx);
>   void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx);
> @@ -42,10 +41,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q)
>   {
>   }
>   
> -static inline void blk_mq_debugfs_unregister(struct request_queue *q)
> -{
> -}
> -
>   static inline void blk_mq_debugfs_register_hctx(struct request_queue *q,
>   						struct blk_mq_hw_ctx *hctx)
>   {
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 9e56a69422b65..e84bec39fd3ad 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -593,7 +593,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>   	if (ret)
>   		goto err_free_map_and_rqs;
>   
> +	mutex_lock(&q->debugfs_mutex);
>   	blk_mq_debugfs_register_sched(q);
> +	mutex_unlock(&q->debugfs_mutex);
>   
>   	queue_for_each_hw_ctx(q, hctx, i) {
>   		if (e->ops.init_hctx) {
> @@ -606,7 +608,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>   				return ret;
>   			}
>   		}
> +		mutex_lock(&q->debugfs_mutex);
>   		blk_mq_debugfs_register_sched_hctx(q, hctx);
> +		mutex_unlock(&q->debugfs_mutex);
>   	}
>   
>   	return 0;
> @@ -647,14 +651,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
>   	unsigned int flags = 0;
>   
>   	queue_for_each_hw_ctx(q, hctx, i) {
> +		mutex_lock(&q->debugfs_mutex);
>   		blk_mq_debugfs_unregister_sched_hctx(hctx);
> +		mutex_unlock(&q->debugfs_mutex);
> +
>   		if (e->type->ops.exit_hctx && hctx->sched_data) {
>   			e->type->ops.exit_hctx(hctx, i);
>   			hctx->sched_data = NULL;
>   		}
>   		flags = hctx->flags;
>   	}
> +
> +	mutex_lock(&q->debugfs_mutex);
>   	blk_mq_debugfs_unregister_sched(q);
> +	mutex_unlock(&q->debugfs_mutex);
> +
>   	if (e->type->ops.exit_sched)
>   		e->type->ops.exit_sched(e);
>   	blk_mq_sched_tags_teardown(q, flags);
> diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
> index e83af7bc75919..d3a75693adbf4 100644
> --- a/block/blk-rq-qos.c
> +++ b/block/blk-rq-qos.c
> @@ -294,8 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
>   
>   void rq_qos_exit(struct request_queue *q)
>   {
> -	blk_mq_debugfs_unregister_queue_rqos(q);
> -
>   	while (q->rq_qos) {
>   		struct rq_qos *rqos = q->rq_qos;
>   		q->rq_qos = rqos->next;
> diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
> index 68267007da1c6..0e46052b018a4 100644
> --- a/block/blk-rq-qos.h
> +++ b/block/blk-rq-qos.h
> @@ -104,8 +104,11 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
>   
>   	blk_mq_unfreeze_queue(q);
>   
> -	if (rqos->ops->debugfs_attrs)
> +	if (rqos->ops->debugfs_attrs) {
> +		mutex_lock(&q->debugfs_mutex);
>   		blk_mq_debugfs_register_rqos(rqos);
> +		mutex_unlock(&q->debugfs_mutex);
> +	}
>   }
>   
>   static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
> @@ -129,7 +132,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
>   
>   	blk_mq_unfreeze_queue(q);
>   
> +	mutex_lock(&q->debugfs_mutex);
>   	blk_mq_debugfs_unregister_rqos(rqos);
> +	mutex_unlock(&q->debugfs_mutex);
>   }
>   
>   typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 88bd41d4cb593..6e4801b217a79 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -779,14 +779,13 @@ static void blk_release_queue(struct kobject *kobj)
>   	if (queue_is_mq(q))
>   		blk_mq_release(q);
>   
> -	blk_trace_shutdown(q);
>   	mutex_lock(&q->debugfs_mutex);
> +	blk_trace_shutdown(q);
>   	debugfs_remove_recursive(q->debugfs_dir);
> +	q->debugfs_dir = NULL;
> +	q->sched_debugfs_dir = NULL;
>   	mutex_unlock(&q->debugfs_mutex);
>   
> -	if (queue_is_mq(q))
> -		blk_mq_debugfs_unregister(q);
> -
>   	bioset_exit(&q->bio_split);
>   
>   	if (blk_queue_has_srcu(q))
> @@ -836,17 +835,16 @@ int blk_register_queue(struct gendisk *disk)
>   		goto unlock;
>   	}
>   
> +	if (queue_is_mq(q))
> +		__blk_mq_register_dev(dev, q);
> +	mutex_lock(&q->sysfs_lock);
> +
>   	mutex_lock(&q->debugfs_mutex);
>   	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
>   					    blk_debugfs_root);
> -	mutex_unlock(&q->debugfs_mutex);
> -
> -	if (queue_is_mq(q)) {
> -		__blk_mq_register_dev(dev, q);
> +	if (queue_is_mq(q))
>   		blk_mq_debugfs_register(q);
> -	}
> -
> -	mutex_lock(&q->sysfs_lock);
> +	mutex_unlock(&q->debugfs_mutex);
>   
>   	ret = disk_register_independent_access_ranges(disk, NULL);
>   	if (ret)
> diff --git a/block/genhd.c b/block/genhd.c
> index 36532b9318419..9f4eb7516df7e 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -1122,6 +1122,10 @@ static void disk_release_mq(struct request_queue *q)
>   {
>   	blk_mq_cancel_work_sync(q);
>   
> +	mutex_lock(&q->debugfs_mutex);
> +	blk_mq_debugfs_unregister_queue_rqos(q);
> +	mutex_unlock(&q->debugfs_mutex);
> +
>   	/*
>   	 * There can't be any non non-passthrough bios in flight here, but
>   	 * requests stay around longer, including passthrough ones so we
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 1b24c1fb3bb1e..095a0d68c9b41 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -525,11 +525,8 @@ struct request_queue {
>   	struct bio_set		bio_split;
>   
>   	struct dentry		*debugfs_dir;
> -
> -#ifdef CONFIG_BLK_DEBUG_FS
>   	struct dentry		*sched_debugfs_dir;
>   	struct dentry		*rqos_debugfs_dir;
> -#endif
>   
>   	bool			mq_sysfs_init_done;
>   
It would be good if we could move the debugfs_mutex to here, too, and 
maybe add a comment about it protecting the debugfs dir pointers.

Otherwise looks good.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] block: remove per-disk debugfs files in blk_unregister_queue
  2022-05-30 13:45 ` [PATCH 2/3] block: remove per-disk debugfs files in blk_unregister_queue Christoph Hellwig
@ 2022-05-31  6:50   ` Hannes Reinecke
  0 siblings, 0 replies; 7+ messages in thread
From: Hannes Reinecke @ 2022-05-31  6:50 UTC (permalink / raw)
  To: Christoph Hellwig, axboe
  Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

On 5/30/22 15:45, Christoph Hellwig wrote:
> The block debugfs files are created in blk_register_queue, which is
> called by add_disk and use a naming scheme based on the disk_name.
> After del_gendisk returns that name can be reused and thus we must not
> leave these debugfs files around, otherwise the kernel is unhappy
> and spews messages like:
> 
>      Directory XXXXX with parent 'block' already present!
> 
> and the newly created devices will not have working debugfs files.
> 
> Move the unregistration to blk_unregister_queue instead (which matches
> the sysfs unregistration) to make sure the debugfs life time rules match
> those of the disk name.
> 
> As part of the move also make sure the whole debugfs unregistration is
> inside a single debugfs_mutex critical section.
> 
> Note that this breaks blktests block/002, which checks that the debugfs
> directory has not been removed while blktests is running, but that
> particular check should simply be removed from the test case.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/blk-mq-debugfs.c |  8 --------
>   block/blk-mq-debugfs.h |  5 -----
>   block/blk-sysfs.c      | 16 ++++++++--------
>   block/genhd.c          |  4 ----
>   4 files changed, 8 insertions(+), 25 deletions(-)
> 

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] block: freeze the queue earlier in del_gendisk
  2022-05-30 13:45 ` [PATCH 3/3] block: freeze the queue earlier in del_gendisk Christoph Hellwig
@ 2022-05-31  6:51   ` Hannes Reinecke
  0 siblings, 0 replies; 7+ messages in thread
From: Hannes Reinecke @ 2022-05-31  6:51 UTC (permalink / raw)
  To: Christoph Hellwig, axboe
  Cc: shinichiro.kawasaki, dan.j.williams, yukuai3, ming.lei, linux-block

On 5/30/22 15:45, Christoph Hellwig wrote:
> Freeze the queue earlier in del_gendisk so that the state does not
> change while we remove debugfs and sysfs files.
> 
> Ming mentioned that being able to observer request in debugfs might
> be useful while the queue is being frozen in del_gendisk, which is
> made possible by this change.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/genhd.c | 3 +--
>   1 file changed, 1 insertion(+), 2 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-05-31  6:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-30 13:45 fix debugfs name reuse for remove disks Christoph Hellwig
2022-05-30 13:45 ` [PATCH 1/3] block: serialize all debugfs operations using q->debugfs_mutex Christoph Hellwig
2022-05-31  6:49   ` Hannes Reinecke
2022-05-30 13:45 ` [PATCH 2/3] block: remove per-disk debugfs files in blk_unregister_queue Christoph Hellwig
2022-05-31  6:50   ` Hannes Reinecke
2022-05-30 13:45 ` [PATCH 3/3] block: freeze the queue earlier in del_gendisk Christoph Hellwig
2022-05-31  6:51   ` Hannes Reinecke

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.