* switch blk-cgroup to work on gendisk v2
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups

Hi all,

blk-cgroup works only on live disks and on "file system" I/O from bios.
Thus all the information should live in the gendisk, and not in the
request_queue, which also exists for pure passthrough request based
devices.
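
For reference, a simplified sketch of how the two structures relate
(trimmed down, not the full definitions in include/linux/blkdev.h):

  struct gendisk {
          struct request_queue *queue;    /* always valid */
          /* ... */
  };

  struct request_queue {
          struct gendisk *disk;   /* NULL for passthrough-only queues */
          /* ... */
  };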

Changes since v1:
 - use the local disk variable in wbt_init instead of q->disk
 - various spelling fixes

Diffstat:
 block/bfq-cgroup.c        |   18 ++--
 block/bfq-iosched.c       |    6 -
 block/blk-cgroup-rwstat.c |    2 
 block/blk-cgroup.c        |  185 +++++++++++++++++++++-------------------------
 block/blk-cgroup.h        |   41 ++++------
 block/blk-iocost.c        |   40 ++++-----
 block/blk-iolatency.c     |   41 ++++------
 block/blk-ioprio.c        |    6 -
 block/blk-mq-debugfs.c    |   10 --
 block/blk-rq-qos.c        |   67 ++++++++++++++++
 block/blk-rq-qos.h        |   66 +---------------
 block/blk-stat.c          |    3 
 block/blk-sysfs.c         |    4 
 block/blk-throttle.c      |   31 ++++---
 block/blk-wbt.c           |   39 ++++-----
 block/blk-wbt.h           |   12 +-
 block/genhd.c             |   17 ++--
 include/linux/blkdev.h    |   10 +-
 include/linux/sched.h     |    2 
 kernel/fork.c             |    2 
 mm/swapfile.c             |    2 
 21 files changed, 292 insertions(+), 312 deletions(-)


* [PATCH 01/15] blk-cgroup: don't defer blkg_free to a workqueue
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Now that blk_put_queue can be called from process context, there is no
need for the asynchronous execution.

This effectively reverts commit d578c770c85233af592e54537f93f3831bde7e9a.
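
For context, the freeing now happens directly in the release path instead
of bouncing through a workqueue; roughly:

  blkg_put()                      /* drops the last reference */
    -> blkg_release()             /* percpu_ref release callback */
      -> call_rcu(__blkg_release)
        -> blkg_free(blkg)        /* frees inline; previously this
                                     scheduled blkg_free_workfn */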

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-cgroup.c | 32 ++++++++++----------------------
 block/blk-cgroup.h |  5 +----
 2 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ce6a2b7d3dfb2b..30d493b43f9272 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -114,12 +114,19 @@ static bool blkcg_policy_enabled(struct request_queue *q,
 	return pol && test_bit(pol->plid, q->blkcg_pols);
 }
 
-static void blkg_free_workfn(struct work_struct *work)
+/**
+ * blkg_free - free a blkg
+ * @blkg: blkg to free
+ *
+ * Free @blkg which may be partially allocated.
+ */
+static void blkg_free(struct blkcg_gq *blkg)
 {
-	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
-					     free_work);
 	int i;
 
+	if (!blkg)
+		return;
+
 	for (i = 0; i < BLKCG_MAX_POLS; i++)
 		if (blkg->pd[i])
 			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
@@ -131,25 +138,6 @@ static void blkg_free_workfn(struct work_struct *work)
 	kfree(blkg);
 }
 
-/**
- * blkg_free - free a blkg
- * @blkg: blkg to free
- *
- * Free @blkg which may be partially allocated.
- */
-static void blkg_free(struct blkcg_gq *blkg)
-{
-	if (!blkg)
-		return;
-
-	/*
-	 * Both ->pd_free_fn() and request queue's release handler may
-	 * sleep, so free us by scheduling one work func
-	 */
-	INIT_WORK(&blkg->free_work, blkg_free_workfn);
-	schedule_work(&blkg->free_work);
-}
-
 static void __blkg_release(struct rcu_head *rcu)
 {
 	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 1e94e404eaa80a..f126fe36001eb3 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -75,10 +75,7 @@ struct blkcg_gq {
 
 	spinlock_t			async_bio_lock;
 	struct bio_list			async_bios;
-	union {
-		struct work_struct	async_bio_work;
-		struct work_struct	free_work;
-	};
+	struct work_struct		async_bio_work;
 
 	atomic_t			use_delay;
 	atomic64_t			delay_nsec;
-- 
2.39.0


* [PATCH 02/15] block: don't call blk_throtl_stat_add for non-READ/WRITE commands
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

blk_throtl_stat_add is called from blk_stat_add explicitly, unlike the
other stats that go through q->stats->callbacks.  To prepare for cgroup
data moving to the gendisk, ensure blk_throtl_stat_add is only called
for the plain READ and WRITE commands that it actually handles internally,
as blk_stat_add can also be called for passthrough commands on queues that
do not have a gendisk associated with them.
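
In other words the stats path now looks like the sketch below, with
passthrough operations (REQ_OP_DRV_IN/OUT and friends) falling through
without touching any throttling state:

	if (req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE)
		blk_throtl_stat_add(rq, value);	/* fs I/O only */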

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-stat.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/blk-stat.c b/block/blk-stat.c
index 2ea01b5c1aca04..c6ca16abf911e2 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -58,7 +58,8 @@ void blk_stat_add(struct request *rq, u64 now)
 
 	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
 
-	blk_throtl_stat_add(rq, value);
+	if (req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE)
+		blk_throtl_stat_add(rq, value);
 
 	rcu_read_lock();
 	cpu = get_cpu();
-- 
2.39.0



* [PATCH 03/15] blk-cgroup: delay blk-cgroup initialization until add_disk
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

There is no need to initialize the cgroup code before the disk is marked
live.  Moving the cgroup initialization later will help to have a
fully initialized struct device in the gendisk for the cgroup code to
use in the future.  Similarly tear the cgroup information down in
del_gendisk to be symmetric and because none of the cgroup tracking is
needed once non-passthrough I/O stops.
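
The resulting ordering is, roughly:

  device_add_disk()
	...
	blkcg_init_disk(disk);		/* struct device is fully set up */
	blk_integrity_add(disk);
	...

  del_gendisk()
	...
	rq_qos_exit(q);
	blkcg_exit_disk(disk);		/* no more non-passthrough I/O */
	...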

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/genhd.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 23cf83b3331cde..705dec0800d62e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -466,10 +466,14 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	 */
 	pm_runtime_set_memalloc_noio(ddev, true);
 
-	ret = blk_integrity_add(disk);
+	ret = blkcg_init_disk(disk);
 	if (ret)
 		goto out_del_block_link;
 
+	ret = blk_integrity_add(disk);
+	if (ret)
+		goto out_blkcg_exit;
+
 	disk->part0->bd_holder_dir =
 		kobject_create_and_add("holders", &ddev->kobj);
 	if (!disk->part0->bd_holder_dir) {
@@ -534,6 +538,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	kobject_put(disk->part0->bd_holder_dir);
 out_del_integrity:
 	blk_integrity_del(disk);
+out_blkcg_exit:
+	blkcg_exit_disk(disk);
 out_del_block_link:
 	if (!sysfs_deprecated)
 		sysfs_remove_link(block_depr, dev_name(ddev));
@@ -662,6 +668,8 @@ void del_gendisk(struct gendisk *disk)
 	rq_qos_exit(q);
 	blk_mq_unquiesce_queue(q);
 
+	blkcg_exit_disk(disk);
+
 	/*
 	 * If the disk does not own the queue, allow using passthrough requests
 	 * again.  Else leave the queue frozen to fail all I/O.
@@ -1171,8 +1179,6 @@ static void disk_release(struct device *dev)
 	    !test_bit(GD_ADDED, &disk->state))
 		blk_mq_exit_queue(disk->queue);
 
-	blkcg_exit_disk(disk);
-
 	bioset_exit(&disk->bio_split);
 
 	disk_release_events(disk);
@@ -1385,9 +1391,6 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
 		goto out_destroy_part_tbl;
 
-	if (blkcg_init_disk(disk))
-		goto out_erase_part0;
-
 	rand_initialize_disk(disk);
 	disk_to_dev(disk)->class = &block_class;
 	disk_to_dev(disk)->type = &disk_type;
@@ -1400,8 +1403,6 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 #endif
 	return disk;
 
-out_erase_part0:
-	xa_erase(&disk->part_tbl, 0);
 out_destroy_part_tbl:
 	xa_destroy(&disk->part_tbl);
 	disk->part0->bd_disk = NULL;
-- 
2.39.0


* [PATCH 04/15] blk-cgroup: pin the gendisk in struct blkcg_gq
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Currently each blkcg_gq holds a request_queue reference, which is what
is used in the policies.  But a lot of these interfaces will move over to
use a gendisk, so store a disk in struct blkcg_gq and hold a reference to
it.
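
Code that still needs the request_queue can trivially derive it from the
pinned disk, e.g. (sketch):

	struct request_queue *q = blkg->disk->queue;	/* was blkg->q */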

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/bfq-cgroup.c        |  6 +++---
 block/blk-cgroup-rwstat.c |  2 +-
 block/blk-cgroup.c        | 29 +++++++++++++----------------
 block/blk-cgroup.h        | 11 +++++------
 block/blk-iocost.c        |  2 +-
 block/blk-iolatency.c     |  4 ++--
 block/blk-throttle.c      |  4 ++--
 7 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index a6e8da5f5cfdc1..72a033776722c9 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -407,7 +407,7 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
 
 	parent = bfqg_parent(bfqg);
 
-	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);
+	lockdep_assert_held(&bfqg_to_blkg(bfqg)->disk->queue->queue_lock);
 
 	if (unlikely(!parent))
 		return;
@@ -538,7 +538,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
 {
 	struct blkcg_gq *blkg = pd_to_blkg(pd);
 	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
-	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
+	struct bfq_data *bfqd = blkg->disk->queue->elevator->elevator_data;
 	struct bfq_entity *entity = &bfqg->entity;
 	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
 
@@ -1203,7 +1203,7 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
 	struct cgroup_subsys_state *pos_css;
 	u64 sum = 0;
 
-	lockdep_assert_held(&blkg->q->queue_lock);
+	lockdep_assert_held(&blkg->disk->queue->queue_lock);
 
 	rcu_read_lock();
 	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c
index 3304e841df7ce9..b8b8c82e667a3b 100644
--- a/block/blk-cgroup-rwstat.c
+++ b/block/blk-cgroup-rwstat.c
@@ -107,7 +107,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
 	struct cgroup_subsys_state *pos_css;
 	unsigned int i;
 
-	lockdep_assert_held(&blkg->q->queue_lock);
+	lockdep_assert_held(&blkg->disk->queue->queue_lock);
 
 	memset(sum, 0, sizeof(*sum));
 	rcu_read_lock();
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 30d493b43f9272..f5a634ed098db0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -131,8 +131,8 @@ static void blkg_free(struct blkcg_gq *blkg)
 		if (blkg->pd[i])
 			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
-	if (blkg->q)
-		blk_put_queue(blkg->q);
+	if (blkg->disk)
+		put_disk(blkg->disk);
 	free_percpu(blkg->iostat_cpu);
 	percpu_ref_exit(&blkg->refcnt);
 	kfree(blkg);
@@ -245,10 +245,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 	if (!blkg->iostat_cpu)
 		goto err_free;
 
-	if (!blk_get_queue(disk->queue))
-		goto err_free;
+	get_device(disk_to_dev(disk));
 
-	blkg->q = disk->queue;
+	blkg->disk = disk;
 	INIT_LIST_HEAD(&blkg->q_node);
 	spin_lock_init(&blkg->async_bio_lock);
 	bio_list_init(&blkg->async_bios);
@@ -443,7 +442,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	struct blkcg *blkcg = blkg->blkcg;
 	int i;
 
-	lockdep_assert_held(&blkg->q->queue_lock);
+	lockdep_assert_held(&blkg->disk->queue->queue_lock);
 	lockdep_assert_held(&blkcg->lock);
 
 	/* Something wrong if we are trying to remove same group twice */
@@ -459,7 +458,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 
 	blkg->online = false;
 
-	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
+	radix_tree_delete(&blkcg->blkg_tree, blkg->disk->queue->id);
 	list_del_init(&blkg->q_node);
 	hlist_del_init_rcu(&blkg->blkcg_node);
 
@@ -547,9 +546,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 
 const char *blkg_dev_name(struct blkcg_gq *blkg)
 {
-	if (!blkg->q->disk || !blkg->q->disk->bdi->dev)
-		return NULL;
-	return bdi_dev_name(blkg->q->disk->bdi);
+	return bdi_dev_name(blkg->disk->bdi);
 }
 
 /**
@@ -581,10 +578,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
-		spin_lock_irq(&blkg->q->queue_lock);
-		if (blkcg_policy_enabled(blkg->q, pol))
+		spin_lock_irq(&blkg->disk->queue->queue_lock);
+		if (blkcg_policy_enabled(blkg->disk->queue, pol))
 			total += prfill(sf, blkg->pd[pol->plid], data);
-		spin_unlock_irq(&blkg->q->queue_lock);
+		spin_unlock_irq(&blkg->disk->queue->queue_lock);
 	}
 	rcu_read_unlock();
 
@@ -1008,9 +1005,9 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
-		spin_lock_irq(&blkg->q->queue_lock);
+		spin_lock_irq(&blkg->disk->queue->queue_lock);
 		blkcg_print_one_stat(blkg, sf);
-		spin_unlock_irq(&blkg->q->queue_lock);
+		spin_unlock_irq(&blkg->disk->queue->queue_lock);
 	}
 	rcu_read_unlock();
 	return 0;
@@ -1080,7 +1077,7 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
 						struct blkcg_gq, blkcg_node);
-		struct request_queue *q = blkg->q;
+		struct request_queue *q = blkg->disk->queue;
 
 		if (need_resched() || !spin_trylock(&q->queue_lock)) {
 			/*
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index f126fe36001eb3..85b267234823ab 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -53,8 +53,7 @@ struct blkg_iostat_set {
 
 /* association between a blk cgroup and a request queue */
 struct blkcg_gq {
-	/* Pointer to the associated request_queue */
-	struct request_queue		*q;
+	struct gendisk			*disk;
 	struct list_head		q_node;
 	struct hlist_node		blkcg_node;
 	struct blkcg			*blkcg;
@@ -251,11 +250,11 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 		return q->root_blkg;
 
 	blkg = rcu_dereference(blkcg->blkg_hint);
-	if (blkg && blkg->q == q)
+	if (blkg && blkg->disk->queue == q)
 		return blkg;
 
 	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
-	if (blkg && blkg->q != q)
+	if (blkg && blkg->disk->queue != q)
 		blkg = NULL;
 	return blkg;
 }
@@ -355,7 +354,7 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
 	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
 		if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
-					    (p_blkg)->q)))
+					    (p_blkg)->disk->queue)))
 
 /**
  * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
@@ -370,7 +369,7 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
 	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
 		if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
-					    (p_blkg)->q)))
+					    (p_blkg)->disk->queue)))
 
 bool __blkcg_punt_bio_submit(struct bio *bio);
 
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9694394ed883c4..a2b4e7146be5f1 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2953,7 +2953,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
 {
 	struct ioc_gq *iocg = pd_to_iocg(pd);
 	struct blkcg_gq *blkg = pd_to_blkg(&iocg->pd);
-	struct ioc *ioc = q_to_ioc(blkg->q);
+	struct ioc *ioc = q_to_ioc(blkg->disk->queue);
 	struct ioc_now now;
 	struct blkcg_gq *tblkg;
 	unsigned long flags;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ecdc1074183625..b55eac2cf91944 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -974,12 +974,12 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
 {
 	struct iolatency_grp *iolat = pd_to_lat(pd);
 	struct blkcg_gq *blkg = lat_to_blkg(iolat);
-	struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
+	struct rq_qos *rqos = blkcg_rq_qos(blkg->disk->queue);
 	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	if (blk_queue_nonrot(blkg->q))
+	if (blk_queue_nonrot(blkg->disk->queue))
 		iolat->ssd = true;
 	else
 		iolat->ssd = false;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6fb5a2f9e1eed5..f802d8f9099430 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -388,7 +388,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 {
 	struct throtl_grp *tg = pd_to_tg(pd);
 	struct blkcg_gq *blkg = tg_to_blkg(tg);
-	struct throtl_data *td = blkg->q->td;
+	struct throtl_data *td = blkg->disk->queue->td;
 	struct throtl_service_queue *sq = &tg->service_queue;
 
 	/*
@@ -1175,7 +1175,7 @@ static void throtl_pending_timer_fn(struct timer_list *t)
 
 	/* throtl_data may be gone, so figure out request queue by blkg */
 	if (tg)
-		q = tg->pd.blkg->q;
+		q = tg->pd.blkg->disk->queue;
 	else
 		q = td->queue;
 
-- 
2.39.0


* [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Switch the throttle information in struct task_struct from a
request_queue pointer and reference to a gendisk.

Move the check for the dead disk to the latest possible place now that it
is unbundled from the reference grab.
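
That is, the reference is now taken unconditionally via
get_device(disk_to_dev(disk)), and the liveness check only happens when
the throttle actually runs; simplified:

	/* in blkcg_maybe_throttle_current() */
	if (test_bit(GD_DEAD, &disk->state))
		goto out_put_disk;	/* disk is gone, just drop the ref */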

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-cgroup.c    | 37 +++++++++++++++++++------------------
 include/linux/sched.h |  2 +-
 kernel/fork.c         |  2 +-
 mm/swapfile.c         |  2 +-
 4 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f5a634ed098db0..603e911d1350db 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1334,9 +1334,9 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css)
 
 static void blkcg_exit(struct task_struct *tsk)
 {
-	if (tsk->throttle_queue)
-		blk_put_queue(tsk->throttle_queue);
-	tsk->throttle_queue = NULL;
+	if (tsk->throttle_disk)
+		put_disk(tsk->throttle_disk);
+	tsk->throttle_disk = NULL;
 }
 
 struct cgroup_subsys io_cgrp_subsys = {
@@ -1778,29 +1778,32 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
  *
  * This is only called if we've been marked with set_notify_resume().  Obviously
  * we can be set_notify_resume() for reasons other than blkcg throttling, so we
- * check to see if current->throttle_queue is set and if not this doesn't do
+ * check to see if current->throttle_disk is set and if not this doesn't do
  * anything.  This should only ever be called by the resume code, it's not meant
  * to be called by people willy-nilly as it will actually do the work to
  * throttle the task if it is setup for throttling.
  */
 void blkcg_maybe_throttle_current(void)
 {
-	struct request_queue *q = current->throttle_queue;
+	struct gendisk *disk = current->throttle_disk;
 	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool use_memdelay = current->use_memdelay;
 
-	if (!q)
+	if (!disk)
 		return;
 
-	current->throttle_queue = NULL;
+	current->throttle_disk = NULL;
 	current->use_memdelay = false;
 
+	if (test_bit(GD_DEAD, &disk->state))
+		goto out_put_disk;
+
 	rcu_read_lock();
 	blkcg = css_to_blkcg(blkcg_css());
 	if (!blkcg)
 		goto out;
-	blkg = blkg_lookup(blkcg, q);
+	blkg = blkg_lookup(blkcg, disk->queue);
 	if (!blkg)
 		goto out;
 	if (!blkg_tryget(blkg))
@@ -1809,11 +1812,12 @@ void blkcg_maybe_throttle_current(void)
 
 	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
 	blkg_put(blkg);
-	blk_put_queue(q);
+	put_disk(disk);
 	return;
 out:
 	rcu_read_unlock();
-	blk_put_queue(q);
+out_put_disk:
+	put_disk(disk);
 }
 
 /**
@@ -1835,18 +1839,15 @@ void blkcg_maybe_throttle_current(void)
  */
 void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay)
 {
-	struct request_queue *q = disk->queue;
-
 	if (unlikely(current->flags & PF_KTHREAD))
 		return;
 
-	if (current->throttle_queue != q) {
-		if (!blk_get_queue(q))
-			return;
+	if (current->throttle_disk != disk) {
+		get_device(disk_to_dev(disk));
 
-		if (current->throttle_queue)
-			blk_put_queue(current->throttle_queue);
-		current->throttle_queue = q;
+		if (current->throttle_disk)
+			put_disk(current->throttle_disk);
+		current->throttle_disk = disk;
 	}
 
 	if (use_memdelay)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 853d08f7562bda..6f6ce9ca709798 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1436,7 +1436,7 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
-	struct request_queue		*throttle_queue;
+	struct gendisk			*throttle_disk;
 #endif
 
 #ifdef CONFIG_UPROBES
diff --git a/kernel/fork.c b/kernel/fork.c
index 9f7fe354189785..d9c97704b7c9a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,7 +1044,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
-	tsk->throttle_queue = NULL;
+	tsk->throttle_disk = NULL;
 	tsk->use_memdelay = 0;
 #endif
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 908a529bca12c9..3e0a742fb7bbff 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3642,7 +3642,7 @@ void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
 	 * We've already scheduled a throttle, avoid taking the global swap
 	 * lock.
 	 */
-	if (current->throttle_queue)
+	if (current->throttle_disk)
 		return;
 
 	spin_lock(&swap_avail_lock);
-- 
2.39.0


* [PATCH 06/15] blk-wbt: pass a gendisk to wbt_{enable,disable}_default
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Pass a gendisk to wbt_enable_default and wbt_disable_default to
prepare for phasing out usage of the request_queue in the blk-cgroup
code.
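
The conversion is mechanical: the wbt entry points take the disk and
derive the queue internally, e.g. (sketch):

	void wbt_enable_default(struct gendisk *disk)
	{
		struct request_queue *q = disk->queue;
		/* ... */
	}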

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/bfq-iosched.c | 4 ++--
 block/blk-iocost.c  | 4 ++--
 block/blk-sysfs.c   | 2 +-
 block/blk-wbt.c     | 7 ++++---
 block/blk-wbt.h     | 8 ++++----
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 815b884d6c5acf..68062243f2c142 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7165,7 +7165,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 
 	blk_stat_disable_accounting(bfqd->queue);
 	clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
-	wbt_enable_default(bfqd->queue);
+	wbt_enable_default(bfqd->queue->disk);
 
 	kfree(bfqd);
 }
@@ -7354,7 +7354,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
 
 	set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
-	wbt_disable_default(q);
+	wbt_disable_default(q->disk);
 	blk_stat_enable_accounting(q);
 
 	return 0;
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a2b4e7146be5f1..dbb93f4f68d979 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3285,11 +3285,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		blk_stat_enable_accounting(disk->queue);
 		blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
 		ioc->enabled = true;
-		wbt_disable_default(disk->queue);
+		wbt_disable_default(disk);
 	} else {
 		blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
 		ioc->enabled = false;
-		wbt_enable_default(disk->queue);
+		wbt_enable_default(disk);
 	}
 
 	if (user) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5486b6c57f6b8a..2074103865f45b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -826,7 +826,7 @@ int blk_register_queue(struct gendisk *disk)
 		goto out_elv_unregister;
 
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
-	wbt_enable_default(q);
+	wbt_enable_default(disk);
 	blk_throtl_register(disk);
 
 	/* Now everything is ready and send out KOBJ_ADD uevent */
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 68a774d7a7c9c0..8f9302134339c5 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -650,8 +650,9 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
 /*
  * Enable wbt if defaults are configured that way
  */
-void wbt_enable_default(struct request_queue *q)
+void wbt_enable_default(struct gendisk *disk)
 {
+	struct request_queue *q = disk->queue;
 	struct rq_qos *rqos;
 	bool disable_flag = q->elevator &&
 		    test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
@@ -718,9 +719,9 @@ static void wbt_exit(struct rq_qos *rqos)
 /*
  * Disable wbt, if enabled by default.
  */
-void wbt_disable_default(struct request_queue *q)
+void wbt_disable_default(struct gendisk *disk)
 {
-	struct rq_qos *rqos = wbt_rq_qos(q);
+	struct rq_qos *rqos = wbt_rq_qos(disk->queue);
 	struct rq_wb *rwb;
 	if (!rqos)
 		return;
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index e3ea6e7e290076..7ab1cba55c25f7 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -91,8 +91,8 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 #ifdef CONFIG_BLK_WBT
 
 int wbt_init(struct request_queue *);
-void wbt_disable_default(struct request_queue *);
-void wbt_enable_default(struct request_queue *);
+void wbt_disable_default(struct gendisk *disk);
+void wbt_enable_default(struct gendisk *disk);
 
 u64 wbt_get_min_lat(struct request_queue *q);
 void wbt_set_min_lat(struct request_queue *q, u64 val);
@@ -108,10 +108,10 @@ static inline int wbt_init(struct request_queue *q)
 {
 	return -EINVAL;
 }
-static inline void wbt_disable_default(struct request_queue *q)
+static inline void wbt_disable_default(struct gendisk *disk)
 {
 }
-static inline void wbt_enable_default(struct request_queue *q)
+static inline void wbt_enable_default(struct gendisk *disk)
 {
 }
 static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
-- 
2.39.0



* [PATCH 07/15] blk-wbt: pass a gendisk to wbt_init
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Pass a gendisk to wbt_init to prepare for phasing out usage of the
request_queue in the blk-cgroup code.
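
The one caller that only has a request_queue at hand is the sysfs store
path, which can only run on a registered disk, so q->disk is valid there
(usage sketch):

	/* queue_wb_lat_store(), simplified */
	ret = wbt_init(q->disk);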

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-sysfs.c | 2 +-
 block/blk-wbt.c   | 5 +++--
 block/blk-wbt.h   | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 2074103865f45b..c2adf640e5c816 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -500,7 +500,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 
 	rqos = wbt_rq_qos(q);
 	if (!rqos) {
-		ret = wbt_init(q);
+		ret = wbt_init(q->disk);
 		if (ret)
 			return ret;
 	}
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8f9302134339c5..542271fa99e8f7 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -671,7 +671,7 @@ void wbt_enable_default(struct gendisk *disk)
 		return;
 
 	if (queue_is_mq(q) && !disable_flag)
-		wbt_init(q);
+		wbt_init(disk);
 }
 EXPORT_SYMBOL_GPL(wbt_enable_default);
 
@@ -835,8 +835,9 @@ static struct rq_qos_ops wbt_rqos_ops = {
 #endif
 };
 
-int wbt_init(struct request_queue *q)
+int wbt_init(struct gendisk *disk)
 {
+	struct request_queue *q = disk->queue;
 	struct rq_wb *rwb;
 	int i;
 	int ret;
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 7ab1cba55c25f7..b673da41a867d3 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -90,7 +90,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 
 #ifdef CONFIG_BLK_WBT
 
-int wbt_init(struct request_queue *);
+int wbt_init(struct gendisk *disk);
 void wbt_disable_default(struct gendisk *disk);
 void wbt_enable_default(struct gendisk *disk);
 
@@ -104,7 +104,7 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 
 #else
 
-static inline int wbt_init(struct request_queue *q)
+static inline int wbt_init(struct gendisk *disk)
 {
 	return -EINVAL;
 }
-- 
2.39.0



* [PATCH 08/15] blk-wbt: open code wbt_queue_depth_changed in wbt_update_limits
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

No real need to call the method here, so open code it to prepare
for some parameter passing changes.
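
For reference, the method being open coded just refreshes the cached
queue depth and recomputes the limits (from blk-wbt.c, roughly):

	void wbt_queue_depth_changed(struct rq_qos *rqos)
	{
		RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
		wbt_update_limits(RQWB(rqos));
	}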

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-wbt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 542271fa99e8f7..473ae72befaf1a 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -863,9 +863,9 @@ int wbt_init(struct gendisk *disk)
 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
 	rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags);
 	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
+	rwb->rq_depth.queue_depth = blk_queue_depth(q);
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
-
-	wbt_queue_depth_changed(&rwb->rqos);
+	wbt_update_limits(rwb);
 
 	/*
 	 * Assign rwb and add the stats callback.
-- 
2.39.0



* [PATCH 09/15] blk-rq-qos: make rq_qos_add and rq_qos_del more useful
From: Christoph Hellwig @ 2023-01-24  6:57 UTC
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Switch to passing a gendisk, and make rq_qos_add initialize all required
fields and drop the now unneeded q argument from rq_qos_del.  Also move
the code out of line given how large it is.
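
Callers now pass everything in one call and no longer poke at the rq_qos
fields directly, e.g. from the wbt conversion below:

	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
	if (ret)
		goto err_free;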

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-iocost.c    | 13 ++-------
 block/blk-iolatency.c | 14 +++------
 block/blk-rq-qos.c    | 67 +++++++++++++++++++++++++++++++++++++++++++
 block/blk-rq-qos.h    | 62 ++-------------------------------------
 block/blk-wbt.c       |  5 +---
 5 files changed, 78 insertions(+), 83 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index dbb93f4f68d979..5f28463cba0afe 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2843,9 +2843,7 @@ static struct rq_qos_ops ioc_rqos_ops = {
 
 static int blk_iocost_init(struct gendisk *disk)
 {
-	struct request_queue *q = disk->queue;
 	struct ioc *ioc;
-	struct rq_qos *rqos;
 	int i, cpu, ret;
 
 	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
@@ -2868,11 +2866,6 @@ static int blk_iocost_init(struct gendisk *disk)
 		local64_set(&ccs->rq_wait_ns, 0);
 	}
 
-	rqos = &ioc->rqos;
-	rqos->id = RQ_QOS_COST;
-	rqos->ops = &ioc_rqos_ops;
-	rqos->q = q;
-
 	spin_lock_init(&ioc->lock);
 	timer_setup(&ioc->timer, ioc_timer_fn, 0);
 	INIT_LIST_HEAD(&ioc->active_iocgs);
@@ -2896,17 +2889,17 @@ static int blk_iocost_init(struct gendisk *disk)
 	 * called before policy activation completion, can't assume that the
 	 * target bio has an iocg associated and need to test for NULL iocg.
 	 */
-	ret = rq_qos_add(q, rqos);
+	ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
 	if (ret)
 		goto err_free_ioc;
 
-	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
+	ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iocost);
 	if (ret)
 		goto err_del_qos;
 	return 0;
 
 err_del_qos:
-	rq_qos_del(q, rqos);
+	rq_qos_del(&ioc->rqos);
 err_free_ioc:
 	free_percpu(ioc->pcpu_stat);
 	kfree(ioc);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index b55eac2cf91944..1c394bd77aa0b4 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -757,24 +757,18 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
 
 int blk_iolatency_init(struct gendisk *disk)
 {
-	struct request_queue *q = disk->queue;
 	struct blk_iolatency *blkiolat;
-	struct rq_qos *rqos;
 	int ret;
 
 	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
 	if (!blkiolat)
 		return -ENOMEM;
 
-	rqos = &blkiolat->rqos;
-	rqos->id = RQ_QOS_LATENCY;
-	rqos->ops = &blkcg_iolatency_ops;
-	rqos->q = q;
-
-	ret = rq_qos_add(q, rqos);
+	ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
+			 &blkcg_iolatency_ops);
 	if (ret)
 		goto err_free;
-	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
+	ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iolatency);
 	if (ret)
 		goto err_qos_del;
 
@@ -784,7 +778,7 @@ int blk_iolatency_init(struct gendisk *disk)
 	return 0;
 
 err_qos_del:
-	rq_qos_del(q, rqos);
+	rq_qos_del(&blkiolat->rqos);
 err_free:
 	kfree(blkiolat);
 	return ret;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 88f0fe7dcf5451..14bee1bd761362 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -294,3 +294,70 @@ void rq_qos_exit(struct request_queue *q)
 		rqos->ops->exit(rqos);
 	}
 }
+
+int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+		struct rq_qos_ops *ops)
+{
+	struct request_queue *q = disk->queue;
+
+	rqos->q = q;
+	rqos->id = id;
+	rqos->ops = ops;
+
+	/*
+	 * No IO can be in-flight when adding rqos, so freeze queue, which
+	 * is fine since we only support rq_qos for blk-mq queue.
+	 *
+	 * Reuse ->queue_lock for protecting against other concurrent
+	 * rq_qos adding/deleting
+	 */
+	blk_mq_freeze_queue(q);
+
+	spin_lock_irq(&q->queue_lock);
+	if (rq_qos_id(q, rqos->id))
+		goto ebusy;
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+	spin_unlock_irq(&q->queue_lock);
+
+	blk_mq_unfreeze_queue(q);
+
+	if (rqos->ops->debugfs_attrs) {
+		mutex_lock(&q->debugfs_mutex);
+		blk_mq_debugfs_register_rqos(rqos);
+		mutex_unlock(&q->debugfs_mutex);
+	}
+
+	return 0;
+ebusy:
+	spin_unlock_irq(&q->queue_lock);
+	blk_mq_unfreeze_queue(q);
+	return -EBUSY;
+}
+
+void rq_qos_del(struct rq_qos *rqos)
+{
+	struct request_queue *q = rqos->q;
+	struct rq_qos **cur;
+
+	/*
+	 * See comment in rq_qos_add() about freezing queue & using
+	 * ->queue_lock.
+	 */
+	blk_mq_freeze_queue(q);
+
+	spin_lock_irq(&q->queue_lock);
+	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+		if (*cur == rqos) {
+			*cur = rqos->next;
+			break;
+		}
+	}
+	spin_unlock_irq(&q->queue_lock);
+
+	blk_mq_unfreeze_queue(q);
+
+	mutex_lock(&q->debugfs_mutex);
+	blk_mq_debugfs_unregister_rqos(rqos);
+	mutex_unlock(&q->debugfs_mutex);
+}
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 1ef1f7d4bc3cbc..22552785aa31ed 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -85,65 +85,9 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 	init_waitqueue_head(&rq_wait->wait);
 }
 
-static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
-{
-	/*
-	 * No IO can be in-flight when adding rqos, so freeze queue, which
-	 * is fine since we only support rq_qos for blk-mq queue.
-	 *
-	 * Reuse ->queue_lock for protecting against other concurrent
-	 * rq_qos adding/deleting
-	 */
-	blk_mq_freeze_queue(q);
-
-	spin_lock_irq(&q->queue_lock);
-	if (rq_qos_id(q, rqos->id))
-		goto ebusy;
-	rqos->next = q->rq_qos;
-	q->rq_qos = rqos;
-	spin_unlock_irq(&q->queue_lock);
-
-	blk_mq_unfreeze_queue(q);
-
-	if (rqos->ops->debugfs_attrs) {
-		mutex_lock(&q->debugfs_mutex);
-		blk_mq_debugfs_register_rqos(rqos);
-		mutex_unlock(&q->debugfs_mutex);
-	}
-
-	return 0;
-ebusy:
-	spin_unlock_irq(&q->queue_lock);
-	blk_mq_unfreeze_queue(q);
-	return -EBUSY;
-
-}
-
-static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
-{
-	struct rq_qos **cur;
-
-	/*
-	 * See comment in rq_qos_add() about freezing queue & using
-	 * ->queue_lock.
-	 */
-	blk_mq_freeze_queue(q);
-
-	spin_lock_irq(&q->queue_lock);
-	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
-		if (*cur == rqos) {
-			*cur = rqos->next;
-			break;
-		}
-	}
-	spin_unlock_irq(&q->queue_lock);
-
-	blk_mq_unfreeze_queue(q);
-
-	mutex_lock(&q->debugfs_mutex);
-	blk_mq_debugfs_unregister_rqos(rqos);
-	mutex_unlock(&q->debugfs_mutex);
-}
+int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+		struct rq_qos_ops *ops);
+void rq_qos_del(struct rq_qos *rqos);
 
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
 typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 473ae72befaf1a..0b339850fb761a 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -855,9 +855,6 @@ int wbt_init(struct gendisk *disk)
 	for (i = 0; i < WBT_NUM_RWQ; i++)
 		rq_wait_init(&rwb->rq_wait[i]);
 
-	rwb->rqos.id = RQ_QOS_WBT;
-	rwb->rqos.ops = &wbt_rqos_ops;
-	rwb->rqos.q = q;
 	rwb->last_comp = rwb->last_issue = jiffies;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
@@ -870,7 +867,7 @@ int wbt_init(struct gendisk *disk)
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	ret = rq_qos_add(q, &rwb->rqos);
+	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
 	if (ret)
 		goto err_free;
 
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread
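
The removal loop in rq_qos_del() above uses the classic
pointer-to-pointer walk over a singly linked list: iterating over the
address of each link rather than over the nodes themselves means
unlinking the head needs no special case.  A minimal userspace sketch
of the same pattern, with stand-in names rather than kernel API:

#include <stdio.h>

struct node {
	int id;
	struct node *next;
};

/* Walk the chain through the address of each link; *cur is rewired in
 * place, so removing the head is the same as removing any other node. */
static void remove_node(struct node **head, struct node *victim)
{
	struct node **cur;

	for (cur = head; *cur; cur = &(*cur)->next) {
		if (*cur == victim) {
			*cur = victim->next;
			break;
		}
	}
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *head = &a;

	remove_node(&head, &a);		/* unlink the head node */
	for (struct node *n = head; n; n = n->next)
		printf("%d\n", n->id);	/* prints 2, then 3 */
	return 0;
}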

* [PATCH 10/15] blk-rq-qos: constify rq_qos_ops
@ 2023-01-24  6:57   ` Christoph Hellwig
  0 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-24  6:57 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

These op vectors are constant, so mark them const.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-iocost.c    | 2 +-
 block/blk-iolatency.c | 2 +-
 block/blk-rq-qos.c    | 2 +-
 block/blk-rq-qos.h    | 4 ++--
 block/blk-wbt.c       | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 5f28463cba0afe..6f1da7883905b3 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2832,7 +2832,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
 	kfree(ioc);
 }
 
-static struct rq_qos_ops ioc_rqos_ops = {
+static const struct rq_qos_ops ioc_rqos_ops = {
 	.throttle = ioc_rqos_throttle,
 	.merge = ioc_rqos_merge,
 	.done_bio = ioc_rqos_done_bio,
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 1c394bd77aa0b4..f6aeb3d3fdae59 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -650,7 +650,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
 	kfree(blkiolat);
 }
 
-static struct rq_qos_ops blkcg_iolatency_ops = {
+static const struct rq_qos_ops blkcg_iolatency_ops = {
 	.throttle = blkcg_iolatency_throttle,
 	.done_bio = blkcg_iolatency_done_bio,
 	.exit = blkcg_iolatency_exit,
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 14bee1bd761362..8e83734cfe8dbc 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -296,7 +296,7 @@ void rq_qos_exit(struct request_queue *q)
 }
 
 int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
-		struct rq_qos_ops *ops)
+		const struct rq_qos_ops *ops)
 {
 	struct request_queue *q = disk->queue;
 
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 22552785aa31ed..2b7b668479f71a 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -25,7 +25,7 @@ struct rq_wait {
 };
 
 struct rq_qos {
-	struct rq_qos_ops *ops;
+	const struct rq_qos_ops *ops;
 	struct request_queue *q;
 	enum rq_qos_id id;
 	struct rq_qos *next;
@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 }
 
 int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
-		struct rq_qos_ops *ops);
+		const struct rq_qos_ops *ops);
 void rq_qos_del(struct rq_qos *rqos);
 
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0b339850fb761a..43be4379cd097d 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -821,7 +821,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
 };
 #endif
 
-static struct rq_qos_ops wbt_rqos_ops = {
+static const struct rq_qos_ops wbt_rqos_ops = {
 	.throttle = wbt_wait,
 	.issue = wbt_issue,
 	.track = wbt_track,
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread
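
Constifying an ops vector is not just documentation: a const-qualified
struct of function pointers is placed in read-only memory, so a stray
write faults instead of silently redirecting a callback.  A userspace
sketch of the pattern, using stand-in names rather than the kernel
structures:

#include <stdio.h>

struct ops {
	void (*throttle)(int val);
	void (*exit)(void);
};

static void my_throttle(int val) { printf("throttle %d\n", val); }
static void my_exit(void) { printf("exit\n"); }

/* Lives in .rodata; attempts to modify it at run time will fault. */
static const struct ops my_ops = {
	.throttle	= my_throttle,
	.exit		= my_exit,
};

/* Consumers must store a pointer-to-const as well, which is exactly
 * the struct rq_qos change in this patch. */
struct ctx {
	const struct ops *ops;
};

int main(void)
{
	struct ctx c = { .ops = &my_ops };

	c.ops->throttle(42);
	c.ops->exit();
	return 0;
}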

* [PATCH 11/15] blk-rq-qos: store a gendisk instead of request_queue in struct rq_qos
  2023-01-24  6:57 switch blk-cgroup to work on gendisk v2 Christoph Hellwig
                   ` (9 preceding siblings ...)
  2023-01-24  6:57   ` Christoph Hellwig
@ 2023-01-24  6:57 ` Christoph Hellwig
  2023-01-24  6:57   ` Christoph Hellwig
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-24  6:57 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

About half of the users already want the gendisk rather than the
request_queue, and that share is only going to grow.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-iocost.c     | 12 ++++++------
 block/blk-iolatency.c  | 14 +++++++-------
 block/blk-mq-debugfs.c | 10 ++++------
 block/blk-rq-qos.c     |  4 ++--
 block/blk-rq-qos.h     |  2 +-
 block/blk-wbt.c        | 16 +++++++---------
 6 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6f1da7883905b3..a2e9bf30039bcd 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -669,7 +669,7 @@ static struct ioc *q_to_ioc(struct request_queue *q)
 
 static const char __maybe_unused *ioc_name(struct ioc *ioc)
 {
-	struct gendisk *disk = ioc->rqos.q->disk;
+	struct gendisk *disk = ioc->rqos.disk;
 
 	if (!disk)
 		return "<unknown>";
@@ -808,11 +808,11 @@ static int ioc_autop_idx(struct ioc *ioc)
 	u64 now_ns;
 
 	/* rotational? */
-	if (!blk_queue_nonrot(ioc->rqos.q))
+	if (!blk_queue_nonrot(ioc->rqos.disk->queue))
 		return AUTOP_HDD;
 
 	/* handle SATA SSDs w/ broken NCQ */
-	if (blk_queue_depth(ioc->rqos.q) == 1)
+	if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
 		return AUTOP_SSD_QD1;
 
 	/* use one of the normal ssd sets */
@@ -2649,7 +2649,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	if (use_debt) {
 		iocg_incur_debt(iocg, abs_cost, &now);
 		if (iocg_kick_delay(iocg, &now))
-			blkcg_schedule_throttle(rqos->q->disk,
+			blkcg_schedule_throttle(rqos->disk,
 					(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
 		iocg_unlock(iocg, ioc_locked, &flags);
 		return;
@@ -2750,7 +2750,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 	if (likely(!list_empty(&iocg->active_list))) {
 		iocg_incur_debt(iocg, abs_cost, &now);
 		if (iocg_kick_delay(iocg, &now))
-			blkcg_schedule_throttle(rqos->q->disk,
+			blkcg_schedule_throttle(rqos->disk,
 					(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
 	} else {
 		iocg_commit_bio(iocg, bio, abs_cost, cost);
@@ -2821,7 +2821,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
 {
 	struct ioc *ioc = rqos_to_ioc(rqos);
 
-	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost);
+	blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iocost);
 
 	spin_lock_irq(&ioc->lock);
 	ioc->running = IOC_STOP;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index f6aeb3d3fdae59..8e1e43bbde6f0b 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -292,7 +292,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
 	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
 
 	if (use_delay)
-		blkcg_schedule_throttle(rqos->q->disk, use_memdelay);
+		blkcg_schedule_throttle(rqos->disk, use_memdelay);
 
 	/*
 	 * To avoid priority inversions we want to just take a slot if we are
@@ -330,7 +330,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
 				struct child_latency_info *lat_info,
 				bool up)
 {
-	unsigned long qd = blkiolat->rqos.q->nr_requests;
+	unsigned long qd = blkiolat->rqos.disk->queue->nr_requests;
 	unsigned long scale = scale_amount(qd, up);
 	unsigned long old = atomic_read(&lat_info->scale_cookie);
 	unsigned long max_scale = qd << 1;
@@ -372,7 +372,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
  */
 static void scale_change(struct iolatency_grp *iolat, bool up)
 {
-	unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
+	unsigned long qd = iolat->blkiolat->rqos.disk->queue->nr_requests;
 	unsigned long scale = scale_amount(qd, up);
 	unsigned long old = iolat->max_depth;
 
@@ -646,7 +646,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
 
 	timer_shutdown_sync(&blkiolat->timer);
 	flush_work(&blkiolat->enable_work);
-	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
+	blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iolatency);
 	kfree(blkiolat);
 }
 
@@ -665,7 +665,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
 
 	rcu_read_lock();
 	blkg_for_each_descendant_pre(blkg, pos_css,
-				     blkiolat->rqos.q->root_blkg) {
+				     blkiolat->rqos.disk->queue->root_blkg) {
 		struct iolatency_grp *iolat;
 		struct child_latency_info *lat_info;
 		unsigned long flags;
@@ -749,9 +749,9 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
 	 */
 	enabled = atomic_read(&blkiolat->enable_cnt);
 	if (enabled != blkiolat->enabled) {
-		blk_mq_freeze_queue(blkiolat->rqos.q);
+		blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
 		blkiolat->enabled = enabled;
-		blk_mq_unfreeze_queue(blkiolat->rqos.q);
+		blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
 	}
 }
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index bd942341b6382f..b01818f8e216e3 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -813,9 +813,9 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
 
 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 {
-	lockdep_assert_held(&rqos->q->debugfs_mutex);
+	lockdep_assert_held(&rqos->disk->queue->debugfs_mutex);
 
-	if (!rqos->q->debugfs_dir)
+	if (!rqos->disk->queue->debugfs_dir)
 		return;
 	debugfs_remove_recursive(rqos->debugfs_dir);
 	rqos->debugfs_dir = NULL;
@@ -823,7 +823,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 
 void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 {
-	struct request_queue *q = rqos->q;
+	struct request_queue *q = rqos->disk->queue;
 	const char *dir_name = rq_qos_id_to_name(rqos->id);
 
 	lockdep_assert_held(&q->debugfs_mutex);
@@ -835,9 +835,7 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 		q->rqos_debugfs_dir = debugfs_create_dir("rqos",
 							 q->debugfs_dir);
 
-	rqos->debugfs_dir = debugfs_create_dir(dir_name,
-					       rqos->q->rqos_debugfs_dir);
-
+	rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir);
 	debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
 }
 
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 8e83734cfe8dbc..d8cc820a365e3a 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -300,7 +300,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 {
 	struct request_queue *q = disk->queue;
 
-	rqos->q = q;
+	rqos->disk = disk;
 	rqos->id = id;
 	rqos->ops = ops;
 
@@ -337,7 +337,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 
 void rq_qos_del(struct rq_qos *rqos)
 {
-	struct request_queue *q = rqos->q;
+	struct request_queue *q = rqos->disk->queue;
 	struct rq_qos **cur;
 
 	/*
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 2b7b668479f71a..b02a1a3d33a89e 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -26,7 +26,7 @@ struct rq_wait {
 
 struct rq_qos {
 	const struct rq_qos_ops *ops;
-	struct request_queue *q;
+	struct gendisk *disk;
 	enum rq_qos_id id;
 	struct rq_qos *next;
 #ifdef CONFIG_BLK_DEBUG_FS
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 43be4379cd097d..8b768761871b61 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -98,7 +98,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
  */
 static bool wb_recent_wait(struct rq_wb *rwb)
 {
-	struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb;
+	struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
 
 	return time_before(jiffies, wb->dirty_sleep + HZ);
 }
@@ -235,7 +235,7 @@ enum {
 
 static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 {
-	struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
+	struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
 	struct rq_depth *rqd = &rwb->rq_depth;
 	u64 thislat;
 
@@ -288,7 +288,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 
 static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 {
-	struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
+	struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
 	struct rq_depth *rqd = &rwb->rq_depth;
 
 	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
@@ -358,13 +358,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 	unsigned int inflight = wbt_inflight(rwb);
 	int status;
 
-	if (!rwb->rqos.q->disk)
+	if (!rwb->rqos.disk)
 		return;
 
 	status = latency_exceeded(rwb, cb->stat);
 
-	trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
-			inflight);
+	trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);
 
 	/*
 	 * If we exceeded the latency target, step down. If we did not,
@@ -702,16 +701,15 @@ static int wbt_data_dir(const struct request *rq)
 
 static void wbt_queue_depth_changed(struct rq_qos *rqos)
 {
-	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
+	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
 	wbt_update_limits(RQWB(rqos));
 }
 
 static void wbt_exit(struct rq_qos *rqos)
 {
 	struct rq_wb *rwb = RQWB(rqos);
-	struct request_queue *q = rqos->q;
 
-	blk_stat_remove_callback(q, rwb->cb);
+	blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
 	blk_stat_free_callback(rwb->cb);
 	kfree(rwb);
 }
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread
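
The shape of the change is a back-pointer swap: store the owning
gendisk and derive the queue with one extra hop, instead of storing the
queue and reaching disk-level data through a nullable q->disk.  A
standalone sketch with stand-in types, not the kernel definitions:

#include <stdio.h>

struct request_queue { int id; };

struct gendisk {
	const char *name;
	struct request_queue *queue;	/* always valid for a live disk */
};

struct rq_qos {
	struct gendisk *disk;		/* was: struct request_queue *q */
};

static struct request_queue *rqos_queue(struct rq_qos *rqos)
{
	return rqos->disk->queue;	/* the old rqos->q, one hop away */
}

int main(void)
{
	struct request_queue q = { 7 };
	struct gendisk disk = { "sda", &q };
	struct rq_qos rqos = { &disk };

	/* Disk-level data (the bdi, for instance) no longer depends on
	 * q->disk being non-NULL; queue-level data stays reachable. */
	printf("%s: queue %d\n", rqos.disk->name, rqos_queue(&rqos)->id);
	return 0;
}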

* [PATCH 12/15] blk-cgroup: pass a gendisk to blkcg_{de,}activate_policy
@ 2023-01-24  6:57   ` Christoph Hellwig
  0 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-24  6:57 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Prepare for storing the blkcg information in the gendisk instead of
the request_queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/bfq-cgroup.c    |  2 +-
 block/bfq-iosched.c   |  2 +-
 block/blk-cgroup.c    | 21 +++++++++++----------
 block/blk-cgroup.h    |  9 ++++-----
 block/blk-iocost.c    |  4 ++--
 block/blk-iolatency.c |  4 ++--
 block/blk-ioprio.c    |  4 ++--
 block/blk-throttle.c  |  4 ++--
 8 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 72a033776722c9..b1b8eca99d988f 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1293,7 +1293,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
 {
 	int ret;
 
-	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
+	ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq);
 	if (ret)
 		return NULL;
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 68062243f2c142..eda3a838f3c3fd 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7155,7 +7155,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 	bfqg_and_blkg_put(bfqd->root_group);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
-	blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq);
+	blkcg_deactivate_policy(bfqd->queue->disk, &blkcg_policy_bfq);
 #else
 	spin_lock_irq(&bfqd->lock);
 	bfq_put_async_queues(bfqd, bfqd->root_group);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 603e911d1350db..43cd02631162f8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1362,14 +1362,14 @@ struct cgroup_subsys io_cgrp_subsys = {
 EXPORT_SYMBOL_GPL(io_cgrp_subsys);
 
 /**
- * blkcg_activate_policy - activate a blkcg policy on a request_queue
- * @q: request_queue of interest
+ * blkcg_activate_policy - activate a blkcg policy on a gendisk
+ * @disk: gendisk of interest
  * @pol: blkcg policy to activate
  *
- * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
+ * Activate @pol on @disk.  Requires %GFP_KERNEL context.  @disk goes through
  * bypass mode to populate its blkgs with policy_data for @pol.
  *
- * Activation happens with @q bypassed, so nobody would be accessing blkgs
+ * Activation happens with @disk bypassed, so nobody would be accessing blkgs
  * from IO path.  Update of each blkg is protected by both queue and blkcg
  * locks so that holding either lock and testing blkcg_policy_enabled() is
  * always enough for dereferencing policy data.
@@ -1377,9 +1377,9 @@ EXPORT_SYMBOL_GPL(io_cgrp_subsys);
  * The caller is responsible for synchronizing [de]activations and policy
  * [un]registerations.  Returns 0 on success, -errno on failure.
  */
-int blkcg_activate_policy(struct request_queue *q,
-			  const struct blkcg_policy *pol)
+int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 {
+	struct request_queue *q = disk->queue;
 	struct blkg_policy_data *pd_prealloc = NULL;
 	struct blkcg_gq *blkg, *pinned_blkg = NULL;
 	int ret;
@@ -1473,16 +1473,17 @@ int blkcg_activate_policy(struct request_queue *q,
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
 
 /**
- * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
- * @q: request_queue of interest
+ * blkcg_deactivate_policy - deactivate a blkcg policy on a gendisk
+ * @disk: gendisk of interest
  * @pol: blkcg policy to deactivate
  *
- * Deactivate @pol on @q.  Follows the same synchronization rules as
+ * Deactivate @pol on @disk.  Follows the same synchronization rules as
  * blkcg_activate_policy().
  */
-void blkcg_deactivate_policy(struct request_queue *q,
+void blkcg_deactivate_policy(struct gendisk *disk,
 			     const struct blkcg_policy *pol)
 {
+	struct request_queue *q = disk->queue;
 	struct blkcg_gq *blkg;
 
 	if (!blkcg_policy_enabled(q, pol))
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 85b267234823ab..e9e0c00d13d64d 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -190,9 +190,8 @@ void blkcg_exit_disk(struct gendisk *disk);
 /* Blkio controller policy registration */
 int blkcg_policy_register(struct blkcg_policy *pol);
 void blkcg_policy_unregister(struct blkcg_policy *pol);
-int blkcg_activate_policy(struct request_queue *q,
-			  const struct blkcg_policy *pol);
-void blkcg_deactivate_policy(struct request_queue *q,
+int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol);
+void blkcg_deactivate_policy(struct gendisk *disk,
 			     const struct blkcg_policy *pol);
 
 const char *blkg_dev_name(struct blkcg_gq *blkg);
@@ -491,9 +490,9 @@ static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
 static inline void blkcg_exit_disk(struct gendisk *disk) { }
 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
-static inline int blkcg_activate_policy(struct request_queue *q,
+static inline int blkcg_activate_policy(struct gendisk *disk,
 					const struct blkcg_policy *pol) { return 0; }
-static inline void blkcg_deactivate_policy(struct request_queue *q,
+static inline void blkcg_deactivate_policy(struct gendisk *disk,
 					   const struct blkcg_policy *pol) { }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a2e9bf30039bcd..078b7770951962 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2821,7 +2821,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
 {
 	struct ioc *ioc = rqos_to_ioc(rqos);
 
-	blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iocost);
+	blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iocost);
 
 	spin_lock_irq(&ioc->lock);
 	ioc->running = IOC_STOP;
@@ -2893,7 +2893,7 @@ static int blk_iocost_init(struct gendisk *disk)
 	if (ret)
 		goto err_free_ioc;
 
-	ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iocost);
+	ret = blkcg_activate_policy(disk, &blkcg_policy_iocost);
 	if (ret)
 		goto err_del_qos;
 	return 0;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 8e1e43bbde6f0b..39853fc5c2b02f 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -646,7 +646,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
 
 	timer_shutdown_sync(&blkiolat->timer);
 	flush_work(&blkiolat->enable_work);
-	blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iolatency);
+	blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iolatency);
 	kfree(blkiolat);
 }
 
@@ -768,7 +768,7 @@ int blk_iolatency_init(struct gendisk *disk)
 			 &blkcg_iolatency_ops);
 	if (ret)
 		goto err_free;
-	ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iolatency);
+	ret = blkcg_activate_policy(disk, &blkcg_policy_iolatency);
 	if (ret)
 		goto err_qos_del;
 
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 8bb6b8eba4cee8..8194826cc824bc 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -204,12 +204,12 @@ void blkcg_set_ioprio(struct bio *bio)
 
 void blk_ioprio_exit(struct gendisk *disk)
 {
-	blkcg_deactivate_policy(disk->queue, &ioprio_policy);
+	blkcg_deactivate_policy(disk, &ioprio_policy);
 }
 
 int blk_ioprio_init(struct gendisk *disk)
 {
-	return blkcg_activate_policy(disk->queue, &ioprio_policy);
+	return blkcg_activate_policy(disk, &ioprio_policy);
 }
 
 static int __init ioprio_init(void)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f802d8f9099430..efc0a9092c6942 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2395,7 +2395,7 @@ int blk_throtl_init(struct gendisk *disk)
 	td->low_downgrade_time = jiffies;
 
 	/* activate policy */
-	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
+	ret = blkcg_activate_policy(disk, &blkcg_policy_throtl);
 	if (ret) {
 		free_percpu(td->latency_buckets[READ]);
 		free_percpu(td->latency_buckets[WRITE]);
@@ -2411,7 +2411,7 @@ void blk_throtl_exit(struct gendisk *disk)
 	BUG_ON(!q->td);
 	del_timer_sync(&q->td->service_queue.pending_timer);
 	throtl_shutdown_wq(q);
-	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+	blkcg_deactivate_policy(disk, &blkcg_policy_throtl);
 	free_percpu(q->td->latency_buckets[READ]);
 	free_percpu(q->td->latency_buckets[WRITE]);
 	kfree(q->td);
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread
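
The conversion follows the usual minimal-churn recipe for widening an
interface: take the higher-level object, derive the old argument as a
local at the top, and leave the body alone.  A sketch of that recipe
with hypothetical names:

#include <stdio.h>

struct request_queue { int nr_requests; };
struct gendisk { struct request_queue *queue; };

/* old: static int activate(struct request_queue *q) */
static int activate(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;	/* minimal-churn shim */

	/* pre-existing queue-level logic keeps using q unchanged */
	printf("activating on a queue with %d requests\n", q->nr_requests);
	return 0;
}

int main(void)
{
	struct request_queue q = { 128 };
	struct gendisk disk = { &q };

	return activate(&disk);
}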

* [PATCH 13/15] blk-cgroup: pass a gendisk to pd_alloc_fn
  2023-01-24  6:57 switch blk-cgroup to work on gendisk v2 Christoph Hellwig
                   ` (11 preceding siblings ...)
  2023-01-24  6:57   ` Christoph Hellwig
@ 2023-01-24  6:57 ` Christoph Hellwig
  2023-01-24  6:57 ` [PATCH 14/15] blk-cgroup: pass a gendisk to blkg_lookup Christoph Hellwig
  2023-01-24  6:57   ` Christoph Hellwig
  14 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-24  6:57 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

No need for the request_queue here; pass a gendisk and extract the
node id from that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/bfq-cgroup.c    |  6 +++---
 block/blk-cgroup.c    | 10 +++++-----
 block/blk-cgroup.h    |  4 ++--
 block/blk-iocost.c    |  7 ++++---
 block/blk-iolatency.c |  7 +++----
 block/blk-ioprio.c    |  2 +-
 block/blk-throttle.c  |  7 +++----
 7 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index b1b8eca99d988f..055f9684c1c502 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -515,12 +515,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd)
 	kfree(cpd_to_bfqgd(cpd));
 }
 
-static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
-					     struct blkcg *blkcg)
+static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk,
+		struct blkcg *blkcg, gfp_t gfp)
 {
 	struct bfq_group *bfqg;
 
-	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
+	bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id);
 	if (!bfqg)
 		return NULL;
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 43cd02631162f8..2fa42b5b538069 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -268,7 +268,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 			continue;
 
 		/* alloc per-policy data and attach it to blkg */
-		pd = pol->pd_alloc_fn(gfp_mask, disk->queue, blkcg);
+		pd = pol->pd_alloc_fn(disk, blkcg, gfp_mask);
 		if (!pd)
 			goto err_free;
 
@@ -1404,8 +1404,8 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 			pd = pd_prealloc;
 			pd_prealloc = NULL;
 		} else {
-			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
-					      blkg->blkcg);
+			pd = pol->pd_alloc_fn(disk, blkg->blkcg,
+					      GFP_NOWAIT | __GFP_NOWARN);
 		}
 
 		if (!pd) {
@@ -1422,8 +1422,8 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 
 			if (pd_prealloc)
 				pol->pd_free_fn(pd_prealloc);
-			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
-						       blkg->blkcg);
+			pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg,
+						       GFP_KERNEL);
 			if (pd_prealloc)
 				goto retry;
 			else
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index e9e0c00d13d64d..9a2cd3c71a94a2 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -150,8 +150,8 @@ typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
 typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
 typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
 typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
-				struct request_queue *q, struct blkcg *blkcg);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk,
+		struct blkcg *blkcg, gfp_t gfp);
 typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 078b7770951962..7a2dc9dc8e3ba0 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2923,13 +2923,14 @@ static void ioc_cpd_free(struct blkcg_policy_data *cpd)
 	kfree(container_of(cpd, struct ioc_cgrp, cpd));
 }
 
-static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q,
-					     struct blkcg *blkcg)
+static struct blkg_policy_data *ioc_pd_alloc(struct gendisk *disk,
+		struct blkcg *blkcg, gfp_t gfp)
 {
 	int levels = blkcg->css.cgroup->level + 1;
 	struct ioc_gq *iocg;
 
-	iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node);
+	iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp,
+			    disk->node_id);
 	if (!iocg)
 		return NULL;
 
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 39853fc5c2b02f..bc0d217f5c1723 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -946,13 +946,12 @@ static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 			iolat->max_depth, avg_lat, cur_win);
 }
 
-static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
-						   struct request_queue *q,
-						   struct blkcg *blkcg)
+static struct blkg_policy_data *iolatency_pd_alloc(struct gendisk *disk,
+		struct blkcg *blkcg, gfp_t gfp)
 {
 	struct iolatency_grp *iolat;
 
-	iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
+	iolat = kzalloc_node(sizeof(*iolat), gfp, disk->node_id);
 	if (!iolat)
 		return NULL;
 	iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 8194826cc824bc..055529b9b92bab 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -116,7 +116,7 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
 }
 
 static struct blkg_policy_data *
-ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg)
+ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp)
 {
 	struct ioprio_blkg *ioprio_blkg;
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index efc0a9092c6942..74bb1e753ea09d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -335,14 +335,13 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
 	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
 }
 
-static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
-						struct request_queue *q,
-						struct blkcg *blkcg)
+static struct blkg_policy_data *throtl_pd_alloc(struct gendisk *disk,
+		struct blkcg *blkcg, gfp_t gfp)
 {
 	struct throtl_grp *tg;
 	int rw;
 
-	tg = kzalloc_node(sizeof(*tg), gfp, q->node);
+	tg = kzalloc_node(sizeof(*tg), gfp, disk->node_id);
 	if (!tg)
 		return NULL;
 
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread
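
Besides swapping the queue for the disk, the callback signature also
moves to the (object, context, gfp) argument order.  A sketch of the
migrated hook, with kzalloc_node() modeled by a plain malloc that
merely records the node and the blkcg argument omitted for brevity;
all names here are stand-ins:

#include <stdio.h>
#include <stdlib.h>

struct gendisk { int node_id; };

struct pd { int node; };

/* old: typedef struct pd *(*alloc_pd_fn)(int gfp, struct request_queue *q); */
typedef struct pd *(*alloc_pd_fn)(struct gendisk *disk, int gfp);

static struct pd *my_alloc_pd(struct gendisk *disk, int gfp)
{
	struct pd *pd = malloc(sizeof(*pd));	/* kzalloc_node(..., gfp,
						   disk->node_id) */
	(void)gfp;
	if (pd)
		pd->node = disk->node_id;	/* allocate near the disk */
	return pd;
}

int main(void)
{
	struct gendisk disk = { .node_id = 1 };
	alloc_pd_fn alloc = my_alloc_pd;
	struct pd *pd = alloc(&disk, 0);

	if (pd) {
		printf("pd for node %d\n", pd->node);
		free(pd);
	}
	return 0;
}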

* [PATCH 14/15] blk-cgroup: pass a gendisk to blkg_lookup
  2023-01-24  6:57 switch blk-cgroup to work on gendisk v2 Christoph Hellwig
                   ` (12 preceding siblings ...)
  2023-01-24  6:57 ` [PATCH 13/15] blk-cgroup: pass a gendisk to pd_alloc_fn Christoph Hellwig
@ 2023-01-24  6:57 ` Christoph Hellwig
  2023-01-24  6:57   ` Christoph Hellwig
  14 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-24  6:57 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

Pass a gendisk to blkg_lookup and use that to find the match as part
of phasing out usage of the request_queue in the blk-cgroup code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/blk-cgroup.c | 16 ++++++++--------
 block/blk-cgroup.h | 20 ++++++++++----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2fa42b5b538069..b9883ea81ad0da 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -320,7 +320,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
 
 	/* link parent */
 	if (blkcg_parent(blkcg)) {
-		blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue);
+		blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk);
 		if (WARN_ON_ONCE(!blkg->parent)) {
 			ret = -ENODEV;
 			goto err_put_css;
@@ -389,12 +389,12 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	blkg = blkg_lookup(blkcg, q);
+	blkg = blkg_lookup(blkcg, disk);
 	if (blkg)
 		return blkg;
 
 	spin_lock_irqsave(&q->queue_lock, flags);
-	blkg = blkg_lookup(blkcg, q);
+	blkg = blkg_lookup(blkcg, disk);
 	if (blkg) {
 		if (blkcg != &blkcg_root &&
 		    blkg != rcu_dereference(blkcg->blkg_hint))
@@ -413,7 +413,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 		struct blkcg_gq *ret_blkg = q->root_blkg;
 
 		while (parent) {
-			blkg = blkg_lookup(parent, q);
+			blkg = blkg_lookup(parent, disk);
 			if (blkg) {
 				/* remember closest blkg */
 				ret_blkg = blkg;
@@ -692,7 +692,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		goto fail_unlock;
 	}
 
-	blkg = blkg_lookup(blkcg, q);
+	blkg = blkg_lookup(blkcg, disk);
 	if (blkg)
 		goto success;
 
@@ -706,7 +706,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		struct blkcg_gq *new_blkg;
 
 		parent = blkcg_parent(blkcg);
-		while (parent && !blkg_lookup(parent, q)) {
+		while (parent && !blkg_lookup(parent, disk)) {
 			pos = parent;
 			parent = blkcg_parent(parent);
 		}
@@ -736,7 +736,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 			goto fail_preloaded;
 		}
 
-		blkg = blkg_lookup(pos, q);
+		blkg = blkg_lookup(pos, disk);
 		if (blkg) {
 			blkg_free(new_blkg);
 		} else {
@@ -1804,7 +1804,7 @@ void blkcg_maybe_throttle_current(void)
 	blkcg = css_to_blkcg(blkcg_css());
 	if (!blkcg)
 		goto out;
-	blkg = blkg_lookup(blkcg, disk->queue);
+	blkg = blkg_lookup(blkcg, disk);
 	if (!blkg)
 		goto out;
 	if (!blkg_tryget(blkg))
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9a2cd3c71a94a2..3e7508907f33d8 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -230,30 +230,30 @@ static inline bool bio_issue_as_root_blkg(struct bio *bio)
 }
 
 /**
- * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * blkg_lookup - lookup blkg for the specified blkcg - disk pair
  * @blkcg: blkcg of interest
- * @q: request_queue of interest
+ * @disk: gendisk of interest
  *
- * Lookup blkg for the @blkcg - @q pair.
+ * Lookup blkg for the @blkcg - @disk pair.
 
  * Must be called in a RCU critical section.
  */
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
-					   struct request_queue *q)
+					   struct gendisk *disk)
 {
 	struct blkcg_gq *blkg;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	if (blkcg == &blkcg_root)
-		return q->root_blkg;
+		return disk->queue->root_blkg;
 
 	blkg = rcu_dereference(blkcg->blkg_hint);
-	if (blkg && blkg->disk->queue == q)
+	if (blkg && blkg->disk == disk)
 		return blkg;
 
-	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
-	if (blkg && blkg->disk->queue != q)
+	blkg = radix_tree_lookup(&blkcg->blkg_tree, disk->queue->id);
+	if (blkg && blkg->disk != disk)
 		blkg = NULL;
 	return blkg;
 }
@@ -353,7 +353,7 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
 	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
 		if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
-					    (p_blkg)->disk->queue)))
+					    (p_blkg)->disk)))
 
 /**
  * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
@@ -368,7 +368,7 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
 	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
 		if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),	\
-					    (p_blkg)->disk->queue)))
+					    (p_blkg)->disk)))
 
 bool __blkcg_punt_bio_submit(struct bio *bio);
 
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread
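
blkg_lookup() keeps its hint-then-slow-path shape; only the comparison
key changes from the queue to the disk.  A sketch of that shape with an
array standing in for the radix tree and no RCU (the kernel version
must run under rcu_read_lock()):

#include <stdio.h>

struct gendisk { int id; };

struct blkg { struct gendisk *disk; };

struct blkcg {
	struct blkg *hint;	/* last blkg this blkcg looked up */
	struct blkg *table[8];	/* stand-in for the radix tree */
};

static struct blkg *lookup(struct blkcg *blkcg, struct gendisk *disk)
{
	struct blkg *blkg = blkcg->hint;

	if (blkg && blkg->disk == disk)		/* fast path: hint hit */
		return blkg;

	blkg = blkcg->table[disk->id & 7];	/* slow path: indexed lookup */
	if (blkg && blkg->disk != disk)
		blkg = NULL;
	return blkg;
}

int main(void)
{
	struct gendisk d = { 3 };
	struct blkg g = { &d };
	struct blkcg cg = { .hint = NULL };

	cg.table[3] = &g;
	printf("%s\n", lookup(&cg, &d) == &g ? "found" : "missed");
	return 0;
}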

* [PATCH 15/15] blk-cgroup: move the cgroup information to struct gendisk
@ 2023-01-24  6:57   ` Christoph Hellwig
  0 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-24  6:57 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups, Andreas Herrmann

cgroup information only makes sense for a live gendisk that allows
file system I/O (which includes the raw block device).  So move the
cgroup-related members over.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
---
 block/bfq-cgroup.c     |  4 ++--
 block/blk-cgroup.c     | 48 +++++++++++++++++++++---------------------
 block/blk-cgroup.h     |  2 +-
 block/blk-iolatency.c  |  2 +-
 block/blk-throttle.c   | 16 ++++++++------
 include/linux/blkdev.h | 10 ++++-----
 6 files changed, 43 insertions(+), 39 deletions(-)
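
Most of the mechanical churn below moves the per-policy enable bitmap,
the blkg list and the root blkg from the request_queue to the gendisk.
The enabled-policy check itself stays a plain test_bit(); a userspace
sketch with bit arithmetic standing in for the kernel bitops:

#include <stdio.h>

struct gendisk {
	unsigned long blkcg_pols;	/* one bit per registered policy */
};

static int policy_enabled(struct gendisk *disk, int plid)
{
	return !!(disk->blkcg_pols & (1UL << plid));	/* test_bit() */
}

int main(void)
{
	struct gendisk disk = { 0 };

	disk.blkcg_pols |= 1UL << 2;		/* __set_bit(plid, ...) */
	printf("pol 2: %d, pol 3: %d\n",
	       policy_enabled(&disk, 2), policy_enabled(&disk, 3));
	disk.blkcg_pols &= ~(1UL << 2);		/* __clear_bit(plid, ...) */
	printf("pol 2 after clear: %d\n", policy_enabled(&disk, 2));
	return 0;
}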

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 055f9684c1c502..c13ba851c5221a 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1003,7 +1003,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
 {
 	struct blkcg_gq *blkg;
 
-	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+	list_for_each_entry(blkg, &bfqd->queue->disk->blkg_list, q_node) {
 		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
 
 		bfq_end_wr_async_queues(bfqd, bfqg);
@@ -1297,7 +1297,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
 	if (ret)
 		return NULL;
 
-	return blkg_to_bfqg(bfqd->queue->root_blkg);
+	return blkg_to_bfqg(bfqd->queue->disk->root_blkg);
 }
 
 struct blkcg_policy blkcg_policy_bfq = {
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b9883ea81ad0da..6a0aa51668db62 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -108,10 +108,10 @@ static struct cgroup_subsys_state *blkcg_css(void)
 	return task_css(current, io_cgrp_id);
 }
 
-static bool blkcg_policy_enabled(struct request_queue *q,
+static bool blkcg_policy_enabled(struct gendisk *disk,
 				 const struct blkcg_policy *pol)
 {
-	return pol && test_bit(pol->plid, q->blkcg_pols);
+	return pol && test_bit(pol->plid, disk->blkcg_pols);
 }
 
 /**
@@ -264,7 +264,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 		struct blkcg_policy *pol = blkcg_policy[i];
 		struct blkg_policy_data *pd;
 
-		if (!blkcg_policy_enabled(disk->queue, pol))
+		if (!blkcg_policy_enabled(disk, pol))
 			continue;
 
 		/* alloc per-policy data and attach it to blkg */
@@ -341,7 +341,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
 	ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg);
 	if (likely(!ret)) {
 		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
-		list_add(&blkg->q_node, &disk->queue->blkg_list);
+		list_add(&blkg->q_node, &disk->blkg_list);
 
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
@@ -410,7 +410,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	while (true) {
 		struct blkcg *pos = blkcg;
 		struct blkcg *parent = blkcg_parent(blkcg);
-		struct blkcg_gq *ret_blkg = q->root_blkg;
+		struct blkcg_gq *ret_blkg = disk->root_blkg;
 
 		while (parent) {
 			blkg = blkg_lookup(parent, disk);
@@ -485,7 +485,7 @@ static void blkg_destroy_all(struct gendisk *disk)
 
 restart:
 	spin_lock_irq(&q->queue_lock);
-	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
+	list_for_each_entry_safe(blkg, n, &disk->blkg_list, q_node) {
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
@@ -504,7 +504,7 @@ static void blkg_destroy_all(struct gendisk *disk)
 		}
 	}
 
-	q->root_blkg = NULL;
+	disk->root_blkg = NULL;
 	spin_unlock_irq(&q->queue_lock);
 }
 
@@ -579,7 +579,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
 		spin_lock_irq(&blkg->disk->queue->queue_lock);
-		if (blkcg_policy_enabled(blkg->disk->queue, pol))
+		if (blkcg_policy_enabled(blkg->disk, pol))
 			total += prfill(sf, blkg->pd[pol->plid], data);
 		spin_unlock_irq(&blkg->disk->queue->queue_lock);
 	}
@@ -687,7 +687,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	rcu_read_lock();
 	spin_lock_irq(&q->queue_lock);
 
-	if (!blkcg_policy_enabled(q, pol)) {
+	if (!blkcg_policy_enabled(disk, pol)) {
 		ret = -EOPNOTSUPP;
 		goto fail_unlock;
 	}
@@ -730,7 +730,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		rcu_read_lock();
 		spin_lock_irq(&q->queue_lock);
 
-		if (!blkcg_policy_enabled(q, pol)) {
+		if (!blkcg_policy_enabled(disk, pol)) {
 			blkg_free(new_blkg);
 			ret = -EOPNOTSUPP;
 			goto fail_preloaded;
@@ -910,7 +910,7 @@ static void blkcg_fill_root_iostats(void)
 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 	while ((dev = class_dev_iter_next(&iter))) {
 		struct block_device *bdev = dev_to_bdev(dev);
-		struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg;
+		struct blkcg_gq *blkg = bdev->bd_disk->root_blkg;
 		struct blkg_iostat tmp;
 		int cpu;
 		unsigned long flags;
@@ -1257,7 +1257,7 @@ int blkcg_init_disk(struct gendisk *disk)
 	bool preloaded;
 	int ret;
 
-	INIT_LIST_HEAD(&q->blkg_list);
+	INIT_LIST_HEAD(&disk->blkg_list);
 
 	new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
 	if (!new_blkg)
@@ -1271,7 +1271,7 @@ int blkcg_init_disk(struct gendisk *disk)
 	blkg = blkg_create(&blkcg_root, disk, new_blkg);
 	if (IS_ERR(blkg))
 		goto err_unlock;
-	q->root_blkg = blkg;
+	disk->root_blkg = blkg;
 	spin_unlock_irq(&q->queue_lock);
 
 	if (preloaded)
@@ -1384,7 +1384,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	struct blkcg_gq *blkg, *pinned_blkg = NULL;
 	int ret;
 
-	if (blkcg_policy_enabled(q, pol))
+	if (blkcg_policy_enabled(disk, pol))
 		return 0;
 
 	if (queue_is_mq(q))
@@ -1393,7 +1393,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	spin_lock_irq(&q->queue_lock);
 
 	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
-	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
+	list_for_each_entry_reverse(blkg, &disk->blkg_list, q_node) {
 		struct blkg_policy_data *pd;
 
 		if (blkg->pd[pol->plid])
@@ -1437,10 +1437,10 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 
 	/* all allocated, init in the same order */
 	if (pol->pd_init_fn)
-		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+		list_for_each_entry_reverse(blkg, &disk->blkg_list, q_node)
 			pol->pd_init_fn(blkg->pd[pol->plid]);
 
-	__set_bit(pol->plid, q->blkcg_pols);
+	__set_bit(pol->plid, disk->blkcg_pols);
 	ret = 0;
 
 	spin_unlock_irq(&q->queue_lock);
@@ -1456,7 +1456,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 enomem:
 	/* alloc failed, nothing's initialized yet, free everything */
 	spin_lock_irq(&q->queue_lock);
-	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+	list_for_each_entry(blkg, &disk->blkg_list, q_node) {
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
@@ -1486,7 +1486,7 @@ void blkcg_deactivate_policy(struct gendisk *disk,
 	struct request_queue *q = disk->queue;
 	struct blkcg_gq *blkg;
 
-	if (!blkcg_policy_enabled(q, pol))
+	if (!blkcg_policy_enabled(disk, pol))
 		return;
 
 	if (queue_is_mq(q))
@@ -1494,9 +1494,9 @@ void blkcg_deactivate_policy(struct gendisk *disk,
 
 	spin_lock_irq(&q->queue_lock);
 
-	__clear_bit(pol->plid, q->blkcg_pols);
+	__clear_bit(pol->plid, disk->blkcg_pols);
 
-	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+	list_for_each_entry(blkg, &disk->blkg_list, q_node) {
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
@@ -1909,7 +1909,7 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
  * Associate @bio with the blkg found by combining the css's blkg and the
  * request_queue of the @bio.  An association failure is handled by walking up
  * the blkg tree.  Therefore, the blkg associated can be anything between @blkg
- * and q->root_blkg.  This situation only happens when a cgroup is dying and
+ * and disk->root_blkg.  This situation only happens when a cgroup is dying and
  * then the remaining bios will spill to the closest alive blkg.
  *
  * A reference will be taken on the blkg and will be released when @bio is
@@ -1924,8 +1924,8 @@ void bio_associate_blkg_from_css(struct bio *bio,
 	if (css && css->parent) {
 		bio->bi_blkg = blkg_tryget_closest(bio, css);
 	} else {
-		blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg);
-		bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg;
+		blkg_get(bio->bi_bdev->bd_disk->root_blkg);
+		bio->bi_blkg = bio->bi_bdev->bd_disk->root_blkg;
 	}
 }
 EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 3e7508907f33d8..81eca9be7f9105 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -246,7 +246,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	if (blkcg == &blkcg_root)
-		return disk->queue->root_blkg;
+		return disk->root_blkg;
 
 	blkg = rcu_dereference(blkcg->blkg_hint);
 	if (blkg && blkg->disk == disk)
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index bc0d217f5c1723..5d5aa1e526b742 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -665,7 +665,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
 
 	rcu_read_lock();
 	blkg_for_each_descendant_pre(blkg, pos_css,
-				     blkiolat->rqos.disk->queue->root_blkg) {
+				     blkiolat->rqos.disk->root_blkg) {
 		struct iolatency_grp *iolat;
 		struct child_latency_info *lat_info;
 		unsigned long flags;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 74bb1e753ea09d..902203bdddb4b4 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -451,7 +451,8 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
 	bool low_valid = false;
 
 	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+	blkg_for_each_descendant_post(blkg, pos_css,
+			td->queue->disk->root_blkg) {
 		struct throtl_grp *tg = blkg_to_tg(blkg);
 
 		if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
@@ -1180,7 +1181,7 @@ static void throtl_pending_timer_fn(struct timer_list *t)
 
 	spin_lock_irq(&q->queue_lock);
 
-	if (!q->root_blkg)
+	if (!q->disk->root_blkg)
 		goto out_unlock;
 
 	if (throtl_can_upgrade(td, NULL))
@@ -1322,7 +1323,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
 	 * blk-throttle.
 	 */
 	blkg_for_each_descendant_pre(blkg, pos_css,
-			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
+			global ? tg->td->queue->disk->root_blkg :
+			tg_to_blkg(tg)) {
 		struct throtl_grp *this_tg = blkg_to_tg(blkg);
 		struct throtl_grp *parent_tg;
 
@@ -1717,7 +1719,7 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
 	 * path need RCU protection and to prevent warning from lockdep.
 	 */
 	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+	blkg_for_each_descendant_post(blkg, pos_css, disk->root_blkg) {
 		struct throtl_grp *tg = blkg_to_tg(blkg);
 		struct throtl_service_queue *sq = &tg->service_queue;
 
@@ -1871,7 +1873,8 @@ static bool throtl_can_upgrade(struct throtl_data *td,
 		return false;
 
 	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+	blkg_for_each_descendant_post(blkg, pos_css,
+			td->queue->disk->root_blkg) {
 		struct throtl_grp *tg = blkg_to_tg(blkg);
 
 		if (tg == this_tg)
@@ -1917,7 +1920,8 @@ static void throtl_upgrade_state(struct throtl_data *td)
 	td->low_upgrade_time = jiffies;
 	td->scale = 0;
 	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+	blkg_for_each_descendant_post(blkg, pos_css,
+			td->queue->disk->root_blkg) {
 		struct throtl_grp *tg = blkg_to_tg(blkg);
 		struct throtl_service_queue *sq = &tg->service_queue;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 89f51d68c68ad6..8d16e3da0d5c68 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -163,6 +163,11 @@ struct gendisk {
 	struct timer_rand_state *random;
 	atomic_t sync_io;		/* RAID */
 	struct disk_events *ev;
+#ifdef CONFIG_BLK_CGROUP
+	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
+	struct blkcg_gq		*root_blkg;
+	struct list_head	blkg_list;
+#endif
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct kobject integrity_kobj;
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
@@ -481,11 +486,6 @@ struct request_queue {
 	struct blk_mq_tags	*sched_shared_tags;
 
 	struct list_head	icq_list;
-#ifdef CONFIG_BLK_CGROUP
-	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
-	struct blkcg_gq		*root_blkg;
-	struct list_head	blkg_list;
-#endif
 
 	struct queue_limits	limits;
 
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread

* Re: [PATCH 01/15] blk-cgroup: don't defer blkg_free to a workqueue
  2023-01-24  6:57   ` Christoph Hellwig
  (?)
@ 2023-01-27 22:33   ` Tejun Heo
  -1 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-27 22:33 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

On Tue, Jan 24, 2023 at 07:57:01AM +0100, Christoph Hellwig wrote:
> Now that blk_put_queue can be called from process context, there is no
> need for the asynchronous execution.
> 
> This effectively reverts commit d578c770c85233af592e54537f93f3831bde7e9a.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Andreas Herrmann <aherrmann@suse.de>

Acked-by: Tejun Heo <tj@kernel.org>

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 03/15] blk-cgroup: delay blk-cgroup initialization until add_disk
@ 2023-01-27 22:59     ` Tejun Heo
  0 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-27 22:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

On Tue, Jan 24, 2023 at 07:57:03AM +0100, Christoph Hellwig wrote:
> There is no need to initialize the group code before the disk is marked
                                     ^
				     cgroup

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 04/15] blk-cgroup: pin the gendisk in struct blkcg_gq
@ 2023-01-27 23:15     ` Tejun Heo
  0 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-27 23:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

Hello,

On Tue, Jan 24, 2023 at 07:57:04AM +0100, Christoph Hellwig wrote:
> @@ -245,10 +245,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
>  	if (!blkg->iostat_cpu)
>  		goto err_free;
>  
> -	if (!blk_get_queue(disk->queue))
> -		goto err_free;
> +	get_device(disk_to_dev(disk));

We lose the blk_queue_dying() test here. It'd be great if the patch
description explained why this is safe.
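
For reference, the helper being dropped bails out once the queue is dying,
while get_device() takes a reference unconditionally.  Roughly, as I
remember the helper around this kernel version (a sketch, not verbatim
source):

	bool blk_get_queue(struct request_queue *q)
	{
		if (unlikely(blk_queue_dying(q)))	/* the test lost here */
			return false;
		/* ... take a reference on the queue ... */
		return true;
	}

	get_device(disk_to_dev(disk));	/* always succeeds, no dying check */

The series appears to compensate by testing GD_DEAD at the point of use
instead, e.g. the blkcg_maybe_throttle_current() hunk in patch 05.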

> @@ -547,9 +546,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
>  
>  const char *blkg_dev_name(struct blkcg_gq *blkg)
>  {
> -	if (!blkg->q->disk || !blkg->q->disk->bdi->dev)
> -		return NULL;
> -	return bdi_dev_name(blkg->q->disk->bdi);
> +	return bdi_dev_name(blkg->disk->bdi);

This isn't completely trivial like other conversions. Maybe worth mentioning
in the description too?
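
The subtlety: the NULL returns go away, since bdi_dev_name() handles a
missing device internally.  As I remember the mm/backing-dev.c helper (a
sketch, stated as an assumption):

	const char *bdi_dev_name(struct backing_dev_info *bdi)
	{
		if (!bdi || !bdi->dev)
			return bdi_unknown_name;	/* "(unknown)" */
		return bdi->dev_name;
	}

so callers that used to see NULL from blkg_dev_name() would presumably now
get the "(unknown)" placeholder string instead.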

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
  2023-01-24  6:57   ` Christoph Hellwig
  (?)
@ 2023-01-27 23:40   ` Tejun Heo
  2023-02-01  8:06       ` Christoph Hellwig
  -1 siblings, 1 reply; 45+ messages in thread
From: Tejun Heo @ 2023-01-27 23:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

On Tue, Jan 24, 2023 at 07:57:05AM +0100, Christoph Hellwig wrote:
>  void blkcg_maybe_throttle_current(void)
>  {
> -	struct request_queue *q = current->throttle_queue;
> +	struct gendisk *disk = current->throttle_disk;
>  	struct blkcg *blkcg;
>  	struct blkcg_gq *blkg;
>  	bool use_memdelay = current->use_memdelay;
>  
> -	if (!q)
> +	if (!disk)
>  		return;
>  
> -	current->throttle_queue = NULL;
> +	current->throttle_disk = NULL;
>  	current->use_memdelay = false;
>  
> +	if (test_bit(GD_DEAD, &disk->state))
> +		goto out_put_disk;
...
> @@ -1835,18 +1839,15 @@ void blkcg_maybe_throttle_current(void)
>   */
>  void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay)
>  {
> -	struct request_queue *q = disk->queue;
> -
>  	if (unlikely(current->flags & PF_KTHREAD))
>  		return;
>  
> -	if (current->throttle_queue != q) {
> -		if (!blk_get_queue(q))
> -			return;
> +	if (current->throttle_disk != disk) {
> +		get_device(disk_to_dev(disk));

So, we're shifting the dead test from schedule to the actual throttle path,
which makes sense but I think this should at least be mentioned in the
description if not put in its own patch.
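
From the hunks quoted above, the ordering change is roughly (paraphrasing
the patch, not new code):

	/* before: the dying check was implicit at schedule time */
	if (current->throttle_queue != q) {
		if (!blk_get_queue(q))	/* fails once the queue is dying */
			return;
		...
	}

	/* after: the reference is unconditional, and the check runs later,
	 * in blkcg_maybe_throttle_current(), when the throttle fires:
	 */
	if (test_bit(GD_DEAD, &disk->state))
		goto out_put_disk;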

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 08/15] blk-wbt: open code wbt_queue_depth_changed in wbt_update_limits
@ 2023-01-27 23:56     ` Tejun Heo
  0 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-27 23:56 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

On Tue, Jan 24, 2023 at 07:57:08AM +0100, Christoph Hellwig wrote:
> No real need to all the method here, so open code to it to prepare

I might be being slow but can't understand what the first sentence is
saying. I think making the descriptions a bit more detailed would be
generally helpful.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 09/15] blk-rq-qos: make rq_qos_add and rq_qos_del more useful
  2023-01-24  6:57 ` [PATCH 09/15] blk-rq-qos: make rq_qos_add and rq_qos_del more useful Christoph Hellwig
@ 2023-01-28  0:01   ` Tejun Heo
  0 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-28  0:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

On Tue, Jan 24, 2023 at 07:57:09AM +0100, Christoph Hellwig wrote:
> Switch to passing a gendisk, and make rq_qos_add initialize all required
> fields and drop the not required q argument from rq_qos_del.  Also move
> the code out of line given how large it is.

Sorry about all the nitpicks, but can you please separate out the uninlining
into its own patch? Code move + small changes in the same patch can be error
prone.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 15/15] blk-cgroup: move the cgroup information to struct gendisk
  2023-01-24  6:57   ` Christoph Hellwig
  (?)
@ 2023-01-28  0:13   ` Tejun Heo
  -1 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-28  0:13 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

On Tue, Jan 24, 2023 at 07:57:15AM +0100, Christoph Hellwig wrote:
> cgroup information only makes sense on a live gendisk that allows
> file system I/O (which includes the raw block device).  So move over
> the cgroup related members.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Andreas Herrmann <aherrmann@suse.de>

The series looks fine to me other than the nits.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 01/15] blk-cgroup: don't defer blkg_free to a workqueue
  2023-01-24  6:57   ` Christoph Hellwig
  (?)
  (?)
@ 2023-01-28 15:12   ` Ming Lei
  2023-01-30 18:01       ` Tejun Heo
  -1 siblings, 1 reply; 45+ messages in thread
From: Ming Lei @ 2023-01-28 15:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Tejun Heo, Josef Bacik, linux-block, cgroups,
	Andreas Herrmann, ming.lei

On Tue, Jan 24, 2023 at 07:57:01AM +0100, Christoph Hellwig wrote:
> Now that blk_put_queue can be called from process context, there is no

s/process/atomic?

> need for the asynchronous execution.
> 
> This effectively reverts commit d578c770c85233af592e54537f93f3831bde7e9a.

blkg_free() can be called with ->queue_lock held, and:

- del_timer_sync(&tg->service_queue.pending_timer) is called from throtl_pd_free().

- queue_lock is required by throtl_pending_timer_fn().

So there is a deadlock risk if the above commit is reverted.
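
Spelled out, the inversion looks something like this (an illustrative
sketch of the two paths, not actual call sites):

	CPU0 (blkg_free, ->queue_lock held)      CPU1 (pending timer fires)
	-----------------------------------      --------------------------
	blkg_free(blkg)                          throtl_pending_timer_fn()
	  throtl_pd_free(pd)                       spin_lock_irq(&q->queue_lock)
	    del_timer_sync(&sq->pending_timer)       ... spins, CPU0 holds it
	      ... waits for the handler on CPU1,
	          which waits for queue_lock: deadlock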

Thanks,
Ming


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 01/15] blk-cgroup: don't defer blkg_free to a workqueue
@ 2023-01-29  6:09     ` Yu Kuai
  0 siblings, 0 replies; 45+ messages in thread
From: Yu Kuai @ 2023-01-29  6:09 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe, Tejun Heo, Josef Bacik
  Cc: linux-block, cgroups, Andreas Herrmann, yukuai (C)

Hi, Christoph,

On 2023/01/24 14:57, Christoph Hellwig wrote:
> Now that blk_put_queue can be called from process context, there is no
> need for the asynchronous execution.
> 
> This effectively reverts commit d578c770c85233af592e54537f93f3831bde7e9a.

blkg_free() can be called from an RCU callback, and my patch

blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and
blkcg_deactivate_policy()

will hold a mutex in blkg_free_workfn(). I think it's better not to do
such a thing in an RCU callback, right?
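
For context: RCU callbacks run from softirq context, where taking a mutex
(which may sleep) is not allowed.  A minimal sketch of the pattern being
warned against, with hypothetical names:

	/* hypothetical sketch, not the actual patch */
	static void blkg_free_rcu(struct rcu_head *rcu)
	{
		/* softirq (RCU callback) context */
		mutex_lock(&some_blkcg_mutex);	/* may sleep: triggers a
						 * "sleeping function called
						 * from invalid context" splat */
	}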

Thanks,
Kuai


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 01/15] blk-cgroup: don't defer blkg_free to a workqueue
  2023-01-28 15:12   ` Ming Lei
@ 2023-01-30 18:01       ` Tejun Heo
  0 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-01-30 18:01 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Jens Axboe, Josef Bacik, linux-block, cgroups,
	Andreas Herrmann

On Sat, Jan 28, 2023 at 11:12:13PM +0800, Ming Lei wrote:
> On Tue, Jan 24, 2023 at 07:57:01AM +0100, Christoph Hellwig wrote:
> > Now that blk_put_queue can be called from process context, there is no
> 
> s/process/atomic?
> 
> > need for the asynchronous execution.
> > 
> > This effectively reverts commit d578c770c85233af592e54537f93f3831bde7e9a.
> 
> blkg_free() can be called with ->queue_lock, and:
> 
> - del_timer_sync(&tg->service_queue.pending_timer) is called from throtl_pd_free().
> 
> - queue_lock is required by throtl_pending_timer_fn
> 
> So there is deadlock risk if the above commit is reverted.

Ah, good catch. Looks like we'd have to leave this async.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
@ 2023-02-01  8:06       ` Christoph Hellwig
  0 siblings, 0 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-02-01  8:06 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christoph Hellwig, Jens Axboe, Josef Bacik, linux-block, cgroups,
	Andreas Herrmann

On Fri, Jan 27, 2023 at 01:40:00PM -1000, Tejun Heo wrote:
> So, we're shifting the dead test from schedule to the actual throttle path,
> which makes sense but I think this should at least be mentioned in the
> description if not put in its own patch.

That's what I tried to say with:

"Move the check for the dead disk to the latest place now that is is
 unbundled from the reference grab."

what else would you want me to write?

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
  2023-02-01  8:06       ` Christoph Hellwig
  (?)
@ 2023-02-02 17:57       ` Tejun Heo
  -1 siblings, 0 replies; 45+ messages in thread
From: Tejun Heo @ 2023-02-02 17:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Josef Bacik, linux-block, cgroups, Andreas Herrmann

Sorry about the late reply.

On Wed, Feb 01, 2023 at 09:06:35AM +0100, Christoph Hellwig wrote:
> On Fri, Jan 27, 2023 at 01:40:00PM -1000, Tejun Heo wrote:
> > So, we're shifting the dead test from schedule to the actual throttle path,
> > which makes sense but I think this should at least be mentioned in the
> > description if not put in its own patch.
> 
> That's what I tried to say with:
> 
> "Move the check for the dead disk to the latest place now that is is
>  unbundled from the reference grab."
> 
> what else would you want me to write?

It'd be better if the paragraph actually said where that change is being
made along with why and whether it causes any behavior difference.

This seems safe to me but I can't tell why it's being moved. Maybe it's
better because that's "latest" but that's a lot hanging on that single word.
If this is better because we're testing it later instead of to accommodate
other changes being made, this should be a separate patch, right? It's a
subtle behavior change which is buried with a bunch of other mechanical
changes.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
  2023-01-17  8:12 ` [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct Christoph Hellwig
@ 2023-01-27  7:03     ` Hannes Reinecke
  2023-01-27  7:03     ` Hannes Reinecke
  1 sibling, 0 replies; 45+ messages in thread
From: Hannes Reinecke @ 2023-01-27  7:03 UTC (permalink / raw)
  To: Christoph Hellwig, Jens Axboe, Tejun Heo, Josef Bacik
  Cc: linux-block, cgroups

On 1/17/23 09:12, Christoph Hellwig wrote:
> Switch from a request_queue pointer and reference to a gendisk once
> for the throttle information in struct task_struct.
> 
> Move the check for the dead disk to the latest place now that is is
> unboundled from the reference grab.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/blk-cgroup.c    | 37 +++++++++++++++++++------------------
>   include/linux/sched.h |  2 +-
>   kernel/fork.c         |  2 +-
>   mm/swapfile.c         |  2 +-
>   4 files changed, 22 insertions(+), 21 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 45+ messages in thread

* Re: [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
  2023-01-17  8:12 ` [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct Christoph Hellwig
@ 2023-01-20  9:00   ` Andreas Herrmann
  2023-01-27  7:03     ` Hannes Reinecke
  1 sibling, 0 replies; 45+ messages in thread
From: Andreas Herrmann @ 2023-01-20  9:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Tejun Heo, Josef Bacik, linux-block, cgroups

On Tue, Jan 17, 2023 at 09:12:47AM +0100, Christoph Hellwig wrote:
> Switch from a request_queue pointer and reference to a gendisk once
> for the throttle information in struct task_struct.
> 
> Move the check for the dead disk to the latest place now that is is
                                                                ^^
								it
> unboundled from the reference grab.
  ^^^^^^^^^^
  unbundled
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/blk-cgroup.c    | 37 +++++++++++++++++++------------------
>  include/linux/sched.h |  2 +-
>  kernel/fork.c         |  2 +-
>  mm/swapfile.c         |  2 +-
>  4 files changed, 22 insertions(+), 21 deletions(-)

Looks good to me. Feel free to add
Reviewed-by: Andreas Herrmann <aherrmann@suse.de>

> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index f5a634ed098db0..603e911d1350db 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -1334,9 +1334,9 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css)
>  
>  static void blkcg_exit(struct task_struct *tsk)
>  {
> -	if (tsk->throttle_queue)
> -		blk_put_queue(tsk->throttle_queue);
> -	tsk->throttle_queue = NULL;
> +	if (tsk->throttle_disk)
> +		put_disk(tsk->throttle_disk);
> +	tsk->throttle_disk = NULL;
>  }
>  
>  struct cgroup_subsys io_cgrp_subsys = {
> @@ -1778,29 +1778,32 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
>   *
>   * This is only called if we've been marked with set_notify_resume().  Obviously
>   * we can be set_notify_resume() for reasons other than blkcg throttling, so we
> - * check to see if current->throttle_queue is set and if not this doesn't do
> + * check to see if current->throttle_disk is set and if not this doesn't do
>   * anything.  This should only ever be called by the resume code, it's not meant
>   * to be called by people willy-nilly as it will actually do the work to
>   * throttle the task if it is setup for throttling.
>   */
>  void blkcg_maybe_throttle_current(void)
>  {
> -	struct request_queue *q = current->throttle_queue;
> +	struct gendisk *disk = current->throttle_disk;
>  	struct blkcg *blkcg;
>  	struct blkcg_gq *blkg;
>  	bool use_memdelay = current->use_memdelay;
>  
> -	if (!q)
> +	if (!disk)
>  		return;
>  
> -	current->throttle_queue = NULL;
> +	current->throttle_disk = NULL;
>  	current->use_memdelay = false;
>  
> +	if (test_bit(GD_DEAD, &disk->state))
> +		goto out_put_disk;
> +
>  	rcu_read_lock();
>  	blkcg = css_to_blkcg(blkcg_css());
>  	if (!blkcg)
>  		goto out;
> -	blkg = blkg_lookup(blkcg, q);
> +	blkg = blkg_lookup(blkcg, disk->queue);
>  	if (!blkg)
>  		goto out;
>  	if (!blkg_tryget(blkg))
> @@ -1809,11 +1812,12 @@ void blkcg_maybe_throttle_current(void)
>  
>  	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
>  	blkg_put(blkg);
> -	blk_put_queue(q);
> +	put_disk(disk);
>  	return;
>  out:
>  	rcu_read_unlock();
> -	blk_put_queue(q);
> +out_put_disk:
> +	put_disk(disk);
>  }
>  
>  /**
> @@ -1835,18 +1839,15 @@ void blkcg_maybe_throttle_current(void)
>   */
>  void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay)
>  {
> -	struct request_queue *q = disk->queue;
> -
>  	if (unlikely(current->flags & PF_KTHREAD))
>  		return;
>  
> -	if (current->throttle_queue != q) {
> -		if (!blk_get_queue(q))
> -			return;
> +	if (current->throttle_disk != disk) {
> +		get_device(disk_to_dev(disk));
>  
> -		if (current->throttle_queue)
> -			blk_put_queue(current->throttle_queue);
> -		current->throttle_queue = q;
> +		if (current->throttle_disk)
> +			put_disk(current->throttle_disk);
> +		current->throttle_disk = disk;
>  	}
>  
>  	if (use_memdelay)
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 853d08f7562bda..6f6ce9ca709798 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1436,7 +1436,7 @@ struct task_struct {
>  #endif
>  
>  #ifdef CONFIG_BLK_CGROUP
> -	struct request_queue		*throttle_queue;
> +	struct gendisk			*throttle_disk;
>  #endif
>  
>  #ifdef CONFIG_UPROBES
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 9f7fe354189785..d9c97704b7c9a4 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1044,7 +1044,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
>  #endif
>  
>  #ifdef CONFIG_BLK_CGROUP
> -	tsk->throttle_queue = NULL;
> +	tsk->throttle_disk = NULL;
>  	tsk->use_memdelay = 0;
>  #endif
>  
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 908a529bca12c9..3e0a742fb7bbff 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -3642,7 +3642,7 @@ void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
>  	 * We've already scheduled a throttle, avoid taking the global swap
>  	 * lock.
>  	 */
> -	if (current->throttle_queue)
> +	if (current->throttle_disk)
>  		return;
>  
>  	spin_lock(&swap_avail_lock);
> -- 
> 2.39.0
> 

-- 
Regards,
Andreas

SUSE Software Solutions Germany GmbH
Frankenstrasse 146, 90461 Nürnberg, Germany
GF: Ivo Totev, Andrew Myers, Andrew McDonald, Martje Boudien Moerman
(HRB 36809, AG Nürnberg)

^ permalink raw reply	[flat|nested] 45+ messages in thread

* [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct
  2023-01-17  8:12 switch blk-cgroup to work on gendisk Christoph Hellwig
@ 2023-01-17  8:12 ` Christoph Hellwig
  2023-01-20  9:00   ` Andreas Herrmann
  2023-01-27  7:03     ` Hannes Reinecke
  0 siblings, 2 replies; 45+ messages in thread
From: Christoph Hellwig @ 2023-01-17  8:12 UTC (permalink / raw)
  To: Jens Axboe, Tejun Heo, Josef Bacik; +Cc: linux-block, cgroups

Switch from a request_queue pointer and reference to a gendisk once
for the throttle information in struct task_struct.

Move the check for the dead disk to the latest place now that is is
unboundled from the reference grab.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-cgroup.c    | 37 +++++++++++++++++++------------------
 include/linux/sched.h |  2 +-
 kernel/fork.c         |  2 +-
 mm/swapfile.c         |  2 +-
 4 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f5a634ed098db0..603e911d1350db 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1334,9 +1334,9 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css)
 
 static void blkcg_exit(struct task_struct *tsk)
 {
-	if (tsk->throttle_queue)
-		blk_put_queue(tsk->throttle_queue);
-	tsk->throttle_queue = NULL;
+	if (tsk->throttle_disk)
+		put_disk(tsk->throttle_disk);
+	tsk->throttle_disk = NULL;
 }
 
 struct cgroup_subsys io_cgrp_subsys = {
@@ -1778,29 +1778,32 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
  *
  * This is only called if we've been marked with set_notify_resume().  Obviously
  * we can be set_notify_resume() for reasons other than blkcg throttling, so we
- * check to see if current->throttle_queue is set and if not this doesn't do
+ * check to see if current->throttle_disk is set and if not this doesn't do
  * anything.  This should only ever be called by the resume code, it's not meant
  * to be called by people willy-nilly as it will actually do the work to
  * throttle the task if it is setup for throttling.
  */
 void blkcg_maybe_throttle_current(void)
 {
-	struct request_queue *q = current->throttle_queue;
+	struct gendisk *disk = current->throttle_disk;
 	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool use_memdelay = current->use_memdelay;
 
-	if (!q)
+	if (!disk)
 		return;
 
-	current->throttle_queue = NULL;
+	current->throttle_disk = NULL;
 	current->use_memdelay = false;
 
+	if (test_bit(GD_DEAD, &disk->state))
+		goto out_put_disk;
+
 	rcu_read_lock();
 	blkcg = css_to_blkcg(blkcg_css());
 	if (!blkcg)
 		goto out;
-	blkg = blkg_lookup(blkcg, q);
+	blkg = blkg_lookup(blkcg, disk->queue);
 	if (!blkg)
 		goto out;
 	if (!blkg_tryget(blkg))
@@ -1809,11 +1812,12 @@ void blkcg_maybe_throttle_current(void)
 
 	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
 	blkg_put(blkg);
-	blk_put_queue(q);
+	put_disk(disk);
 	return;
 out:
 	rcu_read_unlock();
-	blk_put_queue(q);
+out_put_disk:
+	put_disk(disk);
 }
 
 /**
@@ -1835,18 +1839,15 @@ void blkcg_maybe_throttle_current(void)
  */
 void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay)
 {
-	struct request_queue *q = disk->queue;
-
 	if (unlikely(current->flags & PF_KTHREAD))
 		return;
 
-	if (current->throttle_queue != q) {
-		if (!blk_get_queue(q))
-			return;
+	if (current->throttle_disk != disk) {
+		get_device(disk_to_dev(disk));
 
-		if (current->throttle_queue)
-			blk_put_queue(current->throttle_queue);
-		current->throttle_queue = q;
+		if (current->throttle_disk)
+			put_disk(current->throttle_disk);
+		current->throttle_disk = disk;
 	}
 
 	if (use_memdelay)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 853d08f7562bda..6f6ce9ca709798 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1436,7 +1436,7 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
-	struct request_queue		*throttle_queue;
+	struct gendisk			*throttle_disk;
 #endif
 
 #ifdef CONFIG_UPROBES
diff --git a/kernel/fork.c b/kernel/fork.c
index 9f7fe354189785..d9c97704b7c9a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,7 +1044,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
-	tsk->throttle_queue = NULL;
+	tsk->throttle_disk = NULL;
 	tsk->use_memdelay = 0;
 #endif
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 908a529bca12c9..3e0a742fb7bbff 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3642,7 +3642,7 @@ void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
 	 * We've already scheduled a throttle, avoid taking the global swap
 	 * lock.
 	 */
-	if (current->throttle_queue)
+	if (current->throttle_disk)
 		return;
 
 	spin_lock(&swap_avail_lock);
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 45+ messages in thread

end of thread

Thread overview: 45+ messages
2023-01-24  6:57 switch blk-cgroup to work on gendisk v2 Christoph Hellwig
2023-01-24  6:57 ` [PATCH 01/15] blk-cgroup: don't defer blkg_free to a workqueue Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-27 22:33   ` Tejun Heo
2023-01-28 15:12   ` Ming Lei
2023-01-30 18:01     ` Tejun Heo
2023-01-30 18:01       ` Tejun Heo
2023-01-29  6:09   ` Yu Kuai
2023-01-29  6:09     ` Yu Kuai
2023-01-24  6:57 ` [PATCH 02/15] block: don't call blk_throtl_stat_add for non-READ/WRITE commands Christoph Hellwig
2023-01-24  6:57 ` [PATCH 03/15] blk-cgroup: delay blk-cgroup initialization until add_disk Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-27 22:59   ` Tejun Heo
2023-01-27 22:59     ` Tejun Heo
2023-01-24  6:57 ` [PATCH 04/15] blk-cgroup: pin the gendisk in struct blkcg_gq Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-27 23:15   ` Tejun Heo
2023-01-27 23:15     ` Tejun Heo
2023-01-24  6:57 ` [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-27 23:40   ` Tejun Heo
2023-02-01  8:06     ` Christoph Hellwig
2023-02-01  8:06       ` Christoph Hellwig
2023-02-02 17:57       ` Tejun Heo
2023-01-24  6:57 ` [PATCH 06/15] blk-wbt: pass a gendisk to wbt_{enable,disable}_default Christoph Hellwig
2023-01-24  6:57 ` [PATCH 07/15] blk-wbt: pass a gendisk to wbt_init Christoph Hellwig
2023-01-24  6:57 ` [PATCH 08/15] blk-wbt: open code wbt_queue_depth_changed in wbt_update_limits Christoph Hellwig
2023-01-27 23:56   ` Tejun Heo
2023-01-27 23:56     ` Tejun Heo
2023-01-24  6:57 ` [PATCH 09/15] blk-rq-qos: make rq_qos_add and rq_qos_del more useful Christoph Hellwig
2023-01-28  0:01   ` Tejun Heo
2023-01-24  6:57 ` [PATCH 10/15] blk-rq-qos: constify rq_qos_ops Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-24  6:57 ` [PATCH 11/15] blk-rq-qos: store a gendisk instead of request_queue in struct rq_qos Christoph Hellwig
2023-01-24  6:57 ` [PATCH 12/15] blk-cgroup: pass a gendisk to blkcg_{de,}activate_policy Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-24  6:57 ` [PATCH 13/15] blk-cgroup: pass a gendisk to pd_alloc_fn Christoph Hellwig
2023-01-24  6:57 ` [PATCH 14/15] blk-cgroup: pass a gendisk to blkg_lookup Christoph Hellwig
2023-01-24  6:57 ` [PATCH 15/15] blk-cgroup: move the cgroup information to struct gendisk Christoph Hellwig
2023-01-24  6:57   ` Christoph Hellwig
2023-01-28  0:13   ` Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2023-01-17  8:12 switch blk-cgroup to work on gendisk Christoph Hellwig
2023-01-17  8:12 ` [PATCH 05/15] blk-cgroup: store a gendisk to throttle in struct task_struct Christoph Hellwig
2023-01-20  9:00   ` Andreas Herrmann
2023-01-27  7:03   ` Hannes Reinecke
2023-01-27  7:03     ` Hannes Reinecke
