All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -next 0/4] block/rq_qos: protect rq_qos apis with global mutex
@ 2023-01-04  8:53 Yu Kuai
  2023-01-04  8:53 ` [PATCH -next 1/4] block/rq_qos: move implementions of init/exit rq-qos apis to blk-rq-qos.c Yu Kuai
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj, hch, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang,
	yangerkun

From: Yu Kuai <yukuai3@huawei.com>

This patchset is a new version, use a different solution suggested by
Tejun in [1].

[1] https://lore.kernel.org/all/Y6DP3aOSad8+D1yY@slm.duckdns.org/

Yu Kuai (4):
  block/rq_qos: move implementions of init/exit rq-qos apis to
    blk-rq-qos.c
  block/rq_qos: factor out a helper to add rq_qos and activate policy
  block/rq_qos: use a global mutex to protect rq_qos apis
  block/rq_qos: fail rq_qos_add() after rq_qos_exit()

 block/blk-iocost.c    |  14 +----
 block/blk-iolatency.c |   7 +--
 block/blk-rq-qos.c    | 118 ++++++++++++++++++++++++++++++++++++++++--
 block/blk-rq-qos.h    |  69 +++---------------------
 4 files changed, 125 insertions(+), 83 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH -next 1/4] block/rq_qos: move implementions of init/exit rq-qos apis to blk-rq-qos.c
  2023-01-04  8:53 [PATCH -next 0/4] block/rq_qos: protect rq_qos apis with global mutex Yu Kuai
@ 2023-01-04  8:53 ` Yu Kuai
  2023-01-04 21:30     ` Tejun Heo
  2023-01-04  8:53   ` Yu Kuai
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj, hch, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang,
	yangerkun

From: Yu Kuai <yukuai3@huawei.com>

These init/exit rq-qos apis are a super cold path, so there is no need to
inline them to improve performance. This patch also prepares to use a
global mutex to protect these apis, moving these implementations to
blk-rq-qos.c so that the global mutex won't be exposed. There are no
functional changes.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-rq-qos.c | 59 +++++++++++++++++++++++++++++++++++++++++
 block/blk-rq-qos.h | 65 +++-------------------------------------------
 2 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 88f0fe7dcf54..b6ea40775b2a 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -286,6 +286,65 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 	finish_wait(&rqw->wait, &data.wq);
 }
 
+int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+{
+	/*
+	 * No IO can be in-flight when adding rqos, so freeze queue, which
+	 * is fine since we only support rq_qos for blk-mq queue.
+	 *
+	 * Reuse ->queue_lock for protecting against other concurrent
+	 * rq_qos adding/deleting
+	 */
+	blk_mq_freeze_queue(q);
+
+	spin_lock_irq(&q->queue_lock);
+	if (rq_qos_id(q, rqos->id))
+		goto ebusy;
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+	spin_unlock_irq(&q->queue_lock);
+
+	blk_mq_unfreeze_queue(q);
+
+	if (rqos->ops->debugfs_attrs) {
+		mutex_lock(&q->debugfs_mutex);
+		blk_mq_debugfs_register_rqos(rqos);
+		mutex_unlock(&q->debugfs_mutex);
+	}
+
+	return 0;
+ebusy:
+	spin_unlock_irq(&q->queue_lock);
+	blk_mq_unfreeze_queue(q);
+	return -EBUSY;
+}
+
+void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+{
+	struct rq_qos **cur;
+
+	/*
+	 * See comment in rq_qos_add() about freezing queue & using
+	 * ->queue_lock.
+	 */
+	blk_mq_freeze_queue(q);
+
+	spin_lock_irq(&q->queue_lock);
+	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+		if (*cur == rqos) {
+			*cur = rqos->next;
+			break;
+		}
+	}
+	spin_unlock_irq(&q->queue_lock);
+
+	blk_mq_unfreeze_queue(q);
+
+	mutex_lock(&q->debugfs_mutex);
+	blk_mq_debugfs_unregister_rqos(rqos);
+	mutex_unlock(&q->debugfs_mutex);
+}
+
 void rq_qos_exit(struct request_queue *q)
 {
 	while (q->rq_qos) {
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 1ef1f7d4bc3c..f2d95e19d7a8 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -85,69 +85,12 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 	init_waitqueue_head(&rq_wait->wait);
 }
 
-static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
-{
-	/*
-	 * No IO can be in-flight when adding rqos, so freeze queue, which
-	 * is fine since we only support rq_qos for blk-mq queue.
-	 *
-	 * Reuse ->queue_lock for protecting against other concurrent
-	 * rq_qos adding/deleting
-	 */
-	blk_mq_freeze_queue(q);
-
-	spin_lock_irq(&q->queue_lock);
-	if (rq_qos_id(q, rqos->id))
-		goto ebusy;
-	rqos->next = q->rq_qos;
-	q->rq_qos = rqos;
-	spin_unlock_irq(&q->queue_lock);
-
-	blk_mq_unfreeze_queue(q);
-
-	if (rqos->ops->debugfs_attrs) {
-		mutex_lock(&q->debugfs_mutex);
-		blk_mq_debugfs_register_rqos(rqos);
-		mutex_unlock(&q->debugfs_mutex);
-	}
-
-	return 0;
-ebusy:
-	spin_unlock_irq(&q->queue_lock);
-	blk_mq_unfreeze_queue(q);
-	return -EBUSY;
-
-}
-
-static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
-{
-	struct rq_qos **cur;
-
-	/*
-	 * See comment in rq_qos_add() about freezing queue & using
-	 * ->queue_lock.
-	 */
-	blk_mq_freeze_queue(q);
-
-	spin_lock_irq(&q->queue_lock);
-	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
-		if (*cur == rqos) {
-			*cur = rqos->next;
-			break;
-		}
-	}
-	spin_unlock_irq(&q->queue_lock);
-
-	blk_mq_unfreeze_queue(q);
-
-	mutex_lock(&q->debugfs_mutex);
-	blk_mq_debugfs_unregister_rqos(rqos);
-	mutex_unlock(&q->debugfs_mutex);
-}
-
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
 typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
 
+int rq_qos_add(struct request_queue *q, struct rq_qos *rqos);
+void rq_qos_del(struct request_queue *q, struct rq_qos *rqos);
+void rq_qos_exit(struct request_queue *q);
 void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 		 acquire_inflight_cb_t *acquire_inflight_cb,
 		 cleanup_cb_t *cleanup_cb);
@@ -230,6 +173,4 @@ static inline void rq_qos_queue_depth_changed(struct request_queue *q)
 		__rq_qos_queue_depth_changed(q->rq_qos);
 }
 
-void rq_qos_exit(struct request_queue *);
-
 #endif
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH -next 2/4] block/rq_qos: factor out a helper to add rq_qos and activate policy
@ 2023-01-04  8:53   ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj, hch, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang,
	yangerkun

From: Yu Kuai <yukuai3@huawei.com>

For the policies that use both rq_qos and blkcg_policy, rq_qos_add() and
blkcg_activate_policy() should be atomic, otherwise a null-ptr-dereference
can be triggered. This patch prepares to use a global mutex to protect
them; there are no functional changes.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-iocost.c    | 14 +-------------
 block/blk-iolatency.c |  7 +------
 block/blk-rq-qos.c    | 23 +++++++++++++++++++++++
 block/blk-rq-qos.h    |  6 ++++++
 4 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6955605629e4..9199124f0cc2 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2883,23 +2883,11 @@ static int blk_iocost_init(struct gendisk *disk)
 	ioc_refresh_params(ioc, true);
 	spin_unlock_irq(&ioc->lock);
 
-	/*
-	 * rqos must be added before activation to allow ioc_pd_init() to
-	 * lookup the ioc from q. This means that the rqos methods may get
-	 * called before policy activation completion, can't assume that the
-	 * target bio has an iocg associated and need to test for NULL iocg.
-	 */
-	ret = rq_qos_add(q, rqos);
+	ret = rq_qos_add_and_activate_policy(q, rqos, &blkcg_policy_iocost);
 	if (ret)
 		goto err_free_ioc;
-
-	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
-	if (ret)
-		goto err_del_qos;
 	return 0;
 
-err_del_qos:
-	rq_qos_del(q, rqos);
 err_free_ioc:
 	free_percpu(ioc->pcpu_stat);
 	kfree(ioc);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ecdc10741836..a29b923e2a6a 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -771,20 +771,15 @@ int blk_iolatency_init(struct gendisk *disk)
 	rqos->ops = &blkcg_iolatency_ops;
 	rqos->q = q;
 
-	ret = rq_qos_add(q, rqos);
+	ret = rq_qos_add_and_activate_policy(q, rqos, &blkcg_policy_iolatency);
 	if (ret)
 		goto err_free;
-	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
-	if (ret)
-		goto err_qos_del;
 
 	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
 	INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
 
 	return 0;
 
-err_qos_del:
-	rq_qos_del(q, rqos);
 err_free:
 	kfree(blkiolat);
 	return ret;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index b6ea40775b2a..50544bfb12f1 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -353,3 +353,26 @@ void rq_qos_exit(struct request_queue *q)
 		rqos->ops->exit(rqos);
 	}
 }
+
+#ifdef CONFIG_BLK_CGROUP
+int rq_qos_add_and_activate_policy(struct request_queue *q, struct rq_qos *rqos,
+				   const struct blkcg_policy *pol)
+{
+	/*
+	 * rqos must be added before activation to allow pd_init_fn() to
+	 * lookup the global structure from q. This means that the rqos methods
+	 * may get called before policy activation completion, can't assume that
+	 * the target bio has an pd associated and need to test for NULL.
+	 */
+	int ret = rq_qos_add(q, rqos);
+
+	if (ret)
+		return ret;
+
+	ret = blkcg_activate_policy(q, pol);
+	if (ret)
+		rq_qos_del(q, rqos);
+
+	return ret;
+}
+#endif
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index f2d95e19d7a8..0778cff3777c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -173,4 +173,10 @@ static inline void rq_qos_queue_depth_changed(struct request_queue *q)
 		__rq_qos_queue_depth_changed(q->rq_qos);
 }
 
+#ifdef CONFIG_BLK_CGROUP
+#include "blk-cgroup.h"
+int rq_qos_add_and_activate_policy(struct request_queue *q, struct rq_qos *rqos,
+				   const struct blkcg_policy *pol);
+#endif
+
 #endif
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH -next 2/4] block/rq_qos: factor out a helper to add rq_qos and activate policy
@ 2023-01-04  8:53   ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj-DgEjT+Ai2ygdnm+yROfE0A, hch-wEGCiKHe2LqWVfeAwA7xHQ,
	josef-DigfWCa+lFGyeJad7bwFQA, axboe-tSWWG44O7X1aa/9Udqfwiw
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	yukuai3-hv44wF8Li93QT0dZR+AlfA, yukuai1-XF6JlduFytWkHkcT6e4Xnw,
	yi.zhang-hv44wF8Li93QT0dZR+AlfA,
	yangerkun-hv44wF8Li93QT0dZR+AlfA

From: Yu Kuai <yukuai3-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

For the policies that use both rq_qos and blkcg_policy, rq_qos_add() and
blkcg_activate_policy() should be atomic, otherwise a null-ptr-dereference
can be triggered. This patch prepares to use a global mutex to protect
them; there are no functional changes.

Signed-off-by: Yu Kuai <yukuai3-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 block/blk-iocost.c    | 14 +-------------
 block/blk-iolatency.c |  7 +------
 block/blk-rq-qos.c    | 23 +++++++++++++++++++++++
 block/blk-rq-qos.h    |  6 ++++++
 4 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6955605629e4..9199124f0cc2 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2883,23 +2883,11 @@ static int blk_iocost_init(struct gendisk *disk)
 	ioc_refresh_params(ioc, true);
 	spin_unlock_irq(&ioc->lock);
 
-	/*
-	 * rqos must be added before activation to allow ioc_pd_init() to
-	 * lookup the ioc from q. This means that the rqos methods may get
-	 * called before policy activation completion, can't assume that the
-	 * target bio has an iocg associated and need to test for NULL iocg.
-	 */
-	ret = rq_qos_add(q, rqos);
+	ret = rq_qos_add_and_activate_policy(q, rqos, &blkcg_policy_iocost);
 	if (ret)
 		goto err_free_ioc;
-
-	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
-	if (ret)
-		goto err_del_qos;
 	return 0;
 
-err_del_qos:
-	rq_qos_del(q, rqos);
 err_free_ioc:
 	free_percpu(ioc->pcpu_stat);
 	kfree(ioc);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ecdc10741836..a29b923e2a6a 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -771,20 +771,15 @@ int blk_iolatency_init(struct gendisk *disk)
 	rqos->ops = &blkcg_iolatency_ops;
 	rqos->q = q;
 
-	ret = rq_qos_add(q, rqos);
+	ret = rq_qos_add_and_activate_policy(q, rqos, &blkcg_policy_iolatency);
 	if (ret)
 		goto err_free;
-	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
-	if (ret)
-		goto err_qos_del;
 
 	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
 	INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
 
 	return 0;
 
-err_qos_del:
-	rq_qos_del(q, rqos);
 err_free:
 	kfree(blkiolat);
 	return ret;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index b6ea40775b2a..50544bfb12f1 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -353,3 +353,26 @@ void rq_qos_exit(struct request_queue *q)
 		rqos->ops->exit(rqos);
 	}
 }
+
+#ifdef CONFIG_BLK_CGROUP
+int rq_qos_add_and_activate_policy(struct request_queue *q, struct rq_qos *rqos,
+				   const struct blkcg_policy *pol)
+{
+	/*
+	 * rqos must be added before activation to allow pd_init_fn() to
+	 * lookup the global structure from q. This means that the rqos methods
+	 * may get called before policy activation completion, can't assume that
+	 * the target bio has an pd associated and need to test for NULL.
+	 */
+	int ret = rq_qos_add(q, rqos);
+
+	if (ret)
+		return ret;
+
+	ret = blkcg_activate_policy(q, pol);
+	if (ret)
+		rq_qos_del(q, rqos);
+
+	return ret;
+}
+#endif
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index f2d95e19d7a8..0778cff3777c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -173,4 +173,10 @@ static inline void rq_qos_queue_depth_changed(struct request_queue *q)
 		__rq_qos_queue_depth_changed(q->rq_qos);
 }
 
+#ifdef CONFIG_BLK_CGROUP
+#include "blk-cgroup.h"
+int rq_qos_add_and_activate_policy(struct request_queue *q, struct rq_qos *rqos,
+				   const struct blkcg_policy *pol);
+#endif
+
 #endif
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
  2023-01-04  8:53 [PATCH -next 0/4] block/rq_qos: protect rq_qos apis with global mutex Yu Kuai
  2023-01-04  8:53 ` [PATCH -next 1/4] block/rq_qos: move implementions of init/exit rq-qos apis to blk-rq-qos.c Yu Kuai
  2023-01-04  8:53   ` Yu Kuai
@ 2023-01-04  8:53 ` Yu Kuai
  2023-01-04 21:39   ` Tejun Heo
  2023-01-04  8:53   ` Yu Kuai
  3 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj, hch, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang,
	yangerkun

From: Yu Kuai <yukuai3@huawei.com>

This patch fixes the following problems:

1) rq_qos_add() and rq_qos_del() are protected, while rq_qos_exit() is
   not.
2) rq_qos_add() and blkcg_activate_policy() are not atomic; if
   rq_qos_exit() is done before blkcg_activate_policy(), a
   null-ptr-dereference can be triggered.

rq_qos_add(), rq_qos_del() and rq_qos_exit() are super cold paths, hence
fix the problems by using a global mutex to protect them.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-rq-qos.c | 50 ++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 50544bfb12f1..5f7ccc249c11 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -2,6 +2,8 @@
 
 #include "blk-rq-qos.h"
 
+static DEFINE_MUTEX(rq_qos_lock);
+
 /*
  * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
  * false if 'v' + 1 would be bigger than 'below'.
@@ -286,23 +288,18 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 	finish_wait(&rqw->wait, &data.wq);
 }
 
-int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+static int __rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
 	 * is fine since we only support rq_qos for blk-mq queue.
-	 *
-	 * Reuse ->queue_lock for protecting against other concurrent
-	 * rq_qos adding/deleting
 	 */
 	blk_mq_freeze_queue(q);
 
-	spin_lock_irq(&q->queue_lock);
 	if (rq_qos_id(q, rqos->id))
 		goto ebusy;
 	rqos->next = q->rq_qos;
 	q->rq_qos = rqos;
-	spin_unlock_irq(&q->queue_lock);
 
 	blk_mq_unfreeze_queue(q);
 
@@ -314,29 +311,23 @@ int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 
 	return 0;
 ebusy:
-	spin_unlock_irq(&q->queue_lock);
 	blk_mq_unfreeze_queue(q);
 	return -EBUSY;
 }
 
-void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+static void __rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 {
 	struct rq_qos **cur;
 
-	/*
-	 * See comment in rq_qos_add() about freezing queue & using
-	 * ->queue_lock.
-	 */
+	/* See comment in __rq_qos_add() about freezing queue */
 	blk_mq_freeze_queue(q);
 
-	spin_lock_irq(&q->queue_lock);
 	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
 		if (*cur == rqos) {
 			*cur = rqos->next;
 			break;
 		}
 	}
-	spin_unlock_irq(&q->queue_lock);
 
 	blk_mq_unfreeze_queue(q);
 
@@ -345,13 +336,33 @@ void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 	mutex_unlock(&q->debugfs_mutex);
 }
 
+int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+{
+	int ret;
+
+	mutex_lock(&rq_qos_lock);
+	ret = __rq_qos_add(q, rqos);
+	mutex_unlock(&rq_qos_lock);
+
+	return ret;
+}
+
+void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+{
+	mutex_lock(&rq_qos_lock);
+	__rq_qos_del(q, rqos);
+	mutex_unlock(&rq_qos_lock);
+}
+
 void rq_qos_exit(struct request_queue *q)
 {
+	mutex_lock(&rq_qos_lock);
 	while (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
 		q->rq_qos = rqos->next;
 		rqos->ops->exit(rqos);
 	}
+	mutex_unlock(&rq_qos_lock);
 }
 
 #ifdef CONFIG_BLK_CGROUP
@@ -364,15 +375,20 @@ int rq_qos_add_and_activate_policy(struct request_queue *q, struct rq_qos *rqos,
 	 * may get called before policy activation completion, can't assume that
 	 * the target bio has an pd associated and need to test for NULL.
 	 */
-	int ret = rq_qos_add(q, rqos);
+	int ret;
 
-	if (ret)
+	mutex_lock(&rq_qos_lock);
+	ret = __rq_qos_add(q, rqos);
+	if (ret) {
+		mutex_unlock(&rq_qos_lock);
 		return ret;
+	}
 
 	ret = blkcg_activate_policy(q, pol);
 	if (ret)
-		rq_qos_del(q, rqos);
+		__rq_qos_del(q, rqos);
 
+	mutex_unlock(&rq_qos_lock);
 	return ret;
 }
 #endif
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH -next 4/4] block/rq_qos: fail rq_qos_add() after rq_qos_exit()
@ 2023-01-04  8:53   ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj, hch, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang,
	yangerkun

From: Yu Kuai <yukuai3@huawei.com>

rq_qos_add() can still succeed after rq_qos_exit() is done, which will
cause a memory leak because such rq_qos will never be removed.

t1                      t2
// configure iocost
blk_iocost_init
			//remove device
			del_gendisk
			 rq_qos_exit
			 // done nothing because rq_qos doesn't exist
 rq_qos_add
 // will succeed, and rq_qos won't be removed

Fix the problem by setting q->rq_qos to a special value in
rq_qos_exit(), and check the value in rq_qos_add().

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-rq-qos.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 5f7ccc249c11..cfd8024ff6e8 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -290,6 +290,10 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 
 static int __rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
+	/* See details in rq_qos_exit() for this special value. */
+	if (IS_ERR(q->rq_qos))
+		return PTR_ERR(q->rq_qos);
+
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
 	 * is fine since we only support rq_qos for blk-mq queue.
@@ -356,12 +360,22 @@ void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 
 void rq_qos_exit(struct request_queue *q)
 {
+	struct rq_qos *rqos;
+
 	mutex_lock(&rq_qos_lock);
-	while (q->rq_qos) {
-		struct rq_qos *rqos = q->rq_qos;
-		q->rq_qos = rqos->next;
+	rqos = q->rq_qos;
+
+	/*
+	 * Set q->rq_qos to a special value to make sure rq_qos_add() will fail
+	 * after rq_qos_exit().
+	 */
+	q->rq_qos = ERR_PTR(-ENODEV);
+
+	while (rqos) {
 		rqos->ops->exit(rqos);
+		rqos = rqos->next;
 	}
+
 	mutex_unlock(&rq_qos_lock);
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH -next 4/4] block/rq_qos: fail rq_qos_add() after rq_qos_exit()
@ 2023-01-04  8:53   ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-04  8:53 UTC (permalink / raw)
  To: tj-DgEjT+Ai2ygdnm+yROfE0A, hch-wEGCiKHe2LqWVfeAwA7xHQ,
	josef-DigfWCa+lFGyeJad7bwFQA, axboe-tSWWG44O7X1aa/9Udqfwiw
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	yukuai3-hv44wF8Li93QT0dZR+AlfA, yukuai1-XF6JlduFytWkHkcT6e4Xnw,
	yi.zhang-hv44wF8Li93QT0dZR+AlfA,
	yangerkun-hv44wF8Li93QT0dZR+AlfA

From: Yu Kuai <yukuai3-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

rq_qos_add() can still succeed after rq_qos_exit() is done, which will
cause a memory leak because such rq_qos will never be removed.

t1                      t2
// configure iocost
blk_iocost_init
			//remove device
			del_gendisk
			 rq_qos_exit
			 // done nothing because rq_qos doesn't exist
 rq_qos_add
 // will succeed, and rq_qos won't be removed

Fix the problem by setting q->rq_qos to a special value in
rq_qos_exit(), and check the value in rq_qos_add().

Signed-off-by: Yu Kuai <yukuai3-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 block/blk-rq-qos.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 5f7ccc249c11..cfd8024ff6e8 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -290,6 +290,10 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 
 static int __rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
+	/* See details in rq_qos_exit() for this special value. */
+	if (IS_ERR(q->rq_qos))
+		return PTR_ERR(q->rq_qos);
+
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
 	 * is fine since we only support rq_qos for blk-mq queue.
@@ -356,12 +360,22 @@ void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 
 void rq_qos_exit(struct request_queue *q)
 {
+	struct rq_qos *rqos;
+
 	mutex_lock(&rq_qos_lock);
-	while (q->rq_qos) {
-		struct rq_qos *rqos = q->rq_qos;
-		q->rq_qos = rqos->next;
+	rqos = q->rq_qos;
+
+	/*
+	 * Set q->rq_qos to a special value to make sure rq_qos_add() will fail
+	 * after rq_qos_exit().
+	 */
+	q->rq_qos = ERR_PTR(-ENODEV);
+
+	while (rqos) {
 		rqos->ops->exit(rqos);
+		rqos = rqos->next;
 	}
+
 	mutex_unlock(&rq_qos_lock);
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 1/4] block/rq_qos: move implementions of init/exit rq-qos apis to blk-rq-qos.c
@ 2023-01-04 21:30     ` Tejun Heo
  0 siblings, 0 replies; 21+ messages in thread
From: Tejun Heo @ 2023-01-04 21:30 UTC (permalink / raw)
  To: Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yukuai3,
	yi.zhang, yangerkun

On Wed, Jan 04, 2023 at 04:53:51PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> These init/exit rq-qos apis are super cold path, there is no need to
> inline them to improve performance. This patch also prepare to use a
> global mutex to protect these apis, move these implementions to
> blk-rq-qos.c so that the global mutex won't be exposed. There are no
> functional changes.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 1/4] block/rq_qos: move implementions of init/exit rq-qos apis to blk-rq-qos.c
@ 2023-01-04 21:30     ` Tejun Heo
  0 siblings, 0 replies; 21+ messages in thread
From: Tejun Heo @ 2023-01-04 21:30 UTC (permalink / raw)
  To: Yu Kuai
  Cc: hch-wEGCiKHe2LqWVfeAwA7xHQ, josef-DigfWCa+lFGyeJad7bwFQA,
	axboe-tSWWG44O7X1aa/9Udqfwiw, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	yukuai3-hv44wF8Li93QT0dZR+AlfA, yi.zhang-hv44wF8Li93QT0dZR+AlfA,
	yangerkun-hv44wF8Li93QT0dZR+AlfA

On Wed, Jan 04, 2023 at 04:53:51PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
> 
> These init/exit rq-qos apis are super cold path, there is no need to
> inline them to improve performance. This patch also prepare to use a
> global mutex to protect these apis, move these implementions to
> blk-rq-qos.c so that the global mutex won't be exposed. There are no
> functional changes.
> 
> Signed-off-by: Yu Kuai <yukuai3-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

Acked-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
  2023-01-04  8:53 ` [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis Yu Kuai
@ 2023-01-04 21:39   ` Tejun Heo
  2023-01-05  0:28     ` Tejun Heo
  0 siblings, 1 reply; 21+ messages in thread
From: Tejun Heo @ 2023-01-04 21:39 UTC (permalink / raw)
  To: Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yukuai3,
	yi.zhang, yangerkun

On Wed, Jan 04, 2023 at 04:53:53PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> This patch fix following problems:
> 
> 1) rq_qos_add() and rq_qos_del() is protected, while rq_qos_exit() is
>    not.

This part makes sense.

> 2) rq_qos_add() and blkcg_activate_policy() is not atomic, if
>    rq_qos_exit() is done before blkcg_activate_policy(),
>    null-ptr-deference can be triggered.

I'm not sure this part does. I think it'd be better to guarantee that device
destruction is blocked while these configuration operations are in progress
which can be built into blkg_conf helpers.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
  2023-01-04 21:39   ` Tejun Heo
@ 2023-01-05  0:28     ` Tejun Heo
  2023-01-05  1:35         ` Yu Kuai
  0 siblings, 1 reply; 21+ messages in thread
From: Tejun Heo @ 2023-01-05  0:28 UTC (permalink / raw)
  To: Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yukuai3,
	yi.zhang, yangerkun

Hello, again.

On Wed, Jan 04, 2023 at 11:39:47AM -1000, Tejun Heo wrote:
> > 2) rq_qos_add() and blkcg_activate_policy() is not atomic, if
> >    rq_qos_exit() is done before blkcg_activate_policy(),
> >    null-ptr-deference can be triggered.
> 
> I'm not sure this part does. I think it'd be better to guarantee that device
> destruction is blocked while these configuration operations are in progress
> which can be built into blkg_conf helpers.

A bit more explanation:

Usually, this would be handled in the core - when a device goes away, its
sysfs files get shut down before stuff gets freed and the sysfs file removal
waits for in-flight operations to finish and prevents new ones from
starting, so we don't have to worry about in-flight config file operations
racing against device removal.

Here, the problem isn't solved by that because the config files live on
cgroupfs and their lifetimes are not coupled with the block devices'. So, we
need to synchronize manually. And, given that, the right place to do is the
blkg config helpers cuz they're the ones which establish the connection
between cgroup and block layer.

Can you please take a look at the following patchset I just posted:

  https://lkml.kernel.org/r/20230105002007.157497-1-tj@kernel.org

After that, all these configuration operations are wrapped between
blkg_conf_init() and blkg_conf_exit() which probably are the right place to
implement the synchronization.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-05  1:35         ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-05  1:35 UTC (permalink / raw)
  To: Tejun Heo, Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hi,

在 2023/01/05 8:28, Tejun Heo 写道:
> Hello, again.
> 
> On Wed, Jan 04, 2023 at 11:39:47AM -1000, Tejun Heo wrote:
>>> 2) rq_qos_add() and blkcg_activate_policy() is not atomic, if
>>>     rq_qos_exit() is done before blkcg_activate_policy(),
>>>     null-ptr-deference can be triggered.
>>
>> I'm not sure this part does. I think it'd be better to guarantee that device
>> destruction is blocked while these configuration operations are in progress
>> which can be built into blkg_conf helpers.
> 
> A bit more explanation:
> 
> Usually, this would be handled in the core - when a device goes away, its
> sysfs files get shut down before stuff gets freed and the sysfs file removal
> waits for in-flight operations to finish and prevents new ones from
> starting, so we don't have to worry about in-flight config file operations
> racing against device removal.
> 
> Here, the problem isn't solved by that because the config files live on
> cgroupfs and their lifetimes are not coupled with the block devices'. So, we
> need to synchronize manually. And, given that, the right place to do is the
> blkg config helpers cuz they're the ones which establish the connection
> between cgroup and block layer.

Thanks for the explanation, I agree with that.
> 
> Can you please take a look at the following patchset I just posted:
> 
>    https://lkml.kernel.org/r/20230105002007.157497-1-tj@kernel.org
> 
> After that, all these configuration operations are wrapped between
> blkg_conf_init() and blkg_conf_exit() which probably are the right place to
> implement the synchronization.

I see that, blkg_conf_init() and blkg_conf_exit() is good, however there
are some details I want to confirm:

1) rq_qos_add() can be called from iocost/iolatency, where
blkg_conf_init() will be called first, while rq_qos_add() can also be
called from wbt, where there is no blkg_conf_init(). Hence it seems to
me we need two locks here, one to protect rq_qos apis; one to
synchronize policy configuration and device removal.

2) If you agree with 1), it seems better to use the other lock at device
level, considering that there is no need to synchronize configuration for
different devices.

Thanks,
Kuai
> 
> Thanks.
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-05  1:35         ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-05  1:35 UTC (permalink / raw)
  To: Tejun Heo, Yu Kuai
  Cc: hch-wEGCiKHe2LqWVfeAwA7xHQ, josef-DigfWCa+lFGyeJad7bwFQA,
	axboe-tSWWG44O7X1aa/9Udqfwiw, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	yi.zhang-hv44wF8Li93QT0dZR+AlfA,
	yangerkun-hv44wF8Li93QT0dZR+AlfA, yukuai (C)

Hi,

在 2023/01/05 8:28, Tejun Heo 写道:
> Hello, again.
> 
> On Wed, Jan 04, 2023 at 11:39:47AM -1000, Tejun Heo wrote:
>>> 2) rq_qos_add() and blkcg_activate_policy() is not atomic, if
>>>     rq_qos_exit() is done before blkcg_activate_policy(),
>>>     null-ptr-deference can be triggered.
>>
>> I'm not sure this part does. I think it'd be better to guarantee that device
>> destruction is blocked while these configuration operations are in progress
>> which can be built into blkg_conf helpers.
> 
> A bit more explanation:
> 
> Usually, this would be handled in the core - when a device goes away, its
> sysfs files get shut down before stuff gets freed and the sysfs file removal
> waits for in-flight operations to finish and prevents new ones from
> starting, so we don't have to worry about in-flight config file operations
> racing against device removal.
> 
> Here, the problem isn't solved by that because the config files live on
> cgroupfs and their lifetimes are not coupled with the block devices'. So, we
> need to synchronize manually. And, given that, the right place to do is the
> blkg config helpers cuz they're the ones which establish the connection
> between cgroup and block layer.

Thanks for the explanation, I agree with that.
> 
> Can you please take a look at the following patchset I just posted:
> 
>    https://lkml.kernel.org/r/20230105002007.157497-1-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> 
> After that, all these configuration operations are wrapped between
> blkg_conf_init() and blkg_conf_exit() which probably are the right place to
> implement the synchronization.

I see that, blkg_conf_init() and blkg_conf_exit() is good, however there
are some details I want to confirm:

1) rq_qos_add() can be called from iocost/iolatency, where
blkg_conf_init() will be called first, while rq_qos_add() can also be
called from wbt, where there is no blkg_conf_init(). Hence it seems to
me we need two locks here, one to protect rq_qos apis; one to
synchronize policy configuration and device removal.

2) If you agree with 1), it seems better to use the other lock in device
level, considering that there is no need to synchronize configuration for
different devices.

Thanks,
Kuai
> 
> Thanks.
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
  2023-01-05  1:35         ` Yu Kuai
  (?)
@ 2023-01-05 18:34         ` Tejun Heo
  2023-01-06  1:33             ` Yu Kuai
  -1 siblings, 1 reply; 21+ messages in thread
From: Tejun Heo @ 2023-01-05 18:34 UTC (permalink / raw)
  To: Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hello,

On Thu, Jan 05, 2023 at 09:35:21AM +0800, Yu Kuai wrote:
> > Can you please take a look at the following patchset I just posted:
> > 
> >    https://lkml.kernel.org/r/20230105002007.157497-1-tj@kernel.org
> > 
> > After that, all these configuration operations are wrapped between
> > blkg_conf_init() and blkg_conf_exit() which probably are the right place to
> > implement the synchronization.
> 
> I see that, blkg_conf_init() and blkg_conf_exit() is good, however there
> are some details I want to confirm:
> 
> 1) rq_qos_add() can be called from iocost/iolatency, where
> blkg_conf_init() will be called first, while rq_qos_add() can also be
> called from wbt, where there is no blkg_conf_init(). Hence it seems to
> me we need two locks here, one to protect rq_qos apis; one to
> synchronize policy configuration and device removal.

wbt's lazy init is tied to one of the block device sysfs files, right? So,
it *should* already be protected against device removal.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-06  1:33             ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-06  1:33 UTC (permalink / raw)
  To: Tejun Heo, Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hi,

在 2023/01/06 2:34, Tejun Heo 写道:
> Hello,
> 
> On Thu, Jan 05, 2023 at 09:35:21AM +0800, Yu Kuai wrote:
>>> Can you please take a look at the following patchset I just posted:
>>>
>>>     https://lkml.kernel.org/r/20230105002007.157497-1-tj@kernel.org
>>>
>>> After that, all these configuration operations are wrapped between
>>> blkg_conf_init() and blkg_conf_exit() which probably are the right place to
>>> implement the synchronization.
>>
>> I see that, blkg_conf_init() and blkg_conf_exit() is good, however there
>> are some details I want to confirm:
>>
>> 1) rq_qos_add() can be called from iocost/iolatency, where
>> blkg_conf_init() will be called first, while rq_qos_add() can also be
>> called from wbt, where there is no blkg_conf_init(). Hence it seems to
>> me we need two locks here, one to protect rq_qos apis; one to
>> synchronize policy configuration and device removal.
> 
> wbt's lazy init is tied to one of the block device sysfs files, right? So,
> it *should* already be protected against device removal.

That seems not true, I don't think q->sysfs_lock can protect that,
consider that queue_wb_lat_store() doesn't check if del_gendisk() is
called or not:

t1: wbt lazy init		t2: remove device
queue_attr_store
				del_gendisk
				blk_unregister_queue
				 mutex_lock(&q->sysfs_lock)
			         ...
				 mutex_unlock(&q->sysfs_lock);
				rq_qos_exit
  mutex_lock(&q->sysfs_lock);
   queue_wb_lat_store
   wbt_init
    rq_qos_add
  mutex_unlock(&q->sysfs_lock);

I tried to confirm that by adding the following delay:

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 93d9e9c9a6ea..101c33cb0a2b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
  #include <linux/blktrace_api.h>
  #include <linux/blk-mq.h>
  #include <linux/debugfs.h>
+#include <linux/delay.h>

  #include "blk.h"
  #include "blk-mq.h"
@@ -734,6 +735,8 @@ queue_attr_store(struct kobject *kobj, struct 
attribute *attr,
         if (!entry->store)
                 return -EIO;

+       msleep(10000);
+
         mutex_lock(&q->sysfs_lock);
         res = entry->store(q, page, length);
         mutex_unlock(&q->sysfs_lock);

And then do the following test:

1) echo 10000 > /sys/block/sdb/queue/wbt_lat_usec &
2) echo 1 > /sys/block/sda/device/delete

Then, following bug is triggered:

[   51.923642] BUG: unable to handle page fault for address: 
ffffffffffffffed
[   51.924294] #PF: supervisor read access in kernel mode
[   51.924773] #PF: error_code(0x0000) - not-present page
[   51.925252] PGD 1820b067 P4D 1820b067 PUD 1820d067 PMD 0
[   51.925754] Oops: 0000 [#1] PREEMPT SMP
[   51.926123] CPU: 1 PID: 539 Comm: bash Tainted: G        W 
6.2.0-rc1-next-202212267
[   51.927124] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS ?-20190727_073836-b4
[   51.928334] RIP: 0010:__rq_qos_issue+0x30/0x60
[   51.928761] Code: 48 89 f5 53 48 89 fb 48 83 05 eb eb c9 0b 01 eb 19 
48 89 df 48 83 05 e6 e9
[   51.930426] RSP: 0018:ffffc90000c3b9b0 EFLAGS: 00010206
[   51.930905] RAX: 0000000000000000 RBX: ffffffffffffffed RCX: 
0000000000000017
[   51.931554] RDX: 000007c329800000 RSI: ffff8881022c0380 RDI: 
ffffffffffffffed
[   51.932197] RBP: ffff8881022c0380 R08: 0000000c385056e3 R09: 
ffff8881022c05c8
[   51.932841] R10: 0000000000000000 R11: ffff888100a94000 R12: 
ffff888102145000
[   51.933488] R13: 0000000000000000 R14: ffff888100a94000 R15: 
ffff8881022c04a0
[   51.934140] FS:  00007fd23def9700(0000) GS:ffff88813bd00000(0000) 
knlGS:0000000000000000
[   51.934856] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   51.935379] CR2: ffffffffffffffed CR3: 0000000106fff000 CR4: 
00000000000006e0
[   51.936036] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000
[   51.936675] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 
0000000000000400
[   51.937315] Call Trace:
[   51.937545]  <TASK>
[   51.937749]  blk_mq_start_request+0x1d1/0x240
[   51.938151]  scsi_queue_rq+0x347/0x1190
[   51.938513]  blk_mq_dispatch_rq_list+0x366/0xef0
[   51.938938]  ? tick_nohz_tick_stopped+0x1a/0x40
[   51.939356]  ? __irq_work_queue_local+0x59/0xd0
[   51.939769]  ? __sbitmap_get_word+0x3b/0xb0
[   51.940153]  __blk_mq_sched_dispatch_requests+0xdd/0x210
[   51.940633]  blk_mq_sched_dispatch_requests+0x38/0xa0
[   51.941089]  __blk_mq_run_hw_queue+0xca/0x110
[   51.941483]  __blk_mq_delay_run_hw_queue+0x1fc/0x210
[   51.941931]  blk_mq_run_hw_queue+0x15c/0x1d0
[   51.942327]  blk_mq_sched_insert_request+0x9c/0x210
[   51.942769]  blk_execute_rq+0xec/0x290
[   51.943121]  __scsi_execute+0x131/0x310
[   51.943492]  sd_sync_cache+0xc6/0x280
[   51.943831]  sd_shutdown+0x7f/0x180
[   51.944155]  sd_remove+0x53/0x80
[   51.944457]  device_remove+0x80/0xa0
[   51.944785]  device_release_driver_internal+0x131/0x270
[   51.945257]  device_release_driver+0x16/0x20
[   51.945643]  bus_remove_device+0x135/0x200

Thanks,
Kuai
> 
> Thanks.
> 


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-06  1:33             ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-06  1:33 UTC (permalink / raw)
  To: Tejun Heo, Yu Kuai
  Cc: hch-wEGCiKHe2LqWVfeAwA7xHQ, josef-DigfWCa+lFGyeJad7bwFQA,
	axboe-tSWWG44O7X1aa/9Udqfwiw, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	yi.zhang-hv44wF8Li93QT0dZR+AlfA,
	yangerkun-hv44wF8Li93QT0dZR+AlfA, yukuai (C)

Hi,

在 2023/01/06 2:34, Tejun Heo 写道:
> Hello,
> 
> On Thu, Jan 05, 2023 at 09:35:21AM +0800, Yu Kuai wrote:
>>> Can you please take a look at the following patchset I just posted:
>>>
>>>     https://lkml.kernel.org/r/20230105002007.157497-1-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
>>>
>>> After that, all these configuration operations are wrapped between
>>> blkg_conf_init() and blkg_conf_exit() which probably are the right place to
>>> implement the synchronization.
>>
>> I see that, blkg_conf_init() and blkg_conf_exit() is good, however there
>> are some details I want to confirm:
>>
>> 1) rq_qos_add() can be called from iocost/iolatency, where
>> blkg_conf_init() will be called first, while rq_qos_add() can also be
>> called from wbt, where there is no blkg_conf_init(). Hence it seems to
>> me we need two locks here, one to protect rq_qos apis; one to
>> synchronize policy configuration and device removal.
> 
> wbt's lazy init is tied to one of the block device sysfs files, right? So,
> it *should* already be protected against device removal.

That seems not true, I don't think q->sysfs_lock can protect that,
consider that queue_wb_lat_store() doesn't check if del_gendisk() is
called or not:

t1: wbt lazy init		t2: remove device
queue_attr_store
				del_gendisk
				blk_unregister_queue
				 mutex_lock(&q->sysfs_lock)
			         ...
				 mutex_unlock(&q->sysfs_lock);
				rq_qos_exit
  mutex_lock(&q->sysfs_lock);
   queue_wb_lat_store
   wbt_init
    rq_qos_add
  mutex_unlock(&q->sysfs_lock);

I tried to confirm that by adding the following delay:

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 93d9e9c9a6ea..101c33cb0a2b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
  #include <linux/blktrace_api.h>
  #include <linux/blk-mq.h>
  #include <linux/debugfs.h>
+#include <linux/delay.h>

  #include "blk.h"
  #include "blk-mq.h"
@@ -734,6 +735,8 @@ queue_attr_store(struct kobject *kobj, struct 
attribute *attr,
         if (!entry->store)
                 return -EIO;

+       msleep(10000);
+
         mutex_lock(&q->sysfs_lock);
         res = entry->store(q, page, length);
         mutex_unlock(&q->sysfs_lock);

And then do the following test:

1) echo 10000 > /sys/block/sdb/queue/wbt_lat_usec &
2) echo 1 > /sys/block/sda/device/delete

Then, following bug is triggered:

[   51.923642] BUG: unable to handle page fault for address: 
ffffffffffffffed
[   51.924294] #PF: supervisor read access in kernel mode
[   51.924773] #PF: error_code(0x0000) - not-present page
[   51.925252] PGD 1820b067 P4D 1820b067 PUD 1820d067 PMD 0
[   51.925754] Oops: 0000 [#1] PREEMPT SMP
[   51.926123] CPU: 1 PID: 539 Comm: bash Tainted: G        W 
6.2.0-rc1-next-202212267
[   51.927124] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS ?-20190727_073836-b4
[   51.928334] RIP: 0010:__rq_qos_issue+0x30/0x60
[   51.928761] Code: 48 89 f5 53 48 89 fb 48 83 05 eb eb c9 0b 01 eb 19 
48 89 df 48 83 05 e6 e9
[   51.930426] RSP: 0018:ffffc90000c3b9b0 EFLAGS: 00010206
[   51.930905] RAX: 0000000000000000 RBX: ffffffffffffffed RCX: 
0000000000000017
[   51.931554] RDX: 000007c329800000 RSI: ffff8881022c0380 RDI: 
ffffffffffffffed
[   51.932197] RBP: ffff8881022c0380 R08: 0000000c385056e3 R09: 
ffff8881022c05c8
[   51.932841] R10: 0000000000000000 R11: ffff888100a94000 R12: 
ffff888102145000
[   51.933488] R13: 0000000000000000 R14: ffff888100a94000 R15: 
ffff8881022c04a0
[   51.934140] FS:  00007fd23def9700(0000) GS:ffff88813bd00000(0000) 
knlGS:0000000000000000
[   51.934856] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   51.935379] CR2: ffffffffffffffed CR3: 0000000106fff000 CR4: 
00000000000006e0
[   51.936036] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000
[   51.936675] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 
0000000000000400
[   51.937315] Call Trace:
[   51.937545]  <TASK>
[   51.937749]  blk_mq_start_request+0x1d1/0x240
[   51.938151]  scsi_queue_rq+0x347/0x1190
[   51.938513]  blk_mq_dispatch_rq_list+0x366/0xef0
[   51.938938]  ? tick_nohz_tick_stopped+0x1a/0x40
[   51.939356]  ? __irq_work_queue_local+0x59/0xd0
[   51.939769]  ? __sbitmap_get_word+0x3b/0xb0
[   51.940153]  __blk_mq_sched_dispatch_requests+0xdd/0x210
[   51.940633]  blk_mq_sched_dispatch_requests+0x38/0xa0
[   51.941089]  __blk_mq_run_hw_queue+0xca/0x110
[   51.941483]  __blk_mq_delay_run_hw_queue+0x1fc/0x210
[   51.941931]  blk_mq_run_hw_queue+0x15c/0x1d0
[   51.942327]  blk_mq_sched_insert_request+0x9c/0x210
[   51.942769]  blk_execute_rq+0xec/0x290
[   51.943121]  __scsi_execute+0x131/0x310
[   51.943492]  sd_sync_cache+0xc6/0x280
[   51.943831]  sd_shutdown+0x7f/0x180
[   51.944155]  sd_remove+0x53/0x80
[   51.944457]  device_remove+0x80/0xa0
[   51.944785]  device_release_driver_internal+0x131/0x270
[   51.945257]  device_release_driver+0x16/0x20
[   51.945643]  bus_remove_device+0x135/0x200

Thanks,
Kuai
> 
> Thanks.
> 


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
  2023-01-06  1:33             ` Yu Kuai
  (?)
@ 2023-01-06 18:23             ` Tejun Heo
  2023-01-09  1:38                 ` Yu Kuai
  -1 siblings, 1 reply; 21+ messages in thread
From: Tejun Heo @ 2023-01-06 18:23 UTC (permalink / raw)
  To: Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hello,

On Fri, Jan 06, 2023 at 09:33:26AM +0800, Yu Kuai wrote:
> > wbt's lazy init is tied to one of the block device sysfs files, right? So,
> > it *should* already be protected against device removal.
> 
> That seems not true, I don't think q->sysfs_lock can protect that,
> consider that queue_wb_lat_store() doesn't check if del_gendisk() is
> called or not:
> 
> t1: wbt lazy init		t2: remove device
> queue_attr_store
> 				del_gendisk
> 				blk_unregister_queue
> 				 mutex_lock(&q->sysfs_lock)
> 			         ...
> 				 mutex_unlock(&q->sysfs_lock);
> 				rq_qos_exit
>  mutex_lock(&q->sysfs_lock);
>   queue_wb_lat_store
>   wbt_init
>    rq_qos_add
>  mutex_unlock(&q->sysfs_lock);

So, it's not sysfs_lock but sysfs file deletion. When a kernfs, which backs
sysfs, file is removed, it disables future operations and drains all
inflight ones before returning, so you remove the interface files before
cleaning up the object that it interacts with, you don't have to worry about
racing against file operations as none can be in flight at that point.

> I tried to confirm that by adding the following delay:
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 93d9e9c9a6ea..101c33cb0a2b 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -11,6 +11,7 @@
>  #include <linux/blktrace_api.h>
>  #include <linux/blk-mq.h>
>  #include <linux/debugfs.h>
> +#include <linux/delay.h>
> 
>  #include "blk.h"
>  #include "blk-mq.h"
> @@ -734,6 +735,8 @@ queue_attr_store(struct kobject *kobj, struct attribute
> *attr,
>         if (!entry->store)
>                 return -EIO;
> 
> +       msleep(10000);
> +
>         mutex_lock(&q->sysfs_lock);
>         res = entry->store(q, page, length);
>         mutex_unlock(&q->sysfs_lock);
> 
> And then do the following test:
> 
> 1) echo 10000 > /sys/block/sdb/queue/wbt_lat_usec &
> 2) echo 1 > /sys/block/sda/device/delete
> 
> Then, following bug is triggered:
> 
> [   51.923642] BUG: unable to handle page fault for address:
> ffffffffffffffed
> [   51.924294] #PF: supervisor read access in kernel mode
> [   51.924773] #PF: error_code(0x0000) - not-present page
> [   51.925252] PGD 1820b067 P4D 1820b067 PUD 1820d067 PMD 0
> [   51.925754] Oops: 0000 [#1] PREEMPT SMP
> [   51.926123] CPU: 1 PID: 539 Comm: bash Tainted: G        W
> 6.2.0-rc1-next-202212267
> [   51.927124] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> ?-20190727_073836-b4
> [   51.928334] RIP: 0010:__rq_qos_issue+0x30/0x60

This indicates that we aren't getting the destruction order right. It could
be that there are other reasons why the ordering is like this and we might
have to synchronize separately.

Sorry that I've been asking you to go round and round but block device
add/remove paths have always been really tricky and we wanna avoid adding
more complications if at all possible. Can you see why the device is being
destroyed before the queue attr is removed?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
  2023-01-06 18:23             ` Tejun Heo
@ 2023-01-09  1:38                 ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-09  1:38 UTC (permalink / raw)
  To: Tejun Heo, Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hi,

在 2023/01/07 2:23, Tejun Heo 写道:
> Hello,
> 
> On Fri, Jan 06, 2023 at 09:33:26AM +0800, Yu Kuai wrote:
>>> wbt's lazy init is tied to one of the block device sysfs files, right? So,
>>> it *should* already be protected against device removal.
>>
>> That seems not true, I don't think q->sysfs_lock can protect that,
>> consider that queue_wb_lat_store() doesn't check if del_gendisk() is
>> called or not:
>>
>> t1: wbt lazy init		t2: remove device
>> queue_attr_store
>> 				del_gendisk
>> 				blk_unregister_queue
>> 				 mutex_lock(&q->sysfs_lock)
>> 			         ...
>> 				 mutex_unlock(&q->sysfs_lock);
>> 				rq_qos_exit
>>   mutex_lock(&q->sysfs_lock);
>>    queue_wb_lat_store
>>    wbt_init
>>     rq_qos_add
>>   mutex_unlock(&q->sysfs_lock);
> 
> So, it's not sysfs_lock but sysfs file deletion. When a kernfs, which backs
> sysfs, file is removed, it disables future operations and drains all
> inflight ones before returning, so you remove the interface files before
> cleaning up the object that it interacts with, you don't have to worry about
> racing against file operations as none can be in flight at that point.

Ok, thanks for explanation, I'll look into this and try to find out how
this works.

> 
>> I tried to confirm that by adding the following delay:
>>
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 93d9e9c9a6ea..101c33cb0a2b 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -11,6 +11,7 @@
>>   #include <linux/blktrace_api.h>
>>   #include <linux/blk-mq.h>
>>   #include <linux/debugfs.h>
>> +#include <linux/delay.h>
>>
>>   #include "blk.h"
>>   #include "blk-mq.h"
>> @@ -734,6 +735,8 @@ queue_attr_store(struct kobject *kobj, struct attribute
>> *attr,
>>          if (!entry->store)
>>                  return -EIO;
>>
>> +       msleep(10000);
>> +
>>          mutex_lock(&q->sysfs_lock);
>>          res = entry->store(q, page, length);
>>          mutex_unlock(&q->sysfs_lock);
>>
>> And then do the following test:
>>
>> 1) echo 10000 > /sys/block/sdb/queue/wbt_lat_usec &
>> 2) echo 1 > /sys/block/sda/device/delete
>>
>> Then, following bug is triggered:
>>
>> [   51.923642] BUG: unable to handle page fault for address:
>> ffffffffffffffed
>> [   51.924294] #PF: supervisor read access in kernel mode
>> [   51.924773] #PF: error_code(0x0000) - not-present page
>> [   51.925252] PGD 1820b067 P4D 1820b067 PUD 1820d067 PMD 0
>> [   51.925754] Oops: 0000 [#1] PREEMPT SMP
>> [   51.926123] CPU: 1 PID: 539 Comm: bash Tainted: G        W
>> 6.2.0-rc1-next-202212267
>> [   51.927124] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
>> ?-20190727_073836-b4
>> [   51.928334] RIP: 0010:__rq_qos_issue+0x30/0x60
> 
> This indicates that we aren't getting the destruction order right. It could
> be that there are other reasons why the ordering is like this and we might
> have to synchronize separately.
> 
> Sorry that I've been asking you to go round and round but block device
> add/remove paths have always been really tricky and we wanna avoid adding
> more complications if at all possible. Can you see why the device is being
> destroyed before the queue attr is removed?

Of course, I'd be glad to help; I'll let you know if I have any progress.

Thanks,
Kuai


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-09  1:38                 ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-09  1:38 UTC (permalink / raw)
  To: Tejun Heo, Yu Kuai
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hi,

在 2023/01/07 2:23, Tejun Heo 写道:
> Hello,
> 
> On Fri, Jan 06, 2023 at 09:33:26AM +0800, Yu Kuai wrote:
>>> wbt's lazy init is tied to one of the block device sysfs files, right? So,
>>> it *should* already be protected against device removal.
>>
>> That seems not true, I don't think q->sysfs_lock can protect that,
>> consider that queue_wb_lat_store() doesn't check if del_gendisk() is
>> called or not:
>>
>> t1: wbt lazy init		t2: remove device
>> queue_attr_store
>> 				del_gendisk
>> 				blk_unregister_queue
>> 				 mutex_lock(&q->sysfs_lock)
>> 			         ...
>> 				 mutex_unlock(&q->sysfs_lock);
>> 				rq_qos_exit
>>   mutex_lock(&q->sysfs_lock);
>>    queue_wb_lat_store
>>    wbt_init
>>     rq_qos_add
>>   mutex_unlock(&q->sysfs_lock);
> 
> So, it's not sysfs_lock but sysfs file deletion. When a kernfs, which backs
> sysfs, file is removed, it disables future operations and drains all
> inflight ones before returning, so you remove the interface files before
> cleaning up the object that it interacts with, you don't have to worry about
> racing against file operations as none can be in flight at that point.

Ok, thanks for explanation, I'll look into this and try to find out how
this works.

> 
>> I tried to confirm that by adding the following delay:
>>
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 93d9e9c9a6ea..101c33cb0a2b 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -11,6 +11,7 @@
>>   #include <linux/blktrace_api.h>
>>   #include <linux/blk-mq.h>
>>   #include <linux/debugfs.h>
>> +#include <linux/delay.h>
>>
>>   #include "blk.h"
>>   #include "blk-mq.h"
>> @@ -734,6 +735,8 @@ queue_attr_store(struct kobject *kobj, struct attribute
>> *attr,
>>          if (!entry->store)
>>                  return -EIO;
>>
>> +       msleep(10000);
>> +
>>          mutex_lock(&q->sysfs_lock);
>>          res = entry->store(q, page, length);
>>          mutex_unlock(&q->sysfs_lock);
>>
>> And then do the following test:
>>
>> 1) echo 10000 > /sys/block/sdb/queue/wbt_lat_usec &
>> 2) echo 1 > /sys/block/sda/device/delete
>>
>> Then, following bug is triggered:
>>
>> [   51.923642] BUG: unable to handle page fault for address:
>> ffffffffffffffed
>> [   51.924294] #PF: supervisor read access in kernel mode
>> [   51.924773] #PF: error_code(0x0000) - not-present page
>> [   51.925252] PGD 1820b067 P4D 1820b067 PUD 1820d067 PMD 0
>> [   51.925754] Oops: 0000 [#1] PREEMPT SMP
>> [   51.926123] CPU: 1 PID: 539 Comm: bash Tainted: G        W
>> 6.2.0-rc1-next-202212267
>> [   51.927124] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
>> ?-20190727_073836-b4
>> [   51.928334] RIP: 0010:__rq_qos_issue+0x30/0x60
> 
> This indicates that we aren't getting the destruction order right. It could
> be that there are other reasons why the ordering is like this and we might
> have to synchronize separately.
> 
> Sorry that I've been asking you to go round and round but block device
> add/remove paths have always been really tricky and we wanna avoid adding
> more complications if at all possible. Can you see why the device is being
> destroyed before the queue attr is removed?

Of course, I'd be glad to help; I'll let you know if I have any progress.

Thanks,
Kuai


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-09  6:37                   ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-09  6:37 UTC (permalink / raw)
  To: Yu Kuai, Tejun Heo
  Cc: hch, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yangerkun, yukuai (C)

Hi,

在 2023/01/09 9:38, Yu Kuai 写道:
>> So, it's not sysfs_lock but sysfs file deletion. When a kernfs, which 
>> backs
>> sysfs, file is removed, it disables future operations and drains all
>> inflight ones before returning, so you remove the interface files before
>> cleaning up the object that it interacts with, you don't have to worry 
>> about
>> racing against file operations as none can be in flight at that point.
> 

I understand this now: kernfs_fop_write_iter() will grab
kernfs_node->active, and kobject_del() will wait for active to be
dropped in kernfs_drain().

>> Sorry that I've been asking you to go round and round but block device
>> add/remove paths have always been really tricky and we wanna avoid adding
>> more complications if at all possible. Can you see why the device is 
>> being
>> destroyed before the queue attr is removed?
> 

Sorry that I actually tested with patch 4 applied, and this is a bug
introduced by patch 4, my apologies. It set rqos to ERR_PTR() in
rq_qos_exit, and follow up rq_qos_issue() just check if rqos is NULL.

I'll wait for your patchset to be applied, and then send a new version.
Just one thing to confirm, do you think it's better to use a global
mutex rather than a disk-level mutex? I'm not sure, because a global
mutex will prevent cgroup configurations for different disks from being
applied concurrently.

Thanks,
Kuai


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis
@ 2023-01-09  6:37                   ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-01-09  6:37 UTC (permalink / raw)
  To: Yu Kuai, Tejun Heo
  Cc: hch-wEGCiKHe2LqWVfeAwA7xHQ, josef-DigfWCa+lFGyeJad7bwFQA,
	axboe-tSWWG44O7X1aa/9Udqfwiw, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	yi.zhang-hv44wF8Li93QT0dZR+AlfA,
	yangerkun-hv44wF8Li93QT0dZR+AlfA, yukuai (C)

Hi,

在 2023/01/09 9:38, Yu Kuai 写道:
>> So, it's not sysfs_lock but sysfs file deletion. When a kernfs, which 
>> backs
>> sysfs, file is removed, it disables future operations and drains all
>> inflight ones before returning, so you remove the interface files before
>> cleaning up the object that it interacts with, you don't have to worry 
>> about
>> racing against file operations as none can be in flight at that point.
> 

I understand this now: kernfs_fop_write_iter() will grab
kernfs_node->active, and kobject_del() will wait for active to be
dropped in kernfs_drain().

>> Sorry that I've been asking you to go round and round but block device
>> add/remove paths have always been really tricky and we wanna avoid adding
>> more complications if at all possible. Can you see why the device is 
>> being
>> destroyed before the queue attr is removed?
> 

Sorry that I actually tested with patch 4 applied, and this is a bug
introduced by patch 4, my apologies. It set rqos to ERR_PTR() in
rq_qos_exit, and follow up rq_qos_issue() just check if rqos is NULL.

I'll wait for your patchset to be applied, and then send a new version.
Just one thing to confirm, do you think it's better to use a global
mutex rather than a disk-level mutex? I'm not sure, because a global
mutex will prevent cgroup configurations for different disks from being
applied concurrently.

Thanks,
Kuai


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-01-09  6:37 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-04  8:53 [PATCH -next 0/4] block/rq_qos: protect rq_qos apis with global mutex Yu Kuai
2023-01-04  8:53 ` [PATCH -next 1/4] block/rq_qos: move implementions of init/exit rq-qos apis to blk-rq-qos.c Yu Kuai
2023-01-04 21:30   ` Tejun Heo
2023-01-04 21:30     ` Tejun Heo
2023-01-04  8:53 ` [PATCH -next 2/4] block/rq_qos: factor out a helper to add rq_qos and activate policy Yu Kuai
2023-01-04  8:53   ` Yu Kuai
2023-01-04  8:53 ` [PATCH -next 3/4] block/rq_qos: use a global mutex to protect rq_qos apis Yu Kuai
2023-01-04 21:39   ` Tejun Heo
2023-01-05  0:28     ` Tejun Heo
2023-01-05  1:35       ` Yu Kuai
2023-01-05  1:35         ` Yu Kuai
2023-01-05 18:34         ` Tejun Heo
2023-01-06  1:33           ` Yu Kuai
2023-01-06  1:33             ` Yu Kuai
2023-01-06 18:23             ` Tejun Heo
2023-01-09  1:38               ` Yu Kuai
2023-01-09  1:38                 ` Yu Kuai
2023-01-09  6:37                 ` Yu Kuai
2023-01-09  6:37                   ` Yu Kuai
2023-01-04  8:53 ` [PATCH -next 4/4] block/rq_qos: fail rq_qos_add() after rq_qos_exit() Yu Kuai
2023-01-04  8:53   ` Yu Kuai

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.