All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] block: don't allow the same type rq_qos add more than once
@ 2022-07-18  8:36 ` Jinke Han
  0 siblings, 0 replies; 6+ messages in thread
From: Jinke Han @ 2022-07-18  8:36 UTC (permalink / raw)
  To: axboe, tj; +Cc: linux-block, linux-kernel, cgroups, hanjinke

From: hanjinke <hanjinke.666@bytedance.com>

In our test of iocost, we encountered some list add/del corruptions of the
inner_walk list in ioc_timer_fn.
The reason can be described as follows:

cpu 0						cpu 1
ioc_qos_write					ioc_qos_write

ioc = q_to_ioc(bdev_get_queue(bdev));
if (!ioc) {
	ioc = kzalloc();			ioc = q_to_ioc(bdev_get_queue(bdev));
							if (!ioc) {
								ioc = kzalloc();
								...
								rq_qos_add(q, rqos);
							 }
	...
	rq_qos_add(q, rqos);
	...
}

When the io.cost.qos file is written by two cpu concurrently, rq_qos may
be added to one disk twice. In that case, there will be two iocs enabled
and running on one disk. They own different iocgs on their active list.
In the ioc_timer_fn function, because the iocgs from the two iocs have the
same root iocg, the root iocg's walk_list may be overwritten by each
other and this leads to list add/del corruptions when building or destroying
the inner_walk list.

So far, the blk-rq-qos framework works on the assumption that there is at
most one instance of each rq_qos type per queue. This patch makes this
assumption explicit and also fixes the crash above.

Signed-off-by: hanjinke <hanjinke.666@bytedance.com>
---
 block/blk-iocost.c    |  7 ++++++-
 block/blk-iolatency.c |  6 +++++-
 block/blk-ioprio.c    |  5 +++++
 block/blk-rq-qos.h    | 11 ++++++++++-
 block/blk-wbt.c       |  8 +++++++-
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 33a11ba971ea..2c41edc3fd70 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2886,7 +2886,12 @@ static int blk_iocost_init(struct request_queue *q)
 	 * called before policy activation completion, can't assume that the
 	 * target bio has an iocg associated and need to test for NULL iocg.
 	 */
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret) {
+		free_percpu(ioc->pcpu_stat);
+		kfree(ioc);
+		return ret;
+	}
 	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
 	if (ret) {
 		rq_qos_del(q, rqos);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 9568bf8dfe82..9a572439f326 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -773,7 +773,11 @@ int blk_iolatency_init(struct request_queue *q)
 	rqos->ops = &blkcg_iolatency_ops;
 	rqos->q = q;
 
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret) {
+		kfree(blkiolat);
+		return ret;
+	}
 
 	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
 	if (ret) {
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 79e797f5d194..931bffdf0cab 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -251,6 +251,12 @@ int blk_ioprio_init(struct request_queue *q)
 	 * rq-qos callbacks.
 	 */
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret) {
+		blkcg_deactivate_policy(q, &ioprio_policy);
+		kfree(blkioprio_blkg);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 0e46052b018a..bbc70cf21c9c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 	init_waitqueue_head(&rq_wait->wait);
 }
 
-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
@@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 	blk_mq_freeze_queue(q);
 
 	spin_lock_irq(&q->queue_lock);
+	if (rq_qos_id(q, rqos->id))
+		goto out;
 	rqos->next = q->rq_qos;
 	q->rq_qos = rqos;
 	spin_unlock_irq(&q->queue_lock);
@@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 		blk_mq_debugfs_register_rqos(rqos);
 		mutex_unlock(&q->debugfs_mutex);
 	}
+
+	return 0;
+out:
+	spin_unlock_irq(&q->queue_lock);
+	blk_mq_unfreeze_queue(q);
+	return -EBUSY;
+
 }
 
 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0c119be0e813..cc8f45929b31 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
 	int i;
+	int ret;
 
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
@@ -846,7 +847,12 @@ int wbt_init(struct request_queue *q)
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	rq_qos_add(q, &rwb->rqos);
+	ret = rq_qos_add(q, &rwb->rqos);
+	if (ret) {
+		blk_stat_free_callback(rwb->cb);
+		kfree(rwb);
+		return ret;
+	}
 	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] block: don't allow the same type rq_qos add more than once
@ 2022-07-18  8:36 ` Jinke Han
  0 siblings, 0 replies; 6+ messages in thread
From: Jinke Han @ 2022-07-18  8:36 UTC (permalink / raw)
  To: axboe-tSWWG44O7X1aa/9Udqfwiw, tj-DgEjT+Ai2ygdnm+yROfE0A
  Cc: linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	cgroups-u79uwXL29TY76Z2rM5mHXA, hanjinke

From: hanjinke <hanjinke.666-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>

In our test of iocost, we encountered some list add/del corruptions of the
inner_walk list in ioc_timer_fn.
The reason can be described as follows:

cpu 0						cpu 1
ioc_qos_write					ioc_qos_write

ioc = q_to_ioc(bdev_get_queue(bdev));
if (!ioc) {
	ioc = kzalloc();			ioc = q_to_ioc(bdev_get_queue(bdev));
							if (!ioc) {
								ioc = kzalloc();
								...
								rq_qos_add(q, rqos);
							 }
	...
	rq_qos_add(q, rqos);
	...
}

When the io.cost.qos file is written by two cpu concurrently, rq_qos may
be added to one disk twice. In that case, there will be two iocs enabled
and running on one disk. They own different iocgs on their active list.
In the ioc_timer_fn function, because the iocgs from the two iocs have the
same root iocg, the root iocg's walk_list may be overwritten by each
other and this leads to list add/del corruptions when building or destroying
the inner_walk list.

So far, the blk-rq-qos framework works on the assumption that there is at
most one instance of each rq_qos type per queue. This patch makes this
assumption explicit and also fixes the crash above.

Signed-off-by: hanjinke <hanjinke.666-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
---
 block/blk-iocost.c    |  7 ++++++-
 block/blk-iolatency.c |  6 +++++-
 block/blk-ioprio.c    |  5 +++++
 block/blk-rq-qos.h    | 11 ++++++++++-
 block/blk-wbt.c       |  8 +++++++-
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 33a11ba971ea..2c41edc3fd70 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2886,7 +2886,12 @@ static int blk_iocost_init(struct request_queue *q)
 	 * called before policy activation completion, can't assume that the
 	 * target bio has an iocg associated and need to test for NULL iocg.
 	 */
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret) {
+		free_percpu(ioc->pcpu_stat);
+		kfree(ioc);
+		return ret;
+	}
 	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
 	if (ret) {
 		rq_qos_del(q, rqos);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 9568bf8dfe82..9a572439f326 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -773,7 +773,11 @@ int blk_iolatency_init(struct request_queue *q)
 	rqos->ops = &blkcg_iolatency_ops;
 	rqos->q = q;
 
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret) {
+		kfree(blkiolat);
+		return ret;
+	}
 
 	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
 	if (ret) {
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 79e797f5d194..931bffdf0cab 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -251,6 +251,12 @@ int blk_ioprio_init(struct request_queue *q)
 	 * rq-qos callbacks.
 	 */
-	rq_qos_add(q, rqos);
+	ret = rq_qos_add(q, rqos);
+	if (ret) {
+		blkcg_deactivate_policy(q, &ioprio_policy);
+		kfree(blkioprio_blkg);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 0e46052b018a..bbc70cf21c9c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 	init_waitqueue_head(&rq_wait->wait);
 }
 
-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
@@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 	blk_mq_freeze_queue(q);
 
 	spin_lock_irq(&q->queue_lock);
+	if (rq_qos_id(q, rqos->id))
+		goto out;
 	rqos->next = q->rq_qos;
 	q->rq_qos = rqos;
 	spin_unlock_irq(&q->queue_lock);
@@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 		blk_mq_debugfs_register_rqos(rqos);
 		mutex_unlock(&q->debugfs_mutex);
 	}
+
+	return 0;
+out:
+	spin_unlock_irq(&q->queue_lock);
+	blk_mq_unfreeze_queue(q);
+	return -EBUSY;
+
 }
 
 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0c119be0e813..cc8f45929b31 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
 	int i;
+	int ret;
 
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
@@ -846,7 +847,12 @@ int wbt_init(struct request_queue *q)
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	rq_qos_add(q, &rwb->rqos);
+	ret = rq_qos_add(q, &rwb->rqos);
+	if (ret) {
+		blk_stat_free_callback(rwb->cb);
+		kfree(rwb);
+		return ret;
+	}
 	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] block: don't allow the same type rq_qos add more than once
  2022-07-18  8:36 ` Jinke Han
  (?)
@ 2022-07-18 19:22 ` Tejun Heo
  -1 siblings, 0 replies; 6+ messages in thread
From: Tejun Heo @ 2022-07-18 19:22 UTC (permalink / raw)
  To: Jinke Han; +Cc: axboe, linux-block, linux-kernel, cgroups

Hello,

On Mon, Jul 18, 2022 at 04:36:46PM +0800, Jinke Han wrote:
> When the io.cost.qos file is written by two cpu concurrently, rq_qos may
> be added to one disk twice. In that case, there will be two iocs enabled
> and running on one disk. They own different iocgs on their active list.
> In the ioc_timer_fn function, because of the iocgs from two ioc have the
> same root iocg, the root iocg's walk_list may be overwritten by each
> other and this lead to list add/del corrutions in building or destorying
> the inner_walk list.
> 
> And so far, the blk-rq-qos framework works in case that one instance for
> one type rq_qos per queue by default. This patch make this explicit and
> also fix the crash above.

Ah, good catch. Looks great. Just a few nits below.

> Signed-off-by: hanjinke <hanjinke.666@bytedance.com>

Can you please use your full name in FIRST LAST form on the SOB line?

> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -2886,7 +2886,12 @@ static int blk_iocost_init(struct request_queue *q)
>  	 * called before policy activation completion, can't assume that the
>  	 * target bio has an iocg associated and need to test for NULL iocg.
>  	 */
> -	rq_qos_add(q, rqos);
> +	ret = rq_qos_add(q, rqos);
> +	if (ret) {
> +		free_percpu(ioc->pcpu_stat);
> +		kfree(ioc);
> +		return ret;

Given that these get repeated for policy activation failure, it'd prolly be
better to factor them out at the end and use gotos and make all of the users
use the same pattern.

> +static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
>  {
>  	/*
>  	 * No IO can be in-flight when adding rqos, so freeze queue, which
> @@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
>  	blk_mq_freeze_queue(q);
>  
>  	spin_lock_irq(&q->queue_lock);
> +	if (rq_qos_id(q, rqos->id))
> +		goto out;

Maybe rename the goto label to ebusy so that it's `goto ebusy`?

Other than the nits, please feel free to add

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] block: don't allow the same type rq_qos add more than once
  2022-07-18  8:36 ` Jinke Han
  (?)
  (?)
@ 2022-07-19  6:53 ` Muchun Song
  2022-07-19  7:50     ` hanjinke
  -1 siblings, 1 reply; 6+ messages in thread
From: Muchun Song @ 2022-07-19  6:53 UTC (permalink / raw)
  To: Jinke Han; +Cc: Jens Axboe, Tejun Heo, open list:BLOCK LAYER, LKML, Cgroups

On Mon, Jul 18, 2022 at 4:37 PM Jinke Han <hanjinke.666@bytedance.com> wrote:
>
> From: hanjinke <hanjinke.666@bytedance.com>
>
> In our test of iocost, we encounttered some list add/del corrutions of
> inner_walk list in ioc_timer_fn.
> The resean can be descripted as follow:
>
> cpu 0                                           cpu 1
> ioc_qos_write                                   ioc_qos_write
>
> ioc = q_to_ioc(bdev_get_queue(bdev));
> if (!ioc) {
>         ioc = kzalloc();                        ioc = q_to_ioc(bdev_get_queue(bdev));
>                                                         if (!ioc) {
>                                                                 ioc = kzalloc();
>                                                                 ...
>                                                                 rq_qos_add(q, rqos);
>                                                          }
>         ...
>         rq_qos_add(q, rqos);
>         ...
> }
>
> When the io.cost.qos file is written by two cpu concurrently, rq_qos may
> be added to one disk twice. In that case, there will be two iocs enabled
> and running on one disk. They own different iocgs on their active list.
> In the ioc_timer_fn function, because of the iocgs from two ioc have the
> same root iocg, the root iocg's walk_list may be overwritten by each
> other and this lead to list add/del corrutions in building or destorying
> the inner_walk list.
>
> And so far, the blk-rq-qos framework works in case that one instance for
> one type rq_qos per queue by default. This patch make this explicit and
> also fix the crash above.
>
> Signed-off-by: hanjinke <hanjinke.666@bytedance.com>

The change LGTM. Maybe it is better to add a Fixes tag here so that
others can easily know what Linux versions should be backported.

Reviewed-by: Muchun Song <songmuchun@bytedance.com>

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] block: don't allow the same type rq_qos add more than once
@ 2022-07-19  7:50     ` hanjinke
  0 siblings, 0 replies; 6+ messages in thread
From: hanjinke @ 2022-07-19  7:50 UTC (permalink / raw)
  To: Muchun Song; +Cc: Jens Axboe, Tejun Heo, open list:BLOCK LAYER, LKML, Cgroups

Thanks for your review. I saw it just after sending patch v2.

在 2022/7/19 下午2:53, Muchun Song 写道:
> On Mon, Jul 18, 2022 at 4:37 PM Jinke Han <hanjinke.666@bytedance.com> wrote:
>>
>> From: hanjinke <hanjinke.666@bytedance.com>
>>
>> In our test of iocost, we encounttered some list add/del corrutions of
>> inner_walk list in ioc_timer_fn.
>> The resean can be descripted as follow:
>>
>> cpu 0                                           cpu 1
>> ioc_qos_write                                   ioc_qos_write
>>
>> ioc = q_to_ioc(bdev_get_queue(bdev));
>> if (!ioc) {
>>          ioc = kzalloc();                        ioc = q_to_ioc(bdev_get_queue(bdev));
>>                                                          if (!ioc) {
>>                                                                  ioc = kzalloc();
>>                                                                  ...
>>                                                                  rq_qos_add(q, rqos);
>>                                                           }
>>          ...
>>          rq_qos_add(q, rqos);
>>          ...
>> }
>>
>> When the io.cost.qos file is written by two cpu concurrently, rq_qos may
>> be added to one disk twice. In that case, there will be two iocs enabled
>> and running on one disk. They own different iocgs on their active list.
>> In the ioc_timer_fn function, because of the iocgs from two ioc have the
>> same root iocg, the root iocg's walk_list may be overwritten by each
>> other and this lead to list add/del corrutions in building or destorying
>> the inner_walk list.
>>
>> And so far, the blk-rq-qos framework works in case that one instance for
>> one type rq_qos per queue by default. This patch make this explicit and
>> also fix the crash above.
>>
>> Signed-off-by: hanjinke <hanjinke.666@bytedance.com>
> 
> The change LGTM. Maybe it is better to add a Fixes tag here so that
> others can easily know what Linux versions should be backported.
> 
> Reviewed-by: Muchun Song <songmuchun@bytedance.com>
> 
> Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] block: don't allow the same type rq_qos add more than once
@ 2022-07-19  7:50     ` hanjinke
  0 siblings, 0 replies; 6+ messages in thread
From: hanjinke @ 2022-07-19  7:50 UTC (permalink / raw)
  To: Muchun Song; +Cc: Jens Axboe, Tejun Heo, open list:BLOCK LAYER, LKML, Cgroups

Thanks for your review. I saw it just after sending patch v2.

在 2022/7/19 下午2:53, Muchun Song 写道:
> On Mon, Jul 18, 2022 at 4:37 PM Jinke Han <hanjinke.666-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org> wrote:
>>
>> From: hanjinke <hanjinke.666-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
>>
>> In our test of iocost, we encounttered some list add/del corrutions of
>> inner_walk list in ioc_timer_fn.
>> The resean can be descripted as follow:
>>
>> cpu 0                                           cpu 1
>> ioc_qos_write                                   ioc_qos_write
>>
>> ioc = q_to_ioc(bdev_get_queue(bdev));
>> if (!ioc) {
>>          ioc = kzalloc();                        ioc = q_to_ioc(bdev_get_queue(bdev));
>>                                                          if (!ioc) {
>>                                                                  ioc = kzalloc();
>>                                                                  ...
>>                                                                  rq_qos_add(q, rqos);
>>                                                           }
>>          ...
>>          rq_qos_add(q, rqos);
>>          ...
>> }
>>
>> When the io.cost.qos file is written by two cpu concurrently, rq_qos may
>> be added to one disk twice. In that case, there will be two iocs enabled
>> and running on one disk. They own different iocgs on their active list.
>> In the ioc_timer_fn function, because of the iocgs from two ioc have the
>> same root iocg, the root iocg's walk_list may be overwritten by each
>> other and this lead to list add/del corrutions in building or destorying
>> the inner_walk list.
>>
>> And so far, the blk-rq-qos framework works in case that one instance for
>> one type rq_qos per queue by default. This patch make this explicit and
>> also fix the crash above.
>>
>> Signed-off-by: hanjinke <hanjinke.666-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
> 
> The change LGTM. Maybe it is better to add a Fixes tag here so that
> others can easily know what Linux versions should be backported.
> 
> Reviewed-by: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
> 
> Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-07-19  7:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-18  8:36 [PATCH] block: don't allow the same type rq_qos add more than once Jinke Han
2022-07-18  8:36 ` Jinke Han
2022-07-18 19:22 ` Tejun Heo
2022-07-19  6:53 ` Muchun Song
2022-07-19  7:50   ` hanjinke
2022-07-19  7:50     ` hanjinke

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.