* [PATCH] blk-cgroup: prevent "rcu_sched detected stalls" warnings while iterating blkgs
From: Yu Kuai @ 2021-07-02 4:04 UTC
To: tj, axboe; +Cc: cgroups, linux-block, linux-kernel, yukuai3, yi.zhang
We ran a test that creates millions of cgroups and blkgs and then
triggers blkg_destroy_all(). In that situation blkg_destroy_all() holds
the queue spin lock long enough to trigger "rcu_sched detected stalls"
warnings. Thus, release the lock after each batch of blkgs is destroyed.

blkcg_activate_policy() and blkcg_deactivate_policy() might have the
same problem; fix them the same way.
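The pattern is generic: hold the lock for at most a fixed number of
iterations, then drop it, reschedule, and restart the walk from the list
head. A minimal user-space sketch of the same idea (hypothetical names;
pthread_mutex_t and sched_yield() stand in for the kernel's queue_lock
and cond_resched()):

#include <pthread.h>
#include <sched.h>
#include <stdlib.h>

#define BATCH_SIZE 64

struct item {
	struct item *next;
};

static struct item *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void destroy_all(void)
{
	int count = BATCH_SIZE;

restart:
	pthread_mutex_lock(&lock);
	while (head) {
		struct item *it = head;

		head = it->next;
		free(it);

		/*
		 * Drop the lock periodically so that other lock users can
		 * make progress while a huge list is being torn down.
		 */
		if (!(--count)) {
			count = BATCH_SIZE;
			pthread_mutex_unlock(&lock);
			sched_yield();
			goto restart;
		}
	}
	pthread_mutex_unlock(&lock);
}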
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-cgroup.c | 56 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 50 insertions(+), 6 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7b06a5fa3cac..1d26b58d7c25 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -56,6 +56,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
static struct workqueue_struct *blkcg_punt_bio_wq;
+#define BLKG_BATCH_OP_NUM 64
+
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -422,7 +424,9 @@ static void blkg_destroy(struct blkcg_gq *blkg)
static void blkg_destroy_all(struct request_queue *q)
{
struct blkcg_gq *blkg, *n;
+ int count = BLKG_BATCH_OP_NUM;
+restart:
spin_lock_irq(&q->queue_lock);
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
@@ -430,6 +434,17 @@ static void blkg_destroy_all(struct request_queue *q)
spin_lock(&blkcg->lock);
blkg_destroy(blkg);
spin_unlock(&blkcg->lock);
+
+ /*
+ * In order to avoid holding the spin lock for too long, release
+ * it after a batch of blkgs has been destroyed.
+ */
+ if (!(--count)) {
+ count = BLKG_BATCH_OP_NUM;
+ spin_unlock_irq(&q->queue_lock);
+ cond_resched();
+ goto restart;
+ }
}
q->root_blkg = NULL;
@@ -1291,6 +1306,7 @@ int blkcg_activate_policy(struct request_queue *q,
{
struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg, *pinned_blkg = NULL;
+ int count;
int ret;
if (blkcg_policy_enabled(q, pol))
@@ -1299,6 +1315,7 @@ int blkcg_activate_policy(struct request_queue *q,
if (queue_is_mq(q))
blk_mq_freeze_queue(q);
retry:
+ count = BLKG_BATCH_OP_NUM;
spin_lock_irq(&q->queue_lock);
/* blkg_list is pushed at the head, reverse walk to allocate parents first */
@@ -1342,6 +1359,16 @@ int blkcg_activate_policy(struct request_queue *q,
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
+
+ /*
+ * In order to avoid holding the spin lock for too long, release
+ * it after a batch of blkgs has been activated.
+ */
+ if (!(--count)) {
+ spin_unlock_irq(&q->queue_lock);
+ cond_resched();
+ goto retry;
+ }
}
/* all allocated, init in the same order */
@@ -1389,6 +1416,7 @@ void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol)
{
struct blkcg_gq *blkg;
+ int count;
if (!blkcg_policy_enabled(q, pol))
return;
@@ -1397,15 +1425,31 @@ void blkcg_deactivate_policy(struct request_queue *q,
blk_mq_freeze_queue(q);
spin_lock_irq(&q->queue_lock);
-
__clear_bit(pol->plid, q->blkcg_pols);
+retry:
+ count = BLKG_BATCH_OP_NUM;
list_for_each_entry(blkg, &q->blkg_list, q_node) {
- if (blkg->pd[pol->plid]) {
- if (pol->pd_offline_fn)
- pol->pd_offline_fn(blkg->pd[pol->plid]);
- pol->pd_free_fn(blkg->pd[pol->plid]);
- blkg->pd[pol->plid] = NULL;
+ if (!blkg->pd[pol->plid])
+ continue;
+
+ if (pol->pd_offline_fn)
+ pol->pd_offline_fn(blkg->pd[pol->plid]);
+
+ pol->pd_free_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid] = NULL;
+
+ count--;
+
+ /*
+ * In order to avoid holding the spin lock for too long, release
+ * it after a batch of blkgs has been deactivated.
+ */
+ if (!count) {
+ spin_unlock_irq(&q->queue_lock);
+ cond_resched();
+ spin_lock_irq(&q->queue_lock);
+ goto retry;
}
}
--
2.31.1
* Re: [PATCH] blk-cgroup: prevent "rcu_sched detected stalls" warnings while iterating blkgs
From: Tejun Heo @ 2021-07-06 17:56 UTC
To: Yu Kuai; +Cc: axboe, cgroups, linux-block, linux-kernel, yi.zhang
Hello, Yu.
On Fri, Jul 02, 2021 at 12:04:44PM +0800, Yu Kuai wrote:
> blkcg_activate_policy() and blkcg_deactivate_policy() might have the
> same problem; fix them the same way.
Given that these are basically only called from module init/exit paths,
let's leave them alone for now.
> +#define BLKG_BATCH_OP_NUM 64
Can we do BLKG_DESTROY_BATCH_SIZE instead?
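Something like the following, with the uses in blkg_destroy_all()
updated to match (illustrative only, untested):

-#define BLKG_BATCH_OP_NUM 64
+#define BLKG_DESTROY_BATCH_SIZE 64

- int count = BLKG_BATCH_OP_NUM;
+ int count = BLKG_DESTROY_BATCH_SIZE;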
> static void blkg_destroy_all(struct request_queue *q)
> {
> struct blkcg_gq *blkg, *n;
> + int count = BLKG_BATCH_OP_NUM;
>
> +restart:
> spin_lock_irq(&q->queue_lock);
> list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
> struct blkcg *blkcg = blkg->blkcg;
> @@ -430,6 +434,17 @@ static void blkg_destroy_all(struct request_queue *q)
> spin_lock(&blkcg->lock);
> blkg_destroy(blkg);
> spin_unlock(&blkcg->lock);
> +
> + /*
> + * In order to avoid holding the spin lock for too long, release
> + * it after a batch of blkgs has been destroyed.
> + */
> + if (!(--count)) {
> + count = BLKG_BATCH_OP_NUM;
> + spin_unlock_irq(&q->queue_lock);
> + cond_resched();
> + goto restart;
> + }
> }
This part looks good otherwise.
Thanks.
--
tejun