linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list
@ 2022-04-27 16:05 Chengming Zhou
  2022-05-11 12:05 ` Chengming Zhou
  2022-05-23 16:23 ` Vincent Guittot
  0 siblings, 2 replies; 6+ messages in thread
From: Chengming Zhou @ 2022-04-27 16:05 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot
  Cc: linux-kernel, duanxiongchun, songmuchun, zhengqi.arch, Chengming Zhou

While doing bugfix backports and some test profiling, we noticed that
the rq leaf_cfs_rq_list has two problems.

1. cfs_rqs under a throttled subtree can be added to the list, which also
   puts their fully decayed ancestors on the list, even though not needed.

2. #1 also makes the leaf_cfs_rq_list management complex and error prone;
   this is the list of related bugfixes so far:

   commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
   commit fe61468b2cbc ("sched/fair: Fix enqueue_task_fair warning")
   commit b34cb07dde7c ("sched/fair: Fix enqueue_task_fair() warning some more")
   commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
   commit 0258bdfaff5b ("sched/fair: Fix unfairness caused by missing load decay")
   commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
   commit fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling")
   commit 2630cde26711 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")

commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
deleted every cfs_rq under a throttled subtree from rq->leaf_cfs_rq_list
and removed the throttled_hierarchy() test in update_blocked_averages(),
which optimized update_blocked_averages().

But the later bugfixes added cfs_rqs under a throttled subtree back to
rq->leaf_cfs_rq_list, together with their fully decayed ancestors, to
preserve the integrity of rq->leaf_cfs_rq_list.

This patch takes a different approach: skip all cfs_rqs under a throttled
hierarchy in list_add_leaf_cfs_rq(), so that cfs_rqs under a throttled
subtree are kept completely off the leaf_cfs_rq_list.

So we no longer need to handle throttling-related cases in
enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
which simplifies the code a lot. It also optimizes update_blocked_averages(),
since cfs_rqs under a throttled hierarchy and their ancestors
won't be on the leaf_cfs_rq_list.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 kernel/sched/fair.c | 72 ++++++++++-----------------------------------
 1 file changed, 16 insertions(+), 56 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1ad18b5cc1b8..083c3d32c899 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
+
 /* Walk up scheduling entities hierarchy */
 #define for_each_sched_entity(se) \
 		for (; se; se = se->parent)
@@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	int cpu = cpu_of(rq);
 
-	if (cfs_rq->on_list)
+	if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))
 		return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
 
 	cfs_rq->on_list = 1;
@@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
 }
 #endif /* CONFIG_SMP */
 
-static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
-
 /*
  * Recomputes the group entity based on the current state of its group
  * runqueue.
@@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
 
-	/*
-	 * When bandwidth control is enabled, cfs might have been removed
-	 * because of a parent been throttled but cfs->nr_running > 1. Try to
-	 * add it unconditionally.
-	 */
-	if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
+	if (cfs_rq->nr_running == 1) {
 		list_add_leaf_cfs_rq(cfs_rq);
-
-	if (cfs_rq->nr_running == 1)
 		check_enqueue_throttle(cfs_rq);
+	}
 }
 
 static void __clear_buddies_last(struct sched_entity *se)
@@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	/* update hierarchical throttle state */
 	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
 
-	/* Nothing to run but something to decay (on_list)? Complete the branch */
 	if (!cfs_rq->load.weight) {
-		if (cfs_rq->on_list)
-			goto unthrottle_throttle;
-		return;
+		if (!cfs_rq->on_list)
+			return;
+		/*
+		 * Nothing to run but something to decay (on_list)?
+		 * Complete the branch.
+		 */
+		for_each_sched_entity(se) {
+			if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
+				break;
+		}
+		goto unthrottle_throttle;
 	}
 
 	task_delta = cfs_rq->h_nr_running;
@@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(qcfs_rq))
 			goto unthrottle_throttle;
-
-		/*
-		 * One parent has been throttled and cfs_rq removed from the
-		 * list. Add it back to not break the leaf list.
-		 */
-		if (throttled_hierarchy(qcfs_rq))
-			list_add_leaf_cfs_rq(qcfs_rq);
 	}
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, task_delta);
 
 unthrottle_throttle:
-	/*
-	 * The cfs_rq_throttled() breaks in the above iteration can result in
-	 * incomplete leaf list maintenance, resulting in triggering the
-	 * assertion below.
-	 */
-	for_each_sched_entity(se) {
-		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
-
-		if (list_add_leaf_cfs_rq(qcfs_rq))
-			break;
-	}
-
 	assert_list_leaf_cfs_rq(rq);
 
 	/* Determine whether we need to wake up potentially idle CPU: */
@@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
 			goto enqueue_throttle;
-
-               /*
-                * One parent has been throttled and cfs_rq removed from the
-                * list. Add it back to not break the leaf list.
-                */
-               if (throttled_hierarchy(cfs_rq))
-                       list_add_leaf_cfs_rq(cfs_rq);
 	}
 
 	/* At this point se is NULL and we are at root level*/
@@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		update_overutilized_status(rq);
 
 enqueue_throttle:
-	if (cfs_bandwidth_used()) {
-		/*
-		 * When bandwidth control is enabled; the cfs_rq_throttled()
-		 * breaks in the above iteration can result in incomplete
-		 * leaf list maintenance, resulting in triggering the assertion
-		 * below.
-		 */
-		for_each_sched_entity(se) {
-			cfs_rq = cfs_rq_of(se);
-
-			if (list_add_leaf_cfs_rq(cfs_rq))
-				break;
-		}
-	}
-
 	assert_list_leaf_cfs_rq(rq);
 
 	hrtick_update(rq);
-- 
2.35.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list
  2022-04-27 16:05 [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list Chengming Zhou
@ 2022-05-11 12:05 ` Chengming Zhou
  2022-05-12 14:02   ` Vincent Guittot
  2022-05-23 16:23 ` Vincent Guittot
  1 sibling, 1 reply; 6+ messages in thread
From: Chengming Zhou @ 2022-05-11 12:05 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
	rostedt, bsegall, mgorman, bristot
  Cc: linux-kernel, duanxiongchun, songmuchun, zhengqi.arch

Hello, friendly ping...


On 2022/4/28 00:05, Chengming Zhou wrote:
> We notice the rq leaf_cfs_rq_list has two problems when do bugfix
> backports and some test profiling.
> 
> 1. cfs_rqs under throttled subtree could be added to the list, and
>    make their fully decayed ancestors on the list, even though not needed.
> 
> 2. #1 also make the leaf_cfs_rq_list management complex and error prone,
>    this is the list of related bugfix so far:
> 
>    commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
>    commit fe61468b2cbc ("sched/fair: Fix enqueue_task_fair warning")
>    commit b34cb07dde7c ("sched/fair: Fix enqueue_task_fair() warning some more")
>    commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
>    commit 0258bdfaff5b ("sched/fair: Fix unfairness caused by missing load decay")
>    commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
>    commit fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling")
>    commit 2630cde26711 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")
> 
> commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> delete every cfs_rq under throttled subtree from rq->leaf_cfs_rq_list,
> and delete the throttled_hierarchy() test in update_blocked_averages(),
> which optimized update_blocked_averages().
> 
> But those later bugfix add cfs_rqs under throttled subtree back to
> rq->leaf_cfs_rq_list again, with their fully decayed ancestors, for
> the integrity of rq->leaf_cfs_rq_list.
> 
> This patch takes another method, skip all cfs_rqs under throttled
> hierarchy when list_add_leaf_cfs_rq(), to completely make cfs_rqs
> under throttled subtree off the leaf_cfs_rq_list.
> 
> So we don't need to consider throttled related things in
> enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
> which simplify the code a lot. Also optimize update_blocked_averages()
> since cfs_rqs under throttled hierarchy and their ancestors
> won't be on the leaf_cfs_rq_list.
> 
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  kernel/sched/fair.c | 72 ++++++++++-----------------------------------
>  1 file changed, 16 insertions(+), 56 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 1ad18b5cc1b8..083c3d32c899 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  
> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> +
>  /* Walk up scheduling entities hierarchy */
>  #define for_each_sched_entity(se) \
>  		for (; se; se = se->parent)
> @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>  	struct rq *rq = rq_of(cfs_rq);
>  	int cpu = cpu_of(rq);
>  
> -	if (cfs_rq->on_list)
> +	if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))
>  		return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
>  
>  	cfs_rq->on_list = 1;
> @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
>  }
>  #endif /* CONFIG_SMP */
>  
> -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> -
>  /*
>   * Recomputes the group entity based on the current state of its group
>   * runqueue.
> @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>  		__enqueue_entity(cfs_rq, se);
>  	se->on_rq = 1;
>  
> -	/*
> -	 * When bandwidth control is enabled, cfs might have been removed
> -	 * because of a parent been throttled but cfs->nr_running > 1. Try to
> -	 * add it unconditionally.
> -	 */
> -	if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
> +	if (cfs_rq->nr_running == 1) {
>  		list_add_leaf_cfs_rq(cfs_rq);
> -
> -	if (cfs_rq->nr_running == 1)
>  		check_enqueue_throttle(cfs_rq);
> +	}
>  }
>  
>  static void __clear_buddies_last(struct sched_entity *se)
> @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>  	/* update hierarchical throttle state */
>  	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
>  
> -	/* Nothing to run but something to decay (on_list)? Complete the branch */
>  	if (!cfs_rq->load.weight) {
> -		if (cfs_rq->on_list)
> -			goto unthrottle_throttle;
> -		return;
> +		if (!cfs_rq->on_list)
> +			return;
> +		/*
> +		 * Nothing to run but something to decay (on_list)?
> +		 * Complete the branch.
> +		 */
> +		for_each_sched_entity(se) {
> +			if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
> +				break;
> +		}
> +		goto unthrottle_throttle;
>  	}
>  
>  	task_delta = cfs_rq->h_nr_running;
> @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>  		/* end evaluation on encountering a throttled cfs_rq */
>  		if (cfs_rq_throttled(qcfs_rq))
>  			goto unthrottle_throttle;
> -
> -		/*
> -		 * One parent has been throttled and cfs_rq removed from the
> -		 * list. Add it back to not break the leaf list.
> -		 */
> -		if (throttled_hierarchy(qcfs_rq))
> -			list_add_leaf_cfs_rq(qcfs_rq);
>  	}
>  
>  	/* At this point se is NULL and we are at root level*/
>  	add_nr_running(rq, task_delta);
>  
>  unthrottle_throttle:
> -	/*
> -	 * The cfs_rq_throttled() breaks in the above iteration can result in
> -	 * incomplete leaf list maintenance, resulting in triggering the
> -	 * assertion below.
> -	 */
> -	for_each_sched_entity(se) {
> -		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
> -
> -		if (list_add_leaf_cfs_rq(qcfs_rq))
> -			break;
> -	}
> -
>  	assert_list_leaf_cfs_rq(rq);
>  
>  	/* Determine whether we need to wake up potentially idle CPU: */
> @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>  		/* end evaluation on encountering a throttled cfs_rq */
>  		if (cfs_rq_throttled(cfs_rq))
>  			goto enqueue_throttle;
> -
> -               /*
> -                * One parent has been throttled and cfs_rq removed from the
> -                * list. Add it back to not break the leaf list.
> -                */
> -               if (throttled_hierarchy(cfs_rq))
> -                       list_add_leaf_cfs_rq(cfs_rq);
>  	}
>  
>  	/* At this point se is NULL and we are at root level*/
> @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>  		update_overutilized_status(rq);
>  
>  enqueue_throttle:
> -	if (cfs_bandwidth_used()) {
> -		/*
> -		 * When bandwidth control is enabled; the cfs_rq_throttled()
> -		 * breaks in the above iteration can result in incomplete
> -		 * leaf list maintenance, resulting in triggering the assertion
> -		 * below.
> -		 */
> -		for_each_sched_entity(se) {
> -			cfs_rq = cfs_rq_of(se);
> -
> -			if (list_add_leaf_cfs_rq(cfs_rq))
> -				break;
> -		}
> -	}
> -
>  	assert_list_leaf_cfs_rq(rq);
>  
>  	hrtick_update(rq);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list
  2022-05-11 12:05 ` Chengming Zhou
@ 2022-05-12 14:02   ` Vincent Guittot
  2022-05-13  4:32     ` [External] " Chengming Zhou
  0 siblings, 1 reply; 6+ messages in thread
From: Vincent Guittot @ 2022-05-12 14:02 UTC (permalink / raw)
  To: Chengming Zhou
  Cc: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, linux-kernel, duanxiongchun, songmuchun,
	zhengqi.arch

Hi Chengming,

Your patch is on my list, but I have been busy with other stuff, and we
have had enough warnings and problems with this part that I want to
carefully check that all the cases are covered. I will review it soon.

Vincent

On Wed, 11 May 2022 at 14:05, Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> Hello, friendly ping...
>
>
> On 2022/4/28 00:05, Chengming Zhou wrote:
> > We notice the rq leaf_cfs_rq_list has two problems when do bugfix
> > backports and some test profiling.
> >
> > 1. cfs_rqs under throttled subtree could be added to the list, and
> >    make their fully decayed ancestors on the list, even though not needed.
> >
> > 2. #1 also make the leaf_cfs_rq_list management complex and error prone,
> >    this is the list of related bugfix so far:
> >
> >    commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> >    commit fe61468b2cbc ("sched/fair: Fix enqueue_task_fair warning")
> >    commit b34cb07dde7c ("sched/fair: Fix enqueue_task_fair() warning some more")
> >    commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
> >    commit 0258bdfaff5b ("sched/fair: Fix unfairness caused by missing load decay")
> >    commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
> >    commit fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling")
> >    commit 2630cde26711 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")
> >
> > commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> > delete every cfs_rq under throttled subtree from rq->leaf_cfs_rq_list,
> > and delete the throttled_hierarchy() test in update_blocked_averages(),
> > which optimized update_blocked_averages().
> >
> > But those later bugfix add cfs_rqs under throttled subtree back to
> > rq->leaf_cfs_rq_list again, with their fully decayed ancestors, for
> > the integrity of rq->leaf_cfs_rq_list.
> >
> > This patch takes another method, skip all cfs_rqs under throttled
> > hierarchy when list_add_leaf_cfs_rq(), to completely make cfs_rqs
> > under throttled subtree off the leaf_cfs_rq_list.
> >
> > So we don't need to consider throttled related things in
> > enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
> > which simplify the code a lot. Also optimize update_blocked_averages()
> > since cfs_rqs under throttled hierarchy and their ancestors
> > won't be on the leaf_cfs_rq_list.
> >
> > Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> > ---
> >  kernel/sched/fair.c | 72 ++++++++++-----------------------------------
> >  1 file changed, 16 insertions(+), 56 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 1ad18b5cc1b8..083c3d32c899 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
> >
> >  #ifdef CONFIG_FAIR_GROUP_SCHED
> >
> > +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> > +
> >  /* Walk up scheduling entities hierarchy */
> >  #define for_each_sched_entity(se) \
> >               for (; se; se = se->parent)
> > @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
> >       struct rq *rq = rq_of(cfs_rq);
> >       int cpu = cpu_of(rq);
> >
> > -     if (cfs_rq->on_list)
> > +     if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))
> >               return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
> >
> >       cfs_rq->on_list = 1;
> > @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
> >  }
> >  #endif /* CONFIG_SMP */
> >
> > -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> > -
> >  /*
> >   * Recomputes the group entity based on the current state of its group
> >   * runqueue.
> > @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> >               __enqueue_entity(cfs_rq, se);
> >       se->on_rq = 1;
> >
> > -     /*
> > -      * When bandwidth control is enabled, cfs might have been removed
> > -      * because of a parent been throttled but cfs->nr_running > 1. Try to
> > -      * add it unconditionally.
> > -      */
> > -     if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
> > +     if (cfs_rq->nr_running == 1) {
> >               list_add_leaf_cfs_rq(cfs_rq);
> > -
> > -     if (cfs_rq->nr_running == 1)
> >               check_enqueue_throttle(cfs_rq);
> > +     }
> >  }
> >
> >  static void __clear_buddies_last(struct sched_entity *se)
> > @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
> >       /* update hierarchical throttle state */
> >       walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
> >
> > -     /* Nothing to run but something to decay (on_list)? Complete the branch */
> >       if (!cfs_rq->load.weight) {
> > -             if (cfs_rq->on_list)
> > -                     goto unthrottle_throttle;
> > -             return;
> > +             if (!cfs_rq->on_list)
> > +                     return;
> > +             /*
> > +              * Nothing to run but something to decay (on_list)?
> > +              * Complete the branch.
> > +              */
> > +             for_each_sched_entity(se) {
> > +                     if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
> > +                             break;
> > +             }
> > +             goto unthrottle_throttle;
> >       }
> >
> >       task_delta = cfs_rq->h_nr_running;
> > @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
> >               /* end evaluation on encountering a throttled cfs_rq */
> >               if (cfs_rq_throttled(qcfs_rq))
> >                       goto unthrottle_throttle;
> > -
> > -             /*
> > -              * One parent has been throttled and cfs_rq removed from the
> > -              * list. Add it back to not break the leaf list.
> > -              */
> > -             if (throttled_hierarchy(qcfs_rq))
> > -                     list_add_leaf_cfs_rq(qcfs_rq);
> >       }
> >
> >       /* At this point se is NULL and we are at root level*/
> >       add_nr_running(rq, task_delta);
> >
> >  unthrottle_throttle:
> > -     /*
> > -      * The cfs_rq_throttled() breaks in the above iteration can result in
> > -      * incomplete leaf list maintenance, resulting in triggering the
> > -      * assertion below.
> > -      */
> > -     for_each_sched_entity(se) {
> > -             struct cfs_rq *qcfs_rq = cfs_rq_of(se);
> > -
> > -             if (list_add_leaf_cfs_rq(qcfs_rq))
> > -                     break;
> > -     }
> > -
> >       assert_list_leaf_cfs_rq(rq);
> >
> >       /* Determine whether we need to wake up potentially idle CPU: */
> > @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> >               /* end evaluation on encountering a throttled cfs_rq */
> >               if (cfs_rq_throttled(cfs_rq))
> >                       goto enqueue_throttle;
> > -
> > -               /*
> > -                * One parent has been throttled and cfs_rq removed from the
> > -                * list. Add it back to not break the leaf list.
> > -                */
> > -               if (throttled_hierarchy(cfs_rq))
> > -                       list_add_leaf_cfs_rq(cfs_rq);
> >       }
> >
> >       /* At this point se is NULL and we are at root level*/
> > @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> >               update_overutilized_status(rq);
> >
> >  enqueue_throttle:
> > -     if (cfs_bandwidth_used()) {
> > -             /*
> > -              * When bandwidth control is enabled; the cfs_rq_throttled()
> > -              * breaks in the above iteration can result in incomplete
> > -              * leaf list maintenance, resulting in triggering the assertion
> > -              * below.
> > -              */
> > -             for_each_sched_entity(se) {
> > -                     cfs_rq = cfs_rq_of(se);
> > -
> > -                     if (list_add_leaf_cfs_rq(cfs_rq))
> > -                             break;
> > -             }
> > -     }
> > -
> >       assert_list_leaf_cfs_rq(rq);
> >
> >       hrtick_update(rq);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [External] Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list
  2022-05-12 14:02   ` Vincent Guittot
@ 2022-05-13  4:32     ` Chengming Zhou
  0 siblings, 0 replies; 6+ messages in thread
From: Chengming Zhou @ 2022-05-13  4:32 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, linux-kernel, duanxiongchun, songmuchun,
	zhengqi.arch

On 2022/5/12 22:02, Vincent Guittot wrote:
> Hi Chengming,
> 
> Your patch is on my list but I have been busy on other stuff and we
> had enough warnings and problems with this part that I want to
> carefully review that all the cases are covered. I will review it soon
> 

Hi Vincent,

Thanks for your reply. There's no rush, just take your time. I agree
that we should be very careful with this part due to previous problems.

Thanks.

> Vincent
> 
> On Wed, 11 May 2022 at 14:05, Chengming Zhou
> <zhouchengming@bytedance.com> wrote:
>>
>> Hello, friendly ping...
>>
>>
>> On 2022/4/28 00:05, Chengming Zhou wrote:
>>> We notice the rq leaf_cfs_rq_list has two problems when do bugfix
>>> backports and some test profiling.
>>>
>>> 1. cfs_rqs under throttled subtree could be added to the list, and
>>>    make their fully decayed ancestors on the list, even though not needed.
>>>
>>> 2. #1 also make the leaf_cfs_rq_list management complex and error prone,
>>>    this is the list of related bugfix so far:
>>>
>>>    commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
>>>    commit fe61468b2cbc ("sched/fair: Fix enqueue_task_fair warning")
>>>    commit b34cb07dde7c ("sched/fair: Fix enqueue_task_fair() warning some more")
>>>    commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
>>>    commit 0258bdfaff5b ("sched/fair: Fix unfairness caused by missing load decay")
>>>    commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
>>>    commit fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling")
>>>    commit 2630cde26711 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")
>>>
>>> commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
>>> delete every cfs_rq under throttled subtree from rq->leaf_cfs_rq_list,
>>> and delete the throttled_hierarchy() test in update_blocked_averages(),
>>> which optimized update_blocked_averages().
>>>
>>> But those later bugfix add cfs_rqs under throttled subtree back to
>>> rq->leaf_cfs_rq_list again, with their fully decayed ancestors, for
>>> the integrity of rq->leaf_cfs_rq_list.
>>>
>>> This patch takes another method, skip all cfs_rqs under throttled
>>> hierarchy when list_add_leaf_cfs_rq(), to completely make cfs_rqs
>>> under throttled subtree off the leaf_cfs_rq_list.
>>>
>>> So we don't need to consider throttled related things in
>>> enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
>>> which simplify the code a lot. Also optimize update_blocked_averages()
>>> since cfs_rqs under throttled hierarchy and their ancestors
>>> won't be on the leaf_cfs_rq_list.
>>>
>>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
>>> ---
>>>  kernel/sched/fair.c | 72 ++++++++++-----------------------------------
>>>  1 file changed, 16 insertions(+), 56 deletions(-)
>>>
>>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>>> index 1ad18b5cc1b8..083c3d32c899 100644
>>> --- a/kernel/sched/fair.c
>>> +++ b/kernel/sched/fair.c
>>> @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
>>>
>>>  #ifdef CONFIG_FAIR_GROUP_SCHED
>>>
>>> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
>>> +
>>>  /* Walk up scheduling entities hierarchy */
>>>  #define for_each_sched_entity(se) \
>>>               for (; se; se = se->parent)
>>> @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>>>       struct rq *rq = rq_of(cfs_rq);
>>>       int cpu = cpu_of(rq);
>>>
>>> -     if (cfs_rq->on_list)
>>> +     if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))
>>>               return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
>>>
>>>       cfs_rq->on_list = 1;
>>> @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
>>>  }
>>>  #endif /* CONFIG_SMP */
>>>
>>> -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
>>> -
>>>  /*
>>>   * Recomputes the group entity based on the current state of its group
>>>   * runqueue.
>>> @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>>>               __enqueue_entity(cfs_rq, se);
>>>       se->on_rq = 1;
>>>
>>> -     /*
>>> -      * When bandwidth control is enabled, cfs might have been removed
>>> -      * because of a parent been throttled but cfs->nr_running > 1. Try to
>>> -      * add it unconditionally.
>>> -      */
>>> -     if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
>>> +     if (cfs_rq->nr_running == 1) {
>>>               list_add_leaf_cfs_rq(cfs_rq);
>>> -
>>> -     if (cfs_rq->nr_running == 1)
>>>               check_enqueue_throttle(cfs_rq);
>>> +     }
>>>  }
>>>
>>>  static void __clear_buddies_last(struct sched_entity *se)
>>> @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>>>       /* update hierarchical throttle state */
>>>       walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
>>>
>>> -     /* Nothing to run but something to decay (on_list)? Complete the branch */
>>>       if (!cfs_rq->load.weight) {
>>> -             if (cfs_rq->on_list)
>>> -                     goto unthrottle_throttle;
>>> -             return;
>>> +             if (!cfs_rq->on_list)
>>> +                     return;
>>> +             /*
>>> +              * Nothing to run but something to decay (on_list)?
>>> +              * Complete the branch.
>>> +              */
>>> +             for_each_sched_entity(se) {
>>> +                     if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
>>> +                             break;
>>> +             }
>>> +             goto unthrottle_throttle;
>>>       }
>>>
>>>       task_delta = cfs_rq->h_nr_running;
>>> @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>>>               /* end evaluation on encountering a throttled cfs_rq */
>>>               if (cfs_rq_throttled(qcfs_rq))
>>>                       goto unthrottle_throttle;
>>> -
>>> -             /*
>>> -              * One parent has been throttled and cfs_rq removed from the
>>> -              * list. Add it back to not break the leaf list.
>>> -              */
>>> -             if (throttled_hierarchy(qcfs_rq))
>>> -                     list_add_leaf_cfs_rq(qcfs_rq);
>>>       }
>>>
>>>       /* At this point se is NULL and we are at root level*/
>>>       add_nr_running(rq, task_delta);
>>>
>>>  unthrottle_throttle:
>>> -     /*
>>> -      * The cfs_rq_throttled() breaks in the above iteration can result in
>>> -      * incomplete leaf list maintenance, resulting in triggering the
>>> -      * assertion below.
>>> -      */
>>> -     for_each_sched_entity(se) {
>>> -             struct cfs_rq *qcfs_rq = cfs_rq_of(se);
>>> -
>>> -             if (list_add_leaf_cfs_rq(qcfs_rq))
>>> -                     break;
>>> -     }
>>> -
>>>       assert_list_leaf_cfs_rq(rq);
>>>
>>>       /* Determine whether we need to wake up potentially idle CPU: */
>>> @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>>>               /* end evaluation on encountering a throttled cfs_rq */
>>>               if (cfs_rq_throttled(cfs_rq))
>>>                       goto enqueue_throttle;
>>> -
>>> -               /*
>>> -                * One parent has been throttled and cfs_rq removed from the
>>> -                * list. Add it back to not break the leaf list.
>>> -                */
>>> -               if (throttled_hierarchy(cfs_rq))
>>> -                       list_add_leaf_cfs_rq(cfs_rq);
>>>       }
>>>
>>>       /* At this point se is NULL and we are at root level*/
>>> @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>>>               update_overutilized_status(rq);
>>>
>>>  enqueue_throttle:
>>> -     if (cfs_bandwidth_used()) {
>>> -             /*
>>> -              * When bandwidth control is enabled; the cfs_rq_throttled()
>>> -              * breaks in the above iteration can result in incomplete
>>> -              * leaf list maintenance, resulting in triggering the assertion
>>> -              * below.
>>> -              */
>>> -             for_each_sched_entity(se) {
>>> -                     cfs_rq = cfs_rq_of(se);
>>> -
>>> -                     if (list_add_leaf_cfs_rq(cfs_rq))
>>> -                             break;
>>> -             }
>>> -     }
>>> -
>>>       assert_list_leaf_cfs_rq(rq);
>>>
>>>       hrtick_update(rq);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list
  2022-04-27 16:05 [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list Chengming Zhou
  2022-05-11 12:05 ` Chengming Zhou
@ 2022-05-23 16:23 ` Vincent Guittot
  2022-05-24  9:35   ` [External] " Chengming Zhou
  1 sibling, 1 reply; 6+ messages in thread
From: Vincent Guittot @ 2022-05-23 16:23 UTC (permalink / raw)
  To: Chengming Zhou
  Cc: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, linux-kernel, duanxiongchun, songmuchun,
	zhengqi.arch

On Wed, 27 Apr 2022 at 18:07, Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> We noticed that the rq leaf_cfs_rq_list has two problems while doing
> bugfix backports and some test profiling.
>
> 1. cfs_rqs under a throttled subtree could be added to the list, which
>    also puts their fully decayed ancestors on the list even though
>    that is not needed.
>
> 2. #1 also makes the leaf_cfs_rq_list management complex and error prone;
>    this is the list of related bugfixes so far:
>
>    commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
>    commit fe61468b2cbc ("sched/fair: Fix enqueue_task_fair warning")
>    commit b34cb07dde7c ("sched/fair: Fix enqueue_task_fair() warning some more")
>    commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
>    commit 0258bdfaff5b ("sched/fair: Fix unfairness caused by missing load decay")
>    commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
>    commit fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling")
>    commit 2630cde26711 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")
>
> commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> deleted every cfs_rq under a throttled subtree from rq->leaf_cfs_rq_list
> and removed the throttled_hierarchy() test in update_blocked_averages(),
> which optimized update_blocked_averages().
>
> But those later bugfixes add cfs_rqs under a throttled subtree back to
> rq->leaf_cfs_rq_list again, together with their fully decayed ancestors,
> to preserve the integrity of rq->leaf_cfs_rq_list.
>
> This patch takes another approach: skip all cfs_rqs under a throttled
> hierarchy in list_add_leaf_cfs_rq(), so that cfs_rqs under a throttled
> subtree are kept completely off the leaf_cfs_rq_list.
>
> So we no longer need to consider throttling-related things in
> enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
> which simplifies the code a lot. It also optimizes
> update_blocked_averages(), since cfs_rqs under a throttled hierarchy
> and their ancestors won't be on the leaf_cfs_rq_list.
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  kernel/sched/fair.c | 72 ++++++++++-----------------------------------
>  1 file changed, 16 insertions(+), 56 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 1ad18b5cc1b8..083c3d32c899 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>
> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> +
>  /* Walk up scheduling entities hierarchy */
>  #define for_each_sched_entity(se) \
>                 for (; se; se = se->parent)
> @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>         struct rq *rq = rq_of(cfs_rq);
>         int cpu = cpu_of(rq);
>
> -       if (cfs_rq->on_list)
> +       if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))

Please move the throttled_hierarchy() check outside of
list_add_leaf_cfs_rq(), because the cfs_rq will not be added to the
list in this case, which is quite misleading.

I will continue to check the various corner cases, but I haven't seen
any problem so far with your method.

>                 return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
>
>         cfs_rq->on_list = 1;
> @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
>  }
>  #endif /* CONFIG_SMP */
>
> -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> -
>  /*
>   * Recomputes the group entity based on the current state of its group
>   * runqueue.
> @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>                 __enqueue_entity(cfs_rq, se);
>         se->on_rq = 1;
>
> -       /*
> -        * When bandwidth control is enabled, cfs might have been removed
> -        * because of a parent been throttled but cfs->nr_running > 1. Try to
> -        * add it unconditionally.
> -        */
> -       if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
> +       if (cfs_rq->nr_running == 1) {
>                 list_add_leaf_cfs_rq(cfs_rq);
> -
> -       if (cfs_rq->nr_running == 1)
>                 check_enqueue_throttle(cfs_rq);
> +       }
>  }
>
>  static void __clear_buddies_last(struct sched_entity *se)
> @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>         /* update hierarchical throttle state */
>         walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
>
> -       /* Nothing to run but something to decay (on_list)? Complete the branch */
>         if (!cfs_rq->load.weight) {
> -               if (cfs_rq->on_list)
> -                       goto unthrottle_throttle;
> -               return;
> +               if (!cfs_rq->on_list)
> +                       return;
> +               /*
> +                * Nothing to run but something to decay (on_list)?
> +                * Complete the branch.
> +                */
> +               for_each_sched_entity(se) {
> +                       if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
> +                               break;
> +               }
> +               goto unthrottle_throttle;
>         }
>
>         task_delta = cfs_rq->h_nr_running;
> @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>                 /* end evaluation on encountering a throttled cfs_rq */
>                 if (cfs_rq_throttled(qcfs_rq))
>                         goto unthrottle_throttle;
> -
> -               /*
> -                * One parent has been throttled and cfs_rq removed from the
> -                * list. Add it back to not break the leaf list.
> -                */
> -               if (throttled_hierarchy(qcfs_rq))
> -                       list_add_leaf_cfs_rq(qcfs_rq);
>         }
>
>         /* At this point se is NULL and we are at root level*/
>         add_nr_running(rq, task_delta);
>
>  unthrottle_throttle:
> -       /*
> -        * The cfs_rq_throttled() breaks in the above iteration can result in
> -        * incomplete leaf list maintenance, resulting in triggering the
> -        * assertion below.
> -        */
> -       for_each_sched_entity(se) {
> -               struct cfs_rq *qcfs_rq = cfs_rq_of(se);
> -
> -               if (list_add_leaf_cfs_rq(qcfs_rq))
> -                       break;
> -       }
> -
>         assert_list_leaf_cfs_rq(rq);
>
>         /* Determine whether we need to wake up potentially idle CPU: */
> @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>                 /* end evaluation on encountering a throttled cfs_rq */
>                 if (cfs_rq_throttled(cfs_rq))
>                         goto enqueue_throttle;
> -
> -               /*
> -                * One parent has been throttled and cfs_rq removed from the
> -                * list. Add it back to not break the leaf list.
> -                */
> -               if (throttled_hierarchy(cfs_rq))
> -                       list_add_leaf_cfs_rq(cfs_rq);
>         }
>
>         /* At this point se is NULL and we are at root level*/
> @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>                 update_overutilized_status(rq);
>
>  enqueue_throttle:
> -       if (cfs_bandwidth_used()) {
> -               /*
> -                * When bandwidth control is enabled; the cfs_rq_throttled()
> -                * breaks in the above iteration can result in incomplete
> -                * leaf list maintenance, resulting in triggering the assertion
> -                * below.
> -                */
> -               for_each_sched_entity(se) {
> -                       cfs_rq = cfs_rq_of(se);
> -
> -                       if (list_add_leaf_cfs_rq(cfs_rq))
> -                               break;
> -               }
> -       }
> -
>         assert_list_leaf_cfs_rq(rq);
>
>         hrtick_update(rq);
> --
> 2.35.2
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [External] Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list
  2022-05-23 16:23 ` Vincent Guittot
@ 2022-05-24  9:35   ` Chengming Zhou
  0 siblings, 0 replies; 6+ messages in thread
From: Chengming Zhou @ 2022-05-24  9:35 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: mingo, peterz, juri.lelli, dietmar.eggemann, rostedt, bsegall,
	mgorman, bristot, linux-kernel, duanxiongchun, songmuchun,
	zhengqi.arch

On 2022/5/24 00:23, Vincent Guittot wrote:
> On Wed, 27 Apr 2022 at 18:07, Chengming Zhou
> <zhouchengming@bytedance.com> wrote:
[...]
>>  kernel/sched/fair.c | 72 ++++++++++-----------------------------------
>>  1 file changed, 16 insertions(+), 56 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 1ad18b5cc1b8..083c3d32c899 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
>>
>>  #ifdef CONFIG_FAIR_GROUP_SCHED
>>
>> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
>> +
>>  /* Walk up scheduling entities hierarchy */
>>  #define for_each_sched_entity(se) \
>>                 for (; se; se = se->parent)
>> @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>>         struct rq *rq = rq_of(cfs_rq);
>>         int cpu = cpu_of(rq);
>>
>> -       if (cfs_rq->on_list)
>> +       if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))
> 
> Please move throttled_hierarchy() outside list_add_leaf_cfs_rq()
> because the task will not be added in this case which is quite
> misleading

Ok, will do. I will move throttled_hierarchy() outside.

> 
> I will continue to check the various corner cases but I haven't seen
> problem so far with your method

Thanks!

> 
>>                 return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
>>
>>         cfs_rq->on_list = 1;
>> @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
>>  }
>>  #endif /* CONFIG_SMP */
>>
>> -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
>> -
>>  /*
>>   * Recomputes the group entity based on the current state of its group
>>   * runqueue.
>> @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>>                 __enqueue_entity(cfs_rq, se);
>>         se->on_rq = 1;
>>
>> -       /*
>> -        * When bandwidth control is enabled, cfs might have been removed
>> -        * because of a parent been throttled but cfs->nr_running > 1. Try to
>> -        * add it unconditionally.
>> -        */
>> -       if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
>> +       if (cfs_rq->nr_running == 1) {
>>                 list_add_leaf_cfs_rq(cfs_rq);
>> -
>> -       if (cfs_rq->nr_running == 1)
>>                 check_enqueue_throttle(cfs_rq);
>> +       }
>>  }
>>
>>  static void __clear_buddies_last(struct sched_entity *se)
>> @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>>         /* update hierarchical throttle state */
>>         walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
>>
>> -       /* Nothing to run but something to decay (on_list)? Complete the branch */
>>         if (!cfs_rq->load.weight) {
>> -               if (cfs_rq->on_list)
>> -                       goto unthrottle_throttle;
>> -               return;
>> +               if (!cfs_rq->on_list)
>> +                       return;
>> +               /*
>> +                * Nothing to run but something to decay (on_list)?
>> +                * Complete the branch.
>> +                */
>> +               for_each_sched_entity(se) {
>> +                       if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
>> +                               break;
>> +               }
>> +               goto unthrottle_throttle;
>>         }
>>
>>         task_delta = cfs_rq->h_nr_running;
>> @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>>                 /* end evaluation on encountering a throttled cfs_rq */
>>                 if (cfs_rq_throttled(qcfs_rq))
>>                         goto unthrottle_throttle;
>> -
>> -               /*
>> -                * One parent has been throttled and cfs_rq removed from the
>> -                * list. Add it back to not break the leaf list.
>> -                */
>> -               if (throttled_hierarchy(qcfs_rq))
>> -                       list_add_leaf_cfs_rq(qcfs_rq);
>>         }
>>
>>         /* At this point se is NULL and we are at root level*/
>>         add_nr_running(rq, task_delta);
>>
>>  unthrottle_throttle:
>> -       /*
>> -        * The cfs_rq_throttled() breaks in the above iteration can result in
>> -        * incomplete leaf list maintenance, resulting in triggering the
>> -        * assertion below.
>> -        */
>> -       for_each_sched_entity(se) {
>> -               struct cfs_rq *qcfs_rq = cfs_rq_of(se);
>> -
>> -               if (list_add_leaf_cfs_rq(qcfs_rq))
>> -                       break;
>> -       }
>> -
>>         assert_list_leaf_cfs_rq(rq);
>>
>>         /* Determine whether we need to wake up potentially idle CPU: */
>> @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>>                 /* end evaluation on encountering a throttled cfs_rq */
>>                 if (cfs_rq_throttled(cfs_rq))
>>                         goto enqueue_throttle;
>> -
>> -               /*
>> -                * One parent has been throttled and cfs_rq removed from the
>> -                * list. Add it back to not break the leaf list.
>> -                */
>> -               if (throttled_hierarchy(cfs_rq))
>> -                       list_add_leaf_cfs_rq(cfs_rq);
>>         }
>>
>>         /* At this point se is NULL and we are at root level*/
>> @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>>                 update_overutilized_status(rq);
>>
>>  enqueue_throttle:
>> -       if (cfs_bandwidth_used()) {
>> -               /*
>> -                * When bandwidth control is enabled; the cfs_rq_throttled()
>> -                * breaks in the above iteration can result in incomplete
>> -                * leaf list maintenance, resulting in triggering the assertion
>> -                * below.
>> -                */
>> -               for_each_sched_entity(se) {
>> -                       cfs_rq = cfs_rq_of(se);
>> -
>> -                       if (list_add_leaf_cfs_rq(cfs_rq))
>> -                               break;
>> -               }
>> -       }
>> -
>>         assert_list_leaf_cfs_rq(rq);
>>
>>         hrtick_update(rq);
>> --
>> 2.35.2
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-05-24  9:35 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-27 16:05 [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list Chengming Zhou
2022-05-11 12:05 ` Chengming Zhou
2022-05-12 14:02   ` Vincent Guittot
2022-05-13  4:32     ` [External] " Chengming Zhou
2022-05-23 16:23 ` Vincent Guittot
2022-05-24  9:35   ` [External] " Chengming Zhou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).