* [PATCH v5] sched/fair: Correctly insert cfs_rq's to list on unthrottle
@ 2021-06-12 11:28 ` Odin Ugedal
  0 siblings, 0 replies; 7+ messages in thread
From: Odin Ugedal @ 2021-06-12 11:28 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira
  Cc: cgroups, linux-kernel, Odin Ugedal

This fixes an issue where fairness is decreased since cfs_rq's can
end up not being decayed properly. For two sibling control groups with
the same priority, this can often lead to a load ratio of 99/1 (!!).

This happens because when a cfs_rq is throttled, all the descendant
cfs_rq's will be removed from the leaf list. When the initial cfs_rq is
unthrottled, it will currently only re-add descendant cfs_rq's if they
have one or more entities enqueued. This is not a perfect heuristic.

Instead, we insert all cfs_rq's that contain one or more enqueued
entities, or whose load is not completely decayed.

This can often lead to situations like the following for equally
weighted control groups:

$ ps u -C stress
USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root       10009 88.8  0.0   3676   100 pts/1    R+   11:04   0:13 stress --cpu 1
root       10023  3.0  0.0   3676   104 pts/1    R+   11:04   0:00 stress --cpu 1
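
(Not part of the original submission: a rough sketch of the kind of
cgroup v2 setup that can produce two such sibling groups. The "slice",
"cg-1" and "cg-2" names and the quota values are purely illustrative,
/sys/fs/cgroup is assumed to be the v2 mount point, and the exact
timing needed to hit the skew varies.)

$ cd /sys/fs/cgroup
$ echo +cpu > cgroup.subtree_control
$ mkdir -p slice/cg-1 slice/cg-2
$ echo +cpu > slice/cgroup.subtree_control
$ echo "10000 100000" > slice/cpu.max    # throttle the parent group
$ (echo $$ > slice/cg-1/cgroup.procs; exec stress --cpu 1) &
$ (echo $$ > slice/cg-2/cgroup.procs; exec stress --cpu 1) &
$ ps u -C stress                         # split can drift far from 50/50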

Fixes: 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
Signed-off-by: Odin Ugedal <odin@uged.al>
---
Changes since v1:
 - Replaced the new cfs_rq field with tg_load_avg_contrib
 - Went from 3 patches to 1; one is merged and one is replaced
   by a new patchset.
Changes since v2:
 - Use !cfs_rq_is_decayed() instead of tg_load_avg_contrib
 - Moved cfs_rq_is_decayed to above its new use
Changes since v3:
 - (hopefully) Fix config for !CONFIG_SMP
Changes since v4:
 - Move cfs_rq_is_decayed again

 kernel/sched/fair.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 794c2cb945f8..c48d1d409b20 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3283,6 +3283,24 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->avg.runnable_sum)
+		return false;
+
+	return true;
+}
+
 /**
  * update_tg_load_avg - update the tg's load avg
  * @cfs_rq: the cfs_rq whose avg changed
@@ -4719,8 +4737,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
 					     cfs_rq->throttled_clock_task;
 
-		/* Add cfs_rq with already running entity in the list */
-		if (cfs_rq->nr_running >= 1)
+		/* Add cfs_rq with load or one or more already running entities to the list */
+		if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
 			list_add_leaf_cfs_rq(cfs_rq);
 	}
 
@@ -7895,23 +7913,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
-{
-	if (cfs_rq->load.weight)
-		return false;
-
-	if (cfs_rq->avg.load_sum)
-		return false;
-
-	if (cfs_rq->avg.util_sum)
-		return false;
-
-	if (cfs_rq->avg.runnable_sum)
-		return false;
-
-	return true;
-}
-
 static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
 	struct cfs_rq *cfs_rq, *pos;
-- 
2.31.1


* Re: [PATCH v5] sched/fair: Correctly insert cfs_rq's to list on unthrottle
  2021-06-12 11:28 ` Odin Ugedal
  (?)
@ 2021-06-14 12:01 ` Vincent Guittot
  2021-06-14 13:10     ` Peter Zijlstra
  -1 siblings, 1 reply; 7+ messages in thread
From: Vincent Guittot @ 2021-06-14 12:01 UTC (permalink / raw)
  To: Odin Ugedal
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, open list:CONTROL GROUP (CGROUP),
	linux-kernel

On Sat, 12 Jun 2021 at 13:31, Odin Ugedal <odin@uged.al> wrote:
>
> This fixes an issue where fairness is decreased since cfs_rq's can
> end up not being decayed properly. For two sibling control groups with
> the same priority, this can often lead to a load ratio of 99/1 (!!).
>
> This happen because when a cfs_rq is throttled, all the descendant cfs_rq's

s/happen/happens/

> will be removed from the leaf list. When they initial cfs_rq is
> unthrottled, it will currently only re add descendant cfs_rq's if they
> have one or more entities enqueued. This is not a perfect heuristic.
>
> Instead, we insert all cfs_rq's that contain one or more enqueued
> entities, or it its load is not completely decayed.
>
> Can often lead to situations like this for equally weighted control
> groups:
>
> $ ps u -C stress
> USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
> root       10009 88.8  0.0   3676   100 pts/1    R+   11:04   0:13 stress --cpu 1
> root       10023  3.0  0.0   3676   104 pts/1    R+   11:04   0:00 stress --cpu 1
>
> Fixes: 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> Signed-off-by: Odin Ugedal <odin@uged.al>

minor typo in the commit message otherwise

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---
> Changes since v1:
>  - Replaced cfs_rq field with using tg_load_avg_contrib
>  - Went from 3 to 1 patches; one is merged and one is replaced
>    by a new patchset.
> Changes since v2:
>  - Use !cfs_rq_is_decayed() instead of tg_load_avg_contrib
>  - Moved cfs_rq_is_decayed to above its new use
> Changes since v3:
>  - (hopefully) Fix config for !CONFIG_SMP
> Changes since v4:
>  - Move cfs_rq_is_decayed again
>
>  kernel/sched/fair.c | 39 ++++++++++++++++++++-------------------
>  1 file changed, 20 insertions(+), 19 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 794c2cb945f8..c48d1d409b20 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3283,6 +3283,24 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
>
>  #ifdef CONFIG_SMP
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> +
> +static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
> +{
> +       if (cfs_rq->load.weight)
> +               return false;
> +
> +       if (cfs_rq->avg.load_sum)
> +               return false;
> +
> +       if (cfs_rq->avg.util_sum)
> +               return false;
> +
> +       if (cfs_rq->avg.runnable_sum)
> +               return false;
> +
> +       return true;
> +}
> +
>  /**
>   * update_tg_load_avg - update the tg's load avg
>   * @cfs_rq: the cfs_rq whose avg changed
> @@ -4719,8 +4737,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
>                 cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
>                                              cfs_rq->throttled_clock_task;
>
> -               /* Add cfs_rq with already running entity in the list */
> -               if (cfs_rq->nr_running >= 1)
> +               /* Add cfs_rq with load or one or more already running entities to the list */
> +               if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
>                         list_add_leaf_cfs_rq(cfs_rq);
>         }
>
> @@ -7895,23 +7913,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>
> -static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
> -{
> -       if (cfs_rq->load.weight)
> -               return false;
> -
> -       if (cfs_rq->avg.load_sum)
> -               return false;
> -
> -       if (cfs_rq->avg.util_sum)
> -               return false;
> -
> -       if (cfs_rq->avg.runnable_sum)
> -               return false;
> -
> -       return true;
> -}
> -
>  static bool __update_blocked_fair(struct rq *rq, bool *done)
>  {
>         struct cfs_rq *cfs_rq, *pos;
> --
> 2.31.1
>


* Re: [PATCH v5] sched/fair: Correctly insert cfs_rq's to list on unthrottle
  2021-06-14 12:01 ` Vincent Guittot
@ 2021-06-14 13:10     ` Peter Zijlstra
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Zijlstra @ 2021-06-14 13:10 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Odin Ugedal, Ingo Molnar, Juri Lelli, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, open list:CONTROL GROUP (CGROUP),
	linux-kernel

On Mon, Jun 14, 2021 at 02:01:55PM +0200, Vincent Guittot wrote:
> On Sat, 12 Jun 2021 at 13:31, Odin Ugedal <odin@uged.al> wrote:
> >
> > This fixes an issue where fairness is decreased since cfs_rq's can
> > end up not being decayed properly. For two sibling control groups with
> > the same priority, this can often lead to a load ratio of 99/1 (!!).
> >
> > This happen because when a cfs_rq is throttled, all the descendant cfs_rq's
> 
> s/happen/happens/
> 
> > will be removed from the leaf list. When they initial cfs_rq is
> > unthrottled, it will currently only re add descendant cfs_rq's if they
> > have one or more entities enqueued. This is not a perfect heuristic.
> >
> > Instead, we insert all cfs_rq's that contain one or more enqueued
> > entities, or it its load is not completely decayed.
> >
> > Can often lead to situations like this for equally weighted control
> > groups:
> >
> > $ ps u -C stress
> > USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
> > root       10009 88.8  0.0   3676   100 pts/1    R+   11:04   0:13 stress --cpu 1
> > root       10023  3.0  0.0   3676   104 pts/1    R+   11:04   0:00 stress --cpu 1
> >
> > Fixes: 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> > Signed-off-by: Odin Ugedal <odin@uged.al>
> 
> minor typo in the commit message otherwise
> 
> Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

Thanks!



* Re: [PATCH v5] sched/fair: Correctly insert cfs_rq's to list on unthrottle
  2021-06-12 11:28 ` Odin Ugedal
  (?)
  (?)
@ 2021-06-16 13:24 ` kernel test robot
  -1 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2021-06-16 13:24 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2717 bytes --]

Hi Odin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master linux/master linus/master v5.13-rc6 next-20210615]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Odin-Ugedal/sched-fair-Correctly-insert-cfs_rq-s-to-list-on-unthrottle/20210616-151742
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 0159bb020ca9a43b17aa9149f1199643c1d49426
config: m68k-randconfig-r021-20210615 (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/670fb8556a2d3594c67bf4ab2983ba4f91f5686b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Odin-Ugedal/sched-fair-Correctly-insert-cfs_rq-s-to-list-on-unthrottle/20210616-151742
        git checkout 670fb8556a2d3594c67bf4ab2983ba4f91f5686b
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   kernel/sched/fair.c: In function 'tg_unthrottle_up':
>> kernel/sched/fair.c:4724:8: error: implicit declaration of function 'cfs_rq_is_decayed' [-Werror=implicit-function-declaration]
    4724 |   if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
         |        ^~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/cfs_rq_is_decayed +4724 kernel/sched/fair.c

  4712	
  4713	static int tg_unthrottle_up(struct task_group *tg, void *data)
  4714	{
  4715		struct rq *rq = data;
  4716		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
  4717	
  4718		cfs_rq->throttle_count--;
  4719		if (!cfs_rq->throttle_count) {
  4720			cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
  4721						     cfs_rq->throttled_clock_task;
  4722	
  4723			/* Add cfs_rq with load or one or more already running entities to the list */
> 4724			if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
  4725				list_add_leaf_cfs_rq(cfs_rq);
  4726		}
  4727	
  4728		return 0;
  4729	}
  4730	
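
The build error is specific to !CONFIG_SMP: the patch defines
cfs_rq_is_decayed() inside the CONFIG_SMP block, while
tg_unthrottle_up() now calls it unconditionally. The merged version
(see the tip-bot commit below) resolves this with a stub in the
#else /* CONFIG_SMP */ branch; a sketch of that missing piece, with
explanatory comments added here:

#else /* CONFIG_SMP */

/*
 * On UP builds the per-cfs_rq load averages are not tracked, so there
 * is no blocked load to wait for; treat every cfs_rq as fully decayed
 * and let the nr_running check in tg_unthrottle_up() decide on its own.
 */
static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
{
	return true;
}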

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 22441 bytes --]


* [tip: sched/urgent] sched/fair: Correctly insert cfs_rq's to list on unthrottle
  2021-06-12 11:28 ` Odin Ugedal
                   ` (2 preceding siblings ...)
  (?)
@ 2021-06-16 13:49 ` tip-bot2 for Odin Ugedal
  -1 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Odin Ugedal @ 2021-06-16 13:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Odin Ugedal, Peter Zijlstra (Intel), Vincent Guittot, x86, linux-kernel

The following commit has been merged into the sched/urgent branch of tip:

Commit-ID:     a7b359fc6a37faaf472125867c8dc5a068c90982
Gitweb:        https://git.kernel.org/tip/a7b359fc6a37faaf472125867c8dc5a068c90982
Author:        Odin Ugedal <odin@uged.al>
AuthorDate:    Sat, 12 Jun 2021 13:28:15 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 14 Jun 2021 22:58:47 +02:00

sched/fair: Correctly insert cfs_rq's to list on unthrottle

Fix an issue where fairness is decreased since cfs_rq's can end up not
being decayed properly. For two sibling control groups with the same
priority, this can often lead to a load ratio of 99/1 (!!).

This happens because when a cfs_rq is throttled, all the descendant
cfs_rq's will be removed from the leaf list. When the initial cfs_rq
is unthrottled, it will currently only re-add descendant cfs_rq's if
they have one or more entities enqueued. This is not a perfect
heuristic.

Instead, we insert all cfs_rq's that contain one or more enqueued
entities, or whose load is not completely decayed.

This can often lead to situations like the following for equally
weighted control groups:

  $ ps u -C stress
  USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
  root       10009 88.8  0.0   3676   100 pts/1    R+   11:04   0:13 stress --cpu 1
  root       10023  3.0  0.0   3676   104 pts/1    R+   11:04   0:00 stress --cpu 1

Fixes: 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
[vingo: !SMP build fix]
Signed-off-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20210612112815.61678-1-odin@uged.al
---
 kernel/sched/fair.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2c8a935..bfaa6e1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3298,6 +3298,24 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->avg.runnable_sum)
+		return false;
+
+	return true;
+}
+
 /**
  * update_tg_load_avg - update the tg's load avg
  * @cfs_rq: the cfs_rq whose avg changed
@@ -4091,6 +4109,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 
 #else /* CONFIG_SMP */
 
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	return true;
+}
+
 #define UPDATE_TG	0x0
 #define SKIP_AGE_LOAD	0x0
 #define DO_ATTACH	0x0
@@ -4749,8 +4772,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
 					     cfs_rq->throttled_clock_task;
 
-		/* Add cfs_rq with already running entity in the list */
-		if (cfs_rq->nr_running >= 1)
+		/* Add cfs_rq with load or one or more already running entities to the list */
+		if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
 			list_add_leaf_cfs_rq(cfs_rq);
 	}
 
@@ -7996,23 +8019,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
-{
-	if (cfs_rq->load.weight)
-		return false;
-
-	if (cfs_rq->avg.load_sum)
-		return false;
-
-	if (cfs_rq->avg.util_sum)
-		return false;
-
-	if (cfs_rq->avg.runnable_sum)
-		return false;
-
-	return true;
-}
-
 static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
 	struct cfs_rq *cfs_rq, *pos;
