* [PATCH] sched/fair: Fix ftq noise bench regression
@ 2017-03-17 13:47 Vincent Guittot
  2017-03-21 17:46 ` Dietmar Eggemann
  2017-03-23  9:12 ` [tip:sched/core] sched/fair: Fix FTQ " tip-bot for Vincent Guittot
  0 siblings, 2 replies; 7+ messages in thread
From: Vincent Guittot @ 2017-03-17 13:47 UTC (permalink / raw)
  To: peterz, mingo, linux-kernel, ying.huang; +Cc: dietmar.eggemann, Vincent Guittot

A regression of the FTQ noise score has been reported by ying.huang@linux.intel.com
on an 8-thread Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz with 8G of memory,
caused by:

  commit 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

The only part of that patch which can increase the noise is the update of the
blocked load of a group entity in update_blocked_averages(). We can optimize
this call and skip the update of a group entity if its load and utilization
are already zero and there is no pending propagation of load in the task
group.

This optimization partly restores the noise score. A more aggressive
optimization was tried, but it showed a worse score.
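
For readers unfamiliar with the benchmark: FTQ (Fixed Time Quantum)
measures OS noise by counting how much work completes inside fixed-length
time quanta; the variability of those counts across quanta is the noise
score. A minimal user-space sketch of the idea, illustrative only and not
the actual FTQ code used by the kernel test robot:

#include <stdio.h>
#include <time.h>

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
	enum { NSAMPLES = 1000 };
	const unsigned long long quantum_ns = 100000; /* 100us quanta */
	unsigned long counts[NSAMPLES];
	int i;

	for (i = 0; i < NSAMPLES; i++) {
		unsigned long long end = now_ns() + quantum_ns;
		unsigned long work = 0;

		while (now_ns() < end)
			work++; /* one unit of "work" */
		counts[i] = work;
	}

	/*
	 * The more often the scheduler or any other noise source
	 * interrupts the loop, the less work fits into a quantum;
	 * the spread of counts[] is the noise.
	 */
	for (i = 0; i < NSAMPLES; i++)
		printf("%lu\n", counts[i]);
	return 0;
}

update_blocked_averages() runs from the load-balance softirq path, so any
cycles spent there show up directly as such noise.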

Reported-by: ying.huang@linux.intel.com
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

---
 kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2805bd7..007df59 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
 	return 1;
 }
 
+/*
+ * Check if we need to update the load and the utilization of a blocked
+ * group_entity
+ */
+static inline bool skip_blocked_update(struct sched_entity *se)
+{
+	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+
+	/*
+	 * If sched_entity still have not null load or utilization, we have to
+	 * decay it.
+	 */
+	if (se->avg.load_avg || se->avg.util_avg)
+		return false;
+
+	/*
+	 * If there is a pending propagation, we have to update the load and
+	 * the utilizaion of the sched_entity
+	 */
+	if (gcfs_rq->propagate_avg)
+		return false;
+
+	/*
+	 * Other wise, the load and the utilization of the sched_entity is
+	 * already null and there is no pending propagation so it will be a
+	 * waste of time to try to decay it.
+	 */
+	return true;
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
@@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
 	 * list_add_leaf_cfs_rq() for details.
 	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		struct sched_entity *se;
+
 		/* throttled entities do not contribute to load */
 		if (throttled_hierarchy(cfs_rq))
 			continue;
@@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
 		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
 			update_tg_load_avg(cfs_rq, 0);
 
-		/* Propagate pending load changes to the parent */
-		if (cfs_rq->tg->se[cpu])
-			update_load_avg(cfs_rq->tg->se[cpu], 0);
+		/* Propagate pending load changes to the parent if any */
+		se = cfs_rq->tg->se[cpu];
+		if (se && !skip_blocked_update(se))
+			update_load_avg(se, 0);
 	}
 	rq_unlock_irqrestore(rq, &rf);
 }
-- 
2.7.4


* Re: [PATCH] sched/fair: Fix ftq noise bench regression
  2017-03-17 13:47 [PATCH] sched/fair: Fix ftq noise bench regression Vincent Guittot
@ 2017-03-21 17:46 ` Dietmar Eggemann
  2017-03-22  9:22   ` Vincent Guittot
  2017-03-23  9:12 ` [tip:sched/core] sched/fair: Fix FTQ " tip-bot for Vincent Guittot
  1 sibling, 1 reply; 7+ messages in thread
From: Dietmar Eggemann @ 2017-03-21 17:46 UTC (permalink / raw)
  To: Vincent Guittot, peterz, mingo, linux-kernel, ying.huang

Hi Vincent,

On 17/03/17 13:47, Vincent Guittot wrote:

[...]

> Reported-by: ying.huang@linux.intel.com
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

I thought I could see a difference by running:

 perf stat --null --repeat 10 -- perf bench sched messaging -g 50 -l 
 5000 

on an Ubuntu 16.10 server system.

Number of entries in the rq->leaf_cfs_rq_list per cpu: ~40

Target: Intel i5-3320M (4 logical cpus)

tip/sched/core: 42.119140365 seconds time elapsed ( +-  0.33% )

+ patch       : 42.089557108 seconds time elapsed ( +-  0.37% )

Maybe I need a CPU with more 'logical cpus' or a different test case.
Anyway, couldn't spot any regression.
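
For reference, a per-cpu count like the one above can be collected with a
throwaway debug helper along the following lines. This is hypothetical
code, not part of the patch: it reuses the for_each_leaf_cfs_rq()
iterator that update_blocked_averages() walks, and it should be called
with the relevant rq lock held:

static void count_leaf_cfs_rqs(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	struct cfs_rq *cfs_rq;
	int n = 0;

	/* Same walk that update_blocked_averages() performs */
	for_each_leaf_cfs_rq(rq, cfs_rq)
		n++;

	printk(KERN_DEBUG "CPU%d: %d entries in leaf_cfs_rq_list\n", cpu, n);
}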

> ---
>  kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
>  1 file changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2805bd7..007df59 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>  	return 1;
>  }
>  
> +/*
> + * Check if we need to update the load and the utilization of a blocked
> + * group_entity
> + */
> +static inline bool skip_blocked_update(struct sched_entity *se)
> +{
> +	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +
> +	/*
> +	 * If sched_entity still have not null load or utilization, we have to
> +	 * decay it.
> +	 */
> +	if (se->avg.load_avg || se->avg.util_avg)
> +		return false;
> +
> +	/*
> +	 * If there is a pending propagation, we have to update the load and
> +	 * the utilizaion of the sched_entity

nit pick: s/utilizaion/utilization

> +	 */
> +	if (gcfs_rq->propagate_avg)
> +		return false;
> +
> +	/*
> +	 * Other wise, the load and the utilization of the sched_entity is
> +	 * already null and there is no pending propagation so it will be a
> +	 * waste of time to try to decay it.
> +	 */
> +	return true;
> +}
> +
>  #else /* CONFIG_FAIR_GROUP_SCHED */
>  
>  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
> @@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
>  	 * list_add_leaf_cfs_rq() for details.
>  	 */
>  	for_each_leaf_cfs_rq(rq, cfs_rq) {
> +		struct sched_entity *se;
> +
>  		/* throttled entities do not contribute to load */
>  		if (throttled_hierarchy(cfs_rq))
>  			continue;
> @@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
>  		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>  			update_tg_load_avg(cfs_rq, 0);
>  
> -		/* Propagate pending load changes to the parent */
> -		if (cfs_rq->tg->se[cpu])
> -			update_load_avg(cfs_rq->tg->se[cpu], 0);
> +		/* Propagate pending load changes to the parent if any */
> +		se = cfs_rq->tg->se[cpu];
> +		if (se && !skip_blocked_update(se))
> +			update_load_avg(se, 0);
>  	}
>  	rq_unlock_irqrestore(rq, &rf);
>  }
> 

Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
Saves a couple of patch lines:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 007df5953d1a..8001eeb4ec18 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3177,30 +3177,34 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
  * Check if we need to update the load and the utilization of a blocked
  * group_entity
  */
-static inline bool skip_blocked_update(struct sched_entity *se)
+static inline bool blocked_update_needed(int cpu, struct cfs_rq *cfs_rq)
 {
-       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+       struct sched_entity *se = cfs_rq->tg->se[cpu];
+
+       /* cfs_rq of a root task_group has no sched_entity counterpart */
+       if (!se)
+               return false;
 
        /*
         * If sched_entity still have not null load or utilization, we have to
         * decay it.
         */
        if (se->avg.load_avg || se->avg.util_avg)
-               return false;
+               return true;
 
        /*
         * If there is a pending propagation, we have to update the load and
         * the utilizaion of the sched_entity
         */
-       if (gcfs_rq->propagate_avg)
-               return false;
+       if (cfs_rq->propagate_avg)
+               return true;
 
        /*
         * Other wise, the load and the utilization of the sched_entity is
         * already null and there is no pending propagation so it will be a
         * waste of time to try to decay it.
         */
-       return true;
+       return false;
 }
 
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -6991,8 +6995,6 @@ static void update_blocked_averages(int cpu)
         * list_add_leaf_cfs_rq() for details.
         */
        for_each_leaf_cfs_rq(rq, cfs_rq) {
-               struct sched_entity *se;
-
                /* throttled entities do not contribute to load */
                if (throttled_hierarchy(cfs_rq))
                        continue;
@@ -7001,9 +7003,8 @@ static void update_blocked_averages(int cpu)
                        update_tg_load_avg(cfs_rq, 0);
 
                /* Propagate pending load changes to the parent if any */
-               se = cfs_rq->tg->se[cpu];
-               if (se && !skip_blocked_update(se))
-                       update_load_avg(se, 0);
+               if (blocked_update_needed(cpu, cfs_rq))
+                       update_load_avg(cfs_rq->tg->se[cpu], 0);
        }
        rq_unlock_irqrestore(rq, &rf);
 }

  


* Re: [PATCH] sched/fair: Fix ftq noise bench regression
  2017-03-21 17:46 ` Dietmar Eggemann
@ 2017-03-22  9:22   ` Vincent Guittot
  2017-03-22 16:22     ` Dietmar Eggemann
  0 siblings, 1 reply; 7+ messages in thread
From: Vincent Guittot @ 2017-03-22  9:22 UTC (permalink / raw)
  To: Dietmar Eggemann; +Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, Huang, Ying

On 21 March 2017 at 18:46, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
> Hi Vincent,
>
> On 17/03/17 13:47, Vincent Guittot wrote:
>
> [...]
>
>> Reported-by: ying.huang@linux.intel.com
>> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")
>
> I thought I can see a difference by running:
>
>  perf stat --null --repeat 10 -- perf bench sched messaging -g 50 -l
>  5000
>
> on an Ubuntu 16.10 server system.
>
> Number of entries in the rq->leaf_cfs_rq_list per cpu: ~40
>
> Target: Intel i5-3320M (4 logical cpus)
>
> tip/sched/core: 42.119140365 seconds time elapsed ( +-  0.33% )
>
> + patch       : 42.089557108 seconds time elapsed ( +-  0.37% )
>
> Maybe I need a CPU with more 'logical cpus' or a different test case.
> Anyway, couldn't spot any regression.

OK. I haven't been able to reproduce the regression on my platforms either.

>
>> ---
>>  kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 36 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 2805bd7..007df59 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>>       return 1;
>>  }
>>
>> +/*
>> + * Check if we need to update the load and the utilization of a blocked
>> + * group_entity
>> + */
>> +static inline bool skip_blocked_update(struct sched_entity *se)
>> +{
>> +     struct cfs_rq *gcfs_rq = group_cfs_rq(se);
>> +
>> +     /*
>> +      * If sched_entity still have not null load or utilization, we have to
>> +      * decay it.
>> +      */
>> +     if (se->avg.load_avg || se->avg.util_avg)
>> +             return false;
>> +
>> +     /*
>> +      * If there is a pending propagation, we have to update the load and
>> +      * the utilizaion of the sched_entity
>
> nit pick: s/utilizaion/utilization

yes


>
>> +      */
>> +     if (gcfs_rq->propagate_avg)
>> +             return false;
>> +
>> +     /*
>> +      * Other wise, the load and the utilization of the sched_entity is
>> +      * already null and there is no pending propagation so it will be a
>> +      * waste of time to try to decay it.
>> +      */
>> +     return true;
>> +}
>> +
>>  #else /* CONFIG_FAIR_GROUP_SCHED */
>>
>>  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
>> @@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
>>        * list_add_leaf_cfs_rq() for details.
>>        */
>>       for_each_leaf_cfs_rq(rq, cfs_rq) {
>> +             struct sched_entity *se;
>> +
>>               /* throttled entities do not contribute to load */
>>               if (throttled_hierarchy(cfs_rq))
>>                       continue;
>> @@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
>>               if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>>                       update_tg_load_avg(cfs_rq, 0);
>>
>> -             /* Propagate pending load changes to the parent */
>> -             if (cfs_rq->tg->se[cpu])
>> -                     update_load_avg(cfs_rq->tg->se[cpu], 0);
>> +             /* Propagate pending load changes to the parent if any */
>> +             se = cfs_rq->tg->se[cpu];
>> +             if (se && !skip_blocked_update(se))
>> +                     update_load_avg(se, 0);
>>       }
>>       rq_unlock_irqrestore(rq, &rf);
>>  }
>>
>
> Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
> Saves a couple of patch lines:

Are you sure that we are saving any patch lines?

I tend to agree on the name but not on the parameters.
IMO, it's easier to understand the purpose of
blocked_update_needed(se) compared to blocked_update_needed(cpu, cfs_rq).


>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 007df5953d1a..8001eeb4ec18 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3177,30 +3177,34 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>   * Check if we need to update the load and the utilization of a blocked
>   * group_entity
>   */
> -static inline bool skip_blocked_update(struct sched_entity *se)
> +static inline bool blocked_update_needed(int cpu, struct cfs_rq *cfs_rq)
>  {
> -       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> +       struct sched_entity *se = cfs_rq->tg->se[cpu];
> +
> +       /* cfs_rq of a root task_group has no sched_entity counterpart */
> +       if (!se)
> +               return false;
>
>         /*
>          * If sched_entity still have not null load or utilization, we have to
>          * decay it.
>          */
>         if (se->avg.load_avg || se->avg.util_avg)
> -               return false;
> +               return true;
>
>         /*
>          * If there is a pending propagation, we have to update the load and
>          * the utilizaion of the sched_entity
>          */
> -       if (gcfs_rq->propagate_avg)
> -               return false;
> +       if (cfs_rq->propagate_avg)
> +               return true;
>
>         /*
>          * Other wise, the load and the utilization of the sched_entity is
>          * already null and there is no pending propagation so it will be a
>          * waste of time to try to decay it.
>          */
> -       return true;
> +       return false;
>  }
>
>  #else /* CONFIG_FAIR_GROUP_SCHED */
> @@ -6991,8 +6995,6 @@ static void update_blocked_averages(int cpu)
>          * list_add_leaf_cfs_rq() for details.
>          */
>         for_each_leaf_cfs_rq(rq, cfs_rq) {
> -               struct sched_entity *se;
> -
>                 /* throttled entities do not contribute to load */
>                 if (throttled_hierarchy(cfs_rq))
>                         continue;
> @@ -7001,9 +7003,8 @@ static void update_blocked_averages(int cpu)
>                         update_tg_load_avg(cfs_rq, 0);
>
>                 /* Propagate pending load changes to the parent if any */
> -               se = cfs_rq->tg->se[cpu];
> -               if (se && !skip_blocked_update(se))
> -                       update_load_avg(se, 0);
> +               if (blocked_update_needed(cpu, cfs_rq))
> +                       update_load_avg(cfs_rq->tg->se[cpu], 0);
>         }
>         rq_unlock_irqrestore(rq, &rf);
>  }
>
>
>
>


* Re: [PATCH] sched/fair: Fix ftq noise bench regression
  2017-03-22  9:22   ` Vincent Guittot
@ 2017-03-22 16:22     ` Dietmar Eggemann
  2017-03-22 16:55       ` Vincent Guittot
  0 siblings, 1 reply; 7+ messages in thread
From: Dietmar Eggemann @ 2017-03-22 16:22 UTC (permalink / raw)
  To: Vincent Guittot; +Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, Huang, Ying

On 22/03/17 09:22, Vincent Guittot wrote:
> On 21 March 2017 at 18:46, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>> Hi Vincent,
>>
>> On 17/03/17 13:47, Vincent Guittot wrote:
>>
>> [...]
>>
>>> Reported-by: ying.huang@linux.intel.com
>>> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>>> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

[...]

>> Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
>> Saves a couple of patch lines:
> 
> Are you sure that we are saving any patch lines?

Sorry, it's actually the same :-)

> 
> I tend to agree on the name but not on the parameters.
> IMO, it's easier to understand the purpose of
> blocked_update_needed(se) compared to blocked_update_needed(cpu, cfs_rq).

OK, so:

-               /* Propagate pending load changes to the parent */
-               if (cfs_rq->tg->se[cpu])
+               /* Propagate pending load changes to the parent if any */
+               if (blocked_update_needed(cfs_rq->tg->se[cpu]))

and

+static inline bool blocked_update_needed(struct sched_entity *se)
+{
+       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+
+       /* cfs_rq of a root task_group has no sched_entity counterpart */
+       if (!se)
+               return false;
+
+       /*
+        * If sched_entity still have not null load or utilization, we have to
+        * decay it.
+        */
....

Would make sense to me ...


* Re: [PATCH] sched/fair: Fix ftq noise bench regression
  2017-03-22 16:22     ` Dietmar Eggemann
@ 2017-03-22 16:55       ` Vincent Guittot
  2017-03-22 17:22         ` Dietmar Eggemann
  0 siblings, 1 reply; 7+ messages in thread
From: Vincent Guittot @ 2017-03-22 16:55 UTC (permalink / raw)
  To: Dietmar Eggemann; +Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, Huang, Ying

On 22 March 2017 at 17:22, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
> On 22/03/17 09:22, Vincent Guittot wrote:
>> On 21 March 2017 at 18:46, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>>> Hi Vincent,
>>>
>>> On 17/03/17 13:47, Vincent Guittot wrote:
>>>
>>> [...]
>>>
>>>> Reported-by: ying.huang@linux.intel.com
>>>> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>>>> Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")
>
> [...]
>
>>> Why not turn skip_blocked_update(se) into blocked_update_needed(cpu, cfs_rq)?
>>> Saves a couple of patch lines:
>>
>> Are you sure that we are saving any patch lines?
>
> Sorry, it's actually the same :-)
>
>>
>> I tend to agree on the name but not on the parameters.
>> IMO, it's easier to understand the purpose of
>> blocked_update_needed(se) compared to blocked_update_needed(cpu, cfs_rq).
>
> OK, so:
>
> -               /* Propagate pending load changes to the parent */
> -               if (cfs_rq->tg->se[cpu])
> +               /* Propagate pending load changes to the parent if any */
> +               if (blocked_update_needed(cfs_rq->tg->se[cpu]))
>
> and
>
> +static inline bool blocked_update_needed(struct sched_entity *se)
> +{
> +       struct cfs_rq *gcfs_rq = group_cfs_rq(se);

gcfs_rq can't be set before testing that se is not null

> +
> +       /* cfs_rq of a root task_group has no sched_entity counterpart */
> +       if (!se)
> +               return false;
> +
> +       /*
> +        * If sched_entity still have not null load or utilization, we
> have to
> +        * decay it.
> +        */
> ....
>
> Would make sense to me ...


* Re: [PATCH] sched/fair: Fix ftq noise bench regression
  2017-03-22 16:55       ` Vincent Guittot
@ 2017-03-22 17:22         ` Dietmar Eggemann
  0 siblings, 0 replies; 7+ messages in thread
From: Dietmar Eggemann @ 2017-03-22 17:22 UTC (permalink / raw)
  To: Vincent Guittot; +Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, Huang, Ying

On 22/03/17 16:55, Vincent Guittot wrote:
> On 22 March 2017 at 17:22, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>> On 22/03/17 09:22, Vincent Guittot wrote:
>>> On 21 March 2017 at 18:46, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>>>> Hi Vincent,
>>>>
>>>> On 17/03/17 13:47, Vincent Guittot wrote:

[...]

>> +static inline bool blocked_update_needed(struct sched_entity *se)
>> +{
>> +       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
> 
> gcfs_rq can't be set before testing that se is not null

Ah correct, but you could set it after the !se check.
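
I.e. a sketch of the reordered helper, the variant discussed here (the
patch that was eventually merged kept skip_blocked_update() instead):

static inline bool blocked_update_needed(struct sched_entity *se)
{
	struct cfs_rq *gcfs_rq;

	/* cfs_rq of a root task_group has no sched_entity counterpart */
	if (!se)
		return false;

	/* Only dereference se once we know it is not NULL */
	gcfs_rq = group_cfs_rq(se);

	/* Non-zero load or utilization still has to be decayed */
	if (se->avg.load_avg || se->avg.util_avg)
		return true;

	/* A pending propagation also requires an update */
	if (gcfs_rq->propagate_avg)
		return true;

	return false;
}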


* [tip:sched/core] sched/fair: Fix FTQ noise bench regression
  2017-03-17 13:47 [PATCH] sched/fair: Fix ftq noise bench regression Vincent Guittot
  2017-03-21 17:46 ` Dietmar Eggemann
@ 2017-03-23  9:12 ` tip-bot for Vincent Guittot
  1 sibling, 0 replies; 7+ messages in thread
From: tip-bot for Vincent Guittot @ 2017-03-23  9:12 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: efault, hpa, peterz, vincent.guittot, linux-kernel, tglx, mingo,
	torvalds

Commit-ID:  bc4278987e3874da62edf585fe8b3bdd9b53f638
Gitweb:     http://git.kernel.org/tip/bc4278987e3874da62edf585fe8b3bdd9b53f638
Author:     Vincent Guittot <vincent.guittot@linaro.org>
AuthorDate: Fri, 17 Mar 2017 14:47:22 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 23 Mar 2017 07:44:51 +0100

sched/fair: Fix FTQ noise bench regression

A regression of the FTQ noise has been reported by Ying Huang,
on the following hardware:

  8-thread Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz with 8G of memory

... which was caused by this commit:

  commit 4e5160766fcc ("sched/fair: Propagate asynchrous detach")

The only part of that patch which can increase the noise is the update
of the blocked load of a group entity in update_blocked_averages().

We can optimize this call and skip the update of a group entity if its load
and utilization are already zero and there is no pending propagation of load
in the task group.

This optimization partly restores the noise score. A more aggressive
optimization was tried, but it showed a worse score.

Reported-by: ying.huang@linux.intel.com
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: ying.huang@intel.com
Fixes: 4e5160766fcc ("sched/fair: Propagate asynchrous detach")
Link: http://lkml.kernel.org/r/1489758442-2877-1-git-send-email-vincent.guittot@linaro.org
[ Fixed typos, improved layout. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2805bd7..03adf9f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3173,6 +3173,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
 	return 1;
 }
 
+/*
+ * Check if we need to update the load and the utilization of a blocked
+ * group_entity:
+ */
+static inline bool skip_blocked_update(struct sched_entity *se)
+{
+	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+
+	/*
+	 * If the sched_entity still has non-zero load or utilization, we have to
+	 * decay it:
+	 */
+	if (se->avg.load_avg || se->avg.util_avg)
+		return false;
+
+	/*
+	 * If there is a pending propagation, we have to update the load and
+	 * the utilization of the sched_entity:
+	 */
+	if (gcfs_rq->propagate_avg)
+		return false;
+
+	/*
+	 * Otherwise, the load and the utilization of the sched_entity are
+	 * already zero and there is no pending propagation, so it would be a
+	 * waste of time to try to decay it:
+	 */
+	return true;
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
@@ -6961,6 +6991,8 @@ static void update_blocked_averages(int cpu)
 	 * list_add_leaf_cfs_rq() for details.
 	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		struct sched_entity *se;
+
 		/* throttled entities do not contribute to load */
 		if (throttled_hierarchy(cfs_rq))
 			continue;
@@ -6968,9 +7000,10 @@ static void update_blocked_averages(int cpu)
 		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
 			update_tg_load_avg(cfs_rq, 0);
 
-		/* Propagate pending load changes to the parent */
-		if (cfs_rq->tg->se[cpu])
-			update_load_avg(cfs_rq->tg->se[cpu], 0);
+		/* Propagate pending load changes to the parent, if any: */
+		se = cfs_rq->tg->se[cpu];
+		if (se && !skip_blocked_update(se))
+			update_load_avg(se, 0);
 	}
 	rq_unlock_irqrestore(rq, &rf);
 }

