* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util
  [not found] <1573570093-1340-1-git-send-email-vincent.guittot@linaro.org>
@ 2019-11-12 15:05 ` Vincent Guittot
  2019-11-12 17:11 ` Valentin Schneider
  2019-11-13 10:49 ` Dietmar Eggemann
  0 siblings, 2 replies; 9+ messages in thread
From: Vincent Guittot @ 2019-11-12 15:05 UTC (permalink / raw)
To: linux-kernel, mingo, peterz, dietmar.eggemann, juri.lelli, rostedt,
	mgorman, dsmythies
Cc: linux-pm, torvalds, tglx, sargun, tj, xiexiuqi, xiezhipeng1,
	srinivas.pandruvada

On Tuesday 12 Nov 2019 at 15:48:13 (+0100), Vincent Guittot wrote:
> update_cfs_rq_load_avg() calls cfs_rq_util_change() every time PELT decays,
> which might be inefficient when the cpufreq driver has a rate limit.
>
> When a task is attached to a CPU, we have the call path:
>
> update_blocked_averages()
>   update_cfs_rq_load_avg()
>     cfs_rq_util_change  --> triggers frequency update
>   attach_entity_load_avg()
>     cfs_rq_util_change  --> triggers frequency update
>
> The 1st frequency update will not take into account the utilization of the
> newly attached task, and the 2nd one might be discarded because of the
> rate limit in the cpufreq driver.
>
> update_cfs_rq_load_avg() is only called by update_blocked_averages()
> and update_load_avg(), so we can move the call to
> {cfs_rq,cpufreq}_util_change() into these 2 functions. It's also
> interesting to note that update_load_avg() already calls
> cfs_rq_util_change() directly for the !SMP case.
>
> This change will also ensure that cpufreq_update_util() is called even
> when there is no more CFS rq in the leaf_cfs_rq_list to update, but only
> irq, rt or dl PELT signals.
>
> Reported-by: Doug Smythies <dsmythies@telus.net>
> Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path")
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>
> ---
>
> I have just rebased the patch on the latest tip/sched/core and made it a
> proper patchset after Doug reported that the problem has disappeared
> according to his first results, but the test results are not all based on
> the same v5.4-rcX and were run with the menu instead of the teo governor.
>
>  kernel/sched/fair.c | 33 +++++++++++++++++++++------------
>  1 file changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e458f52..c93d534 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3508,9 +3508,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
>  	cfs_rq->load_last_update_time_copy = sa->last_update_time;
>  #endif
>  
> -	if (decayed)
> -		cfs_rq_util_change(cfs_rq, 0);
> -
>  	return decayed;
>  }
>  
> @@ -3620,8 +3617,12 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>  		attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION);
>  		update_tg_load_avg(cfs_rq, 0);
>  
> -	} else if (decayed && (flags & UPDATE_TG))
> -		update_tg_load_avg(cfs_rq, 0);
> +	} else if (decayed) {
> +		cfs_rq_util_change(cfs_rq, 0);
> +
> +		if (flags & UPDATE_TG)
> +			update_tg_load_avg(cfs_rq, 0);
> +	}
>  }
>  
>  #ifndef CONFIG_64BIT
> @@ -7484,6 +7485,7 @@ static void update_blocked_averages(int cpu)
>  	const struct sched_class *curr_class;
>  	struct rq_flags rf;
>  	bool done = true;
> +	int decayed = 0;
>  
>  	rq_lock_irqsave(rq, &rf);
>  	update_rq_clock(rq);
>  
> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu)
>  	 * that RT, DL and IRQ signals have been updated before updating CFS.
>  	 */
>  	curr_class = rq->curr->sched_class;
> -	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> -	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> -	update_irq_load_avg(rq, 0);
> +	decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> +	decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> +	decayed |= update_irq_load_avg(rq, 0);
>  
>  	/* Don't need periodic decay once load/util_avg are null */
>  	if (others_have_blocked(rq))
> @@ -7529,6 +7531,9 @@ static void update_blocked_averages(int cpu)
>  	}
>  
>  	update_blocked_load_status(rq, !done);
> +
> +	if (decayed)
> +		cpufreq_update_util(rq, 0);
>  	rq_unlock_irqrestore(rq, &rf);
>  }
>  
> @@ -7585,6 +7590,7 @@ static inline void update_blocked_averages(int cpu)
>  	struct cfs_rq *cfs_rq = &rq->cfs;
>  	const struct sched_class *curr_class;
>  	struct rq_flags rf;
> +	int decayed = 0;
>  
>  	rq_lock_irqsave(rq, &rf);
>  	update_rq_clock(rq);
>  
> @@ -7594,13 +7600,16 @@ static inline void update_blocked_averages(int cpu)
>  	 * that RT, DL and IRQ signals have been updated before updating CFS.
>  	 */
>  	curr_class = rq->curr->sched_class;
> -	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> -	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> -	update_irq_load_avg(rq, 0);
> +	decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> +	decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> +	decayed |= update_irq_load_avg(rq, 0);
>  
> -	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
> +	decayed |= update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
>  
>  	update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
> +
> +	if (decayed)
> +		cpufreq_update_util(rq, 0);
>  	rq_unlock_irqrestore(rq, &rf);
>  }
>  
> --
> 2.7.4
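To make the failure mode in the changelog concrete, here is a standalone toy model, not kernel code: every name below (`gov_update()`, the 1:1 util-to-frequency mapping, the time units) is invented for illustration, and the rate limiter only loosely mirrors how a real governor such as schedutil ignores requests that arrive inside its rate-limit window. Under the old call order, the first update runs with stale utilization and the second, corrected one is dropped:

```c
#include <assert.h>

/* Hypothetical rate-limited governor: accepts an update only if at
 * least 'rate_limit' time units passed since the last accepted one. */
struct gov {
	long last_update;	/* time of last accepted update, -1 = never */
	long rate_limit;
	int  freq;		/* last frequency target actually applied   */
};

/* Returns 1 if the request was applied, 0 if rate-limited away. */
static int gov_update(struct gov *g, long now, int util)
{
	if (g->last_update >= 0 && now - g->last_update < g->rate_limit)
		return 0;		/* request silently discarded */
	g->last_update = now;
	g->freq = util;			/* pretend freq tracks util 1:1 */
	return 1;
}

/* Reproduce the old call path: the first update fires before the new
 * task's utilization is attached, the second fires right after and is
 * dropped, so the governor keeps running on the stale value. */
static int attach_task_old_path(struct gov *g, long now,
				int old_util, int new_task_util)
{
	gov_update(g, now, old_util);			/* update_cfs_rq_load_avg() */
	gov_update(g, now, old_util + new_task_util);	/* attach_entity_load_avg() */
	return g->freq;
}
```

With the patch's ordering there is a single notification that already includes the attached task's utilization, so the governor never gets stuck on the stale value.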
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-12 15:05 ` [PATCH v2] sched/freq: move call to cpufreq_update_util Vincent Guittot @ 2019-11-12 17:11 ` Valentin Schneider 2019-11-12 17:20 ` Vincent Guittot 2019-11-13 10:49 ` Dietmar Eggemann 1 sibling, 1 reply; 9+ messages in thread From: Valentin Schneider @ 2019-11-12 17:11 UTC (permalink / raw) To: Vincent Guittot, linux-kernel, mingo, peterz, dietmar.eggemann, juri.lelli, rostedt, mgorman, dsmythies Cc: linux-pm, torvalds, tglx, sargun, tj, xiexiuqi, xiezhipeng1, srinivas.pandruvada Hi Vincent, I didn't see anything in that reply, was that just a spurious email? On 12/11/2019 15:05, Vincent Guittot wrote: > Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : >> update_cfs_rq_load_avg() calls cfs_rq_util_change() everytime pelt decays, >> which might be inefficient when cpufreq driver has rate limitation. >> >> When a task is attached on a CPU, we have call path: >> >> update_blocked_averages() >> update_cfs_rq_load_avg() >> cfs_rq_util_change -- > trig frequency update >> attach_entity_load_avg() >> cfs_rq_util_change -- > trig frequency update >> >> The 1st frequency update will not take into account the utilization of the >> newly attached task and the 2nd one might be discard because of rate >> limitation of the cpufreq driver. >> >> update_cfs_rq_load_avg() is only called by update_blocked_averages() >> and update_load_avg() so we can move the call to >> {cfs_rq,cpufreq}_util_change() into these 2 functions. It's also >> interesting to notice that update_load_avg() already calls directly >> cfs_rq_util_change() for !SMP case. >> >> This changes will also ensure that cpufreq_update_util() is called even >> when there is no more CFS rq in the leaf_cfs_rq_list to update but only >> irq, rt or dl pelt signals. 
>> >> Reported-by: Doug Smythies <dsmythies@telus.net> >> Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path") >> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> >> >> --- >> >> I have just rebased the patch on latest tip/sched/core and made it a proper >> patchset after Doug reported that the problem has diseappeared according to >> his 1st results but tests results are not all based on the same v5.4-rcX >> and with menu instead of teo governor. >> >> kernel/sched/fair.c | 33 +++++++++++++++++++++------------ >> 1 file changed, 21 insertions(+), 12 deletions(-) >> >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c >> index e458f52..c93d534 100644 >> --- a/kernel/sched/fair.c >> +++ b/kernel/sched/fair.c >> @@ -3508,9 +3508,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) >> cfs_rq->load_last_update_time_copy = sa->last_update_time; >> #endif >> >> - if (decayed) >> - cfs_rq_util_change(cfs_rq, 0); >> - >> return decayed; >> } >> >> @@ -3620,8 +3617,12 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s >> attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION); >> update_tg_load_avg(cfs_rq, 0); >> >> - } else if (decayed && (flags & UPDATE_TG)) >> - update_tg_load_avg(cfs_rq, 0); >> + } else if (decayed) { >> + cfs_rq_util_change(cfs_rq, 0); >> + >> + if (flags & UPDATE_TG) >> + update_tg_load_avg(cfs_rq, 0); >> + } >> } >> >> #ifndef CONFIG_64BIT >> @@ -7484,6 +7485,7 @@ static void update_blocked_averages(int cpu) >> const struct sched_class *curr_class; >> struct rq_flags rf; >> bool done = true; >> + int decayed = 0; >> >> rq_lock_irqsave(rq, &rf); >> update_rq_clock(rq); >> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu) >> * that RT, DL and IRQ signals have been updated before updating CFS. 
>> */ >> curr_class = rq->curr->sched_class; >> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); >> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); >> - update_irq_load_avg(rq, 0); >> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); >> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); >> + decayed |= update_irq_load_avg(rq, 0); >> >> /* Don't need periodic decay once load/util_avg are null */ >> if (others_have_blocked(rq)) >> @@ -7529,6 +7531,9 @@ static void update_blocked_averages(int cpu) >> } >> >> update_blocked_load_status(rq, !done); >> + >> + if (decayed) >> + cpufreq_update_util(rq, 0); >> rq_unlock_irqrestore(rq, &rf); >> } >> >> @@ -7585,6 +7590,7 @@ static inline void update_blocked_averages(int cpu) >> struct cfs_rq *cfs_rq = &rq->cfs; >> const struct sched_class *curr_class; >> struct rq_flags rf; >> + int decayed = 0; >> >> rq_lock_irqsave(rq, &rf); >> update_rq_clock(rq); >> @@ -7594,13 +7600,16 @@ static inline void update_blocked_averages(int cpu) >> * that RT, DL and IRQ signals have been updated before updating CFS. 
>> */ >> curr_class = rq->curr->sched_class; >> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); >> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); >> - update_irq_load_avg(rq, 0); >> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); >> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); >> + decayed |= update_irq_load_avg(rq, 0); >> >> - update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); >> + decayed |= update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); >> >> update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq)); >> + >> + if (decayed) >> + cpufreq_update_util(rq, 0); >> rq_unlock_irqrestore(rq, &rf); >> } >> >> -- >> 2.7.4 >> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-12 17:11 ` Valentin Schneider @ 2019-11-12 17:20 ` Vincent Guittot 2019-11-12 17:44 ` Valentin Schneider 0 siblings, 1 reply; 9+ messages in thread From: Vincent Guittot @ 2019-11-12 17:20 UTC (permalink / raw) To: Valentin Schneider Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Dietmar Eggemann, Juri Lelli, Steven Rostedt, Mel Gorman, Doug Smythies, open list:THERMAL, Linus Torvalds, Thomas Gleixner, Sargun Dhillon, Tejun Heo, Xie XiuQi, xiezhipeng1, Srinivas Pandruvada On Tue, 12 Nov 2019 at 18:11, Valentin Schneider <valentin.schneider@arm.com> wrote: > > Hi Vincent, > > I didn't see anything in that reply, was that just a spurious email? there were a typo in one email address in the 1st message > > On 12/11/2019 15:05, Vincent Guittot wrote: > > Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : > >> update_cfs_rq_load_avg() calls cfs_rq_util_change() everytime pelt decays, > >> which might be inefficient when cpufreq driver has rate limitation. > >> > >> When a task is attached on a CPU, we have call path: > >> > >> update_blocked_averages() > >> update_cfs_rq_load_avg() > >> cfs_rq_util_change -- > trig frequency update > >> attach_entity_load_avg() > >> cfs_rq_util_change -- > trig frequency update > >> > >> The 1st frequency update will not take into account the utilization of the > >> newly attached task and the 2nd one might be discard because of rate > >> limitation of the cpufreq driver. > >> > >> update_cfs_rq_load_avg() is only called by update_blocked_averages() > >> and update_load_avg() so we can move the call to > >> {cfs_rq,cpufreq}_util_change() into these 2 functions. It's also > >> interesting to notice that update_load_avg() already calls directly > >> cfs_rq_util_change() for !SMP case. 
> >> > >> This changes will also ensure that cpufreq_update_util() is called even > >> when there is no more CFS rq in the leaf_cfs_rq_list to update but only > >> irq, rt or dl pelt signals. > >> > >> Reported-by: Doug Smythies <dsmythies@telus.net> > >> Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path") > >> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> > >> > >> --- > >> > >> I have just rebased the patch on latest tip/sched/core and made it a proper > >> patchset after Doug reported that the problem has diseappeared according to > >> his 1st results but tests results are not all based on the same v5.4-rcX > >> and with menu instead of teo governor. > >> > >> kernel/sched/fair.c | 33 +++++++++++++++++++++------------ > >> 1 file changed, 21 insertions(+), 12 deletions(-) > >> > >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > >> index e458f52..c93d534 100644 > >> --- a/kernel/sched/fair.c > >> +++ b/kernel/sched/fair.c > >> @@ -3508,9 +3508,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) > >> cfs_rq->load_last_update_time_copy = sa->last_update_time; > >> #endif > >> > >> - if (decayed) > >> - cfs_rq_util_change(cfs_rq, 0); > >> - > >> return decayed; > >> } > >> > >> @@ -3620,8 +3617,12 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s > >> attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION); > >> update_tg_load_avg(cfs_rq, 0); > >> > >> - } else if (decayed && (flags & UPDATE_TG)) > >> - update_tg_load_avg(cfs_rq, 0); > >> + } else if (decayed) { > >> + cfs_rq_util_change(cfs_rq, 0); > >> + > >> + if (flags & UPDATE_TG) > >> + update_tg_load_avg(cfs_rq, 0); > >> + } > >> } > >> > >> #ifndef CONFIG_64BIT > >> @@ -7484,6 +7485,7 @@ static void update_blocked_averages(int cpu) > >> const struct sched_class *curr_class; > >> struct rq_flags rf; > >> bool done = true; > >> + int decayed = 0; > >> > >> rq_lock_irqsave(rq, &rf); > >> update_rq_clock(rq); > >> 
@@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu) > >> * that RT, DL and IRQ signals have been updated before updating CFS. > >> */ > >> curr_class = rq->curr->sched_class; > >> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > >> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > >> - update_irq_load_avg(rq, 0); > >> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > >> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > >> + decayed |= update_irq_load_avg(rq, 0); > >> > >> /* Don't need periodic decay once load/util_avg are null */ > >> if (others_have_blocked(rq)) > >> @@ -7529,6 +7531,9 @@ static void update_blocked_averages(int cpu) > >> } > >> > >> update_blocked_load_status(rq, !done); > >> + > >> + if (decayed) > >> + cpufreq_update_util(rq, 0); > >> rq_unlock_irqrestore(rq, &rf); > >> } > >> > >> @@ -7585,6 +7590,7 @@ static inline void update_blocked_averages(int cpu) > >> struct cfs_rq *cfs_rq = &rq->cfs; > >> const struct sched_class *curr_class; > >> struct rq_flags rf; > >> + int decayed = 0; > >> > >> rq_lock_irqsave(rq, &rf); > >> update_rq_clock(rq); > >> @@ -7594,13 +7600,16 @@ static inline void update_blocked_averages(int cpu) > >> * that RT, DL and IRQ signals have been updated before updating CFS. 
> >> */ > >> curr_class = rq->curr->sched_class; > >> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > >> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > >> - update_irq_load_avg(rq, 0); > >> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > >> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > >> + decayed |= update_irq_load_avg(rq, 0); > >> > >> - update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); > >> + decayed |= update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq); > >> > >> update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq)); > >> + > >> + if (decayed) > >> + cpufreq_update_util(rq, 0); > >> rq_unlock_irqrestore(rq, &rf); > >> } > >> > >> -- > >> 2.7.4 > >> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util
  2019-11-12 17:20 ` Vincent Guittot
@ 2019-11-12 17:44 ` Valentin Schneider
  0 siblings, 0 replies; 9+ messages in thread
From: Valentin Schneider @ 2019-11-12 17:44 UTC (permalink / raw)
To: Vincent Guittot
Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Dietmar Eggemann,
	Juri Lelli, Steven Rostedt, Mel Gorman, Doug Smythies,
	open list:THERMAL, Linus Torvalds, Thomas Gleixner,
	Sargun Dhillon, Tejun Heo, Xie XiuQi, xiezhipeng1,
	Srinivas Pandruvada

On 12/11/2019 17:20, Vincent Guittot wrote:
>> I didn't see anything in that reply, was that just a spurious email?
>
> there were a typo in one email address in the 1st message

Oh right, got it, sorry.
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-12 15:05 ` [PATCH v2] sched/freq: move call to cpufreq_update_util Vincent Guittot 2019-11-12 17:11 ` Valentin Schneider @ 2019-11-13 10:49 ` Dietmar Eggemann 2019-11-13 13:30 ` Vincent Guittot 1 sibling, 1 reply; 9+ messages in thread From: Dietmar Eggemann @ 2019-11-13 10:49 UTC (permalink / raw) To: Vincent Guittot, linux-kernel, mingo, peterz, juri.lelli, rostedt, mgorman, dsmythies Cc: linux-pm, torvalds, tglx, sargun, tj, xiexiuqi, xiezhipeng1, srinivas.pandruvada On 12.11.19 16:05, Vincent Guittot wrote: > Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : >> update_cfs_rq_load_avg() calls cfs_rq_util_change() everytime pelt decays, >> which might be inefficient when cpufreq driver has rate limitation. >> >> When a task is attached on a CPU, we have call path: >> >> update_blocked_averages() >> update_cfs_rq_load_avg() >> cfs_rq_util_change -- > trig frequency update >> attach_entity_load_avg() >> cfs_rq_util_change -- > trig frequency update This looks like attach_entity_load_avg() is called from update_blocked_averages(). Do you refer to the attach_entity_load_avg() call from attach_entity_cfs_rq() or update_load_avg() here? I assume the former. >> The 1st frequency update will not take into account the utilization of the >> newly attached task and the 2nd one might be discard because of rate >> limitation of the cpufreq driver. >> >> update_cfs_rq_load_avg() is only called by update_blocked_averages() >> and update_load_avg() so we can move the call to >> {cfs_rq,cpufreq}_util_change() into these 2 functions. It's also s/cpufreq_util_change()/cpufreq_update_util() ? [...] >> I have just rebased the patch on latest tip/sched/core and made it a proper >> patchset after Doug reported that the problem has diseappeared according to >> his 1st results but tests results are not all based on the same v5.4-rcX >> and with menu instead of teo governor. 
I had some minor tweaks to do putting this on a0e813f26ebc ("sched/core:
Further clarify sched_class::set_next_task()") ? I saw the '[tip:
sched/urgent] sched/pelt: Fix update of blocked PELT ordering' tip-bot
msg this morning though.

[...]

>> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu)
>>  	 * that RT, DL and IRQ signals have been updated before updating CFS.
>>  	 */
>>  	curr_class = rq->curr->sched_class;
>> -	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
>> -	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
>> -	update_irq_load_avg(rq, 0);
>> +	decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
>> +	decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
>> +	decayed |= update_irq_load_avg(rq, 0);

Why not 'decayed = update_cfs_rq_load_avg()' like in the
!CONFIG_FAIR_GROUP_SCHED case?

[...]
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-13 10:49 ` Dietmar Eggemann @ 2019-11-13 13:30 ` Vincent Guittot 2019-11-13 14:09 ` Dietmar Eggemann 0 siblings, 1 reply; 9+ messages in thread From: Vincent Guittot @ 2019-11-13 13:30 UTC (permalink / raw) To: Dietmar Eggemann Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli, Steven Rostedt, Mel Gorman, Doug Smythies, open list:THERMAL, Linus Torvalds, Thomas Gleixner, Sargun Dhillon, Tejun Heo, Xie XiuQi, xiezhipeng1, Srinivas Pandruvada On Wed, 13 Nov 2019 at 11:50, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote: > > On 12.11.19 16:05, Vincent Guittot wrote: > > Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : > >> update_cfs_rq_load_avg() calls cfs_rq_util_change() everytime pelt decays, > >> which might be inefficient when cpufreq driver has rate limitation. > >> > >> When a task is attached on a CPU, we have call path: > >> > >> update_blocked_averages() > >> update_cfs_rq_load_avg() > >> cfs_rq_util_change -- > trig frequency update > >> attach_entity_load_avg() > >> cfs_rq_util_change -- > trig frequency update > > This looks like attach_entity_load_avg() is called from > update_blocked_averages(). Do you refer to the attach_entity_load_avg() > call from attach_entity_cfs_rq() or update_load_avg() here? I assume the > former. ah... typo mistake, i wanted to write update_load_avg update_blocked_averages() update_cfs_rq_load_avg() cfs_rq_util_change -- > trig frequency update attach_entity_load_avg() cfs_rq_util_change -- > trig frequency update > > >> The 1st frequency update will not take into account the utilization of the > >> newly attached task and the 2nd one might be discard because of rate > >> limitation of the cpufreq driver. > >> > >> update_cfs_rq_load_avg() is only called by update_blocked_averages() > >> and update_load_avg() so we can move the call to > >> {cfs_rq,cpufreq}_util_change() into these 2 functions. 
It's also
>
> s/cpufreq_util_change()/cpufreq_update_util() ?

yes

>
> [...]
>
> >> I have just rebased the patch on the latest tip/sched/core and made it a
> >> proper patchset after Doug reported that the problem has disappeared
> >> according to his first results, but the test results are not all based on
> >> the same v5.4-rcX and were run with the menu instead of the teo governor.
>
> I had some minor tweaks to do putting this on a0e813f26ebc ("sched/core:
> Further clarify sched_class::set_next_task()") ? I saw the '[tip:
> sched/urgent] sched/pelt: Fix update of blocked PELT ordering' tip-bot
> msg this morning though.

yes, a0e813f26ebc was item 1 and this patch is item 2 on top

>
> [...]
>
> >> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu)
> >>  	 * that RT, DL and IRQ signals have been updated before updating CFS.
> >>  	 */
> >>  	curr_class = rq->curr->sched_class;
> >> -	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> >> -	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> >> -	update_irq_load_avg(rq, 0);
> >> +	decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
> >> +	decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
> >> +	decayed |= update_irq_load_avg(rq, 0);
>
> Why not 'decayed = update_cfs_rq_load_avg()' like in the
> !CONFIG_FAIR_GROUP_SCHED case?

Because it is handled by the update_load_avg() in the
for_each_leaf_cfs_rq_safe() loop.

This means that we can have 2 calls to cpufreq_update_util() in
update_blocked_averages(), but at least the values will be up to date in
both calls, unlike previously.

I'm going to prepare an additional patch to remove this useless call.
I have also seen some possible further optimizations that I need to
study a bit more before preparing a patch.

>
> [...]
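The pattern the patch relies on here — OR-ing the per-class decay results and poking the governor at most once — can be sketched in plain C. The `update_*` helpers below are trivial stand-ins, not the kernel functions, and the bitmask encoding of "which signals decayed" is invented purely so the sketch is testable:

```c
#include <assert.h>

/* Toy stand-ins for the PELT update helpers: each returns nonzero
 * when its signal decayed this time around (encoded in 'decays'). */
static int update_rt(int decays)  { return decays & 1; }
static int update_dl(int decays)  { return decays & 2; }
static int update_irq(int decays) { return decays & 4; }

static int freq_updates;	/* how many times the governor was poked */

static void cpufreq_update(void) { freq_updates++; }

/* Pattern from the patch: collect the per-class results and issue a
 * single governor notification only when something actually decayed. */
static void blocked_averages(int decays)
{
	int decayed = 0;

	decayed |= update_rt(decays);
	decayed |= update_dl(decays);
	decayed |= update_irq(decays);

	if (decayed)
		cpufreq_update();
}
```

The point of the shape is visible in the test below: several signals can decay in one pass, yet the governor is notified once, and not at all when nothing changed.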
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-13 13:30 ` Vincent Guittot @ 2019-11-13 14:09 ` Dietmar Eggemann 2019-11-13 17:50 ` Vincent Guittot 0 siblings, 1 reply; 9+ messages in thread From: Dietmar Eggemann @ 2019-11-13 14:09 UTC (permalink / raw) To: Vincent Guittot Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli, Steven Rostedt, Mel Gorman, Doug Smythies, open list:THERMAL, Linus Torvalds, Thomas Gleixner, Sargun Dhillon, Tejun Heo, Xie XiuQi, xiezhipeng1, Srinivas Pandruvada On 13.11.19 14:30, Vincent Guittot wrote: > On Wed, 13 Nov 2019 at 11:50, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote: >> >> On 12.11.19 16:05, Vincent Guittot wrote: >>> Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : [...] >>>> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu) >>>> * that RT, DL and IRQ signals have been updated before updating CFS. >>>> */ >>>> curr_class = rq->curr->sched_class; >>>> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); >>>> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); >>>> - update_irq_load_avg(rq, 0); >>>> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); >>>> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); >>>> + decayed |= update_irq_load_avg(rq, 0); >> >> Why not 'decayed = update_cfs_rq_load_avg()' like in the >> !CONFIG_FAIR_GROUP_SCHED case? > > Because it is handled by the update_load_avg() in > for_each_leaf_cfs_rq_safe() loop > > This means that we can have 2 calls to cpufreq_update_util in > update_blocked_average() but at least the values will be up to date in > both calls unlike previously. > > I'm going to prepare an additional patch to remove this useless call. > I have also seen some possible further optimization that i need to > study a bit more before preparing a patch I see. 
You mean the update_load_avg() call for the task group skeleton se
(cfs_rq->tg->se[cpu]). But what happens to a CPU which only has the
root cfs_rq in the list? It doesn't have a skeleton se.
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-13 14:09 ` Dietmar Eggemann @ 2019-11-13 17:50 ` Vincent Guittot 2019-11-13 18:09 ` Vincent Guittot 0 siblings, 1 reply; 9+ messages in thread From: Vincent Guittot @ 2019-11-13 17:50 UTC (permalink / raw) To: Dietmar Eggemann Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli, Steven Rostedt, Mel Gorman, Doug Smythies, open list:THERMAL, Linus Torvalds, Thomas Gleixner, Sargun Dhillon, Tejun Heo, Xie XiuQi, xiezhipeng1, Srinivas Pandruvada Le Wednesday 13 Nov 2019 à 15:09:47 (+0100), Dietmar Eggemann a écrit : > On 13.11.19 14:30, Vincent Guittot wrote: > > On Wed, 13 Nov 2019 at 11:50, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote: > >> > >> On 12.11.19 16:05, Vincent Guittot wrote: > >>> Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : > > [...] > > >>>> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu) > >>>> * that RT, DL and IRQ signals have been updated before updating CFS. > >>>> */ > >>>> curr_class = rq->curr->sched_class; > >>>> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > >>>> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > >>>> - update_irq_load_avg(rq, 0); > >>>> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > >>>> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > >>>> + decayed |= update_irq_load_avg(rq, 0); > >> > >> Why not 'decayed = update_cfs_rq_load_avg()' like in the > >> !CONFIG_FAIR_GROUP_SCHED case? > > > > Because it is handled by the update_load_avg() in > > for_each_leaf_cfs_rq_safe() loop > > > > This means that we can have 2 calls to cpufreq_update_util in > > update_blocked_average() but at least the values will be up to date in > > both calls unlike previously. > > > > I'm going to prepare an additional patch to remove this useless call. 
> > I have also seen some possible further optimizations that I need to
> > study a bit more before preparing a patch
> >
> > I see. The update_load_avg() call for the task group skeleton se
> > (cfs_rq->tg->se[cpu]). But what happens to a CPU which only has the
> > root cfs_rq in the list? It doesn't have a skeleton se.

You're right. I have to add the following to make sure it will be called:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2eb1aa8..9fc077c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7604,9 +7604,13 @@ static void update_blocked_averages(int cpu)
 						cpu,
 						cfs_rq == &rq->cfs ? 0 : (long)cfs_rq->tg );
 
-		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
+		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) {
 			update_tg_load_avg(cfs_rq, 0);
 
+			if (cfs_rq == &rq->cfs)
+				decayed = 1;
+		}
+
 		trace_sched_load_contrib_blocked(cpu,
 						&cfs_rq->avg,
 						cfs_rq == &rq->cfs ? 0 : (long)cfs_rq->tg );
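The key line in this hunk is the `cfs_rq == &rq->cfs` test: of all the cfs_rq entries walked in the leaf list, only a decay of the root one should set `decayed`, because the root utilization is what cpufreq actually consumes. A minimal standalone model of that check — list layout, indices, and names are all invented for illustration:

```c
#include <assert.h>

#define NR_CFS_RQ 3
#define ROOT      0	/* index 0 plays the role of &rq->cfs */

/* Walk a toy "leaf list" of cfs_rq slots; rq_decayed[i] says whether
 * slot i decayed this pass. Only a decay of the root slot should
 * make the caller notify the frequency governor. */
static int walk_leaf_list(const int *rq_decayed)
{
	int decayed = 0;

	for (int i = 0; i < NR_CFS_RQ; i++) {
		if (rq_decayed[i] && i == ROOT)
			decayed = 1;	/* root utilization drives cpufreq */
	}
	return decayed;
}
```

Child cfs_rq decays still do their own bookkeeping in the real code (update_tg_load_avg() and load propagation); they just must not, by themselves, trigger a frequency update.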
* Re: [PATCH v2] sched/freq: move call to cpufreq_update_util 2019-11-13 17:50 ` Vincent Guittot @ 2019-11-13 18:09 ` Vincent Guittot 0 siblings, 0 replies; 9+ messages in thread From: Vincent Guittot @ 2019-11-13 18:09 UTC (permalink / raw) To: Dietmar Eggemann Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli, Steven Rostedt, Mel Gorman, Doug Smythies, open list:THERMAL, Linus Torvalds, Thomas Gleixner, Sargun Dhillon, Tejun Heo, Xie XiuQi, xiezhipeng1, Srinivas Pandruvada Le Wednesday 13 Nov 2019 à 18:50:35 (+0100), Vincent Guittot a écrit : > Le Wednesday 13 Nov 2019 à 15:09:47 (+0100), Dietmar Eggemann a écrit : > > On 13.11.19 14:30, Vincent Guittot wrote: > > > On Wed, 13 Nov 2019 at 11:50, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote: > > >> > > >> On 12.11.19 16:05, Vincent Guittot wrote: > > >>> Le Tuesday 12 Nov 2019 à 15:48:13 (+0100), Vincent Guittot a écrit : > > > > [...] > > > > >>>> @@ -7493,9 +7495,9 @@ static void update_blocked_averages(int cpu) > > >>>> * that RT, DL and IRQ signals have been updated before updating CFS. > > >>>> */ > > >>>> curr_class = rq->curr->sched_class; > > >>>> - update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > > >>>> - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > > >>>> - update_irq_load_avg(rq, 0); > > >>>> + decayed |= update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); > > >>>> + decayed |= update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); > > >>>> + decayed |= update_irq_load_avg(rq, 0); > > >> > > >> Why not 'decayed = update_cfs_rq_load_avg()' like in the > > >> !CONFIG_FAIR_GROUP_SCHED case? > > > > > > Because it is handled by the update_load_avg() in > > > for_each_leaf_cfs_rq_safe() loop > > > > > > This means that we can have 2 calls to cpufreq_update_util in > > > update_blocked_average() but at least the values will be up to date in > > > both calls unlike previously. 
> > > I'm going to prepare an additional patch to remove this useless call.
> > > I have also seen some possible further optimizations that I need to
> > > study a bit more before preparing a patch
> >
> > I see. The update_load_avg() call for the task group skeleton se
> > (cfs_rq->tg->se[cpu]). But what happens to a CPU which only has the
> > root cfs_rq in the list? It doesn't have a skeleton se.
>
> You're right. I have to add the following to make sure it will be called:
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2eb1aa8..9fc077c 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7604,9 +7604,13 @@ static void update_blocked_averages(int cpu)
>  						cpu,
>  						cfs_rq == &rq->cfs ? 0 : (long)cfs_rq->tg );
>  
> -		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
> +		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) {
>  			update_tg_load_avg(cfs_rq, 0);
>  
> +			if (cfs_rq == &rq->cfs)
> +				decayed = 1;
> +		}
> +
>  		trace_sched_load_contrib_blocked(cpu,
>  						&cfs_rq->avg,
>  						cfs_rq == &rq->cfs ? 0 : (long)cfs_rq->tg );

The proper fix, without the debug trace events :-)

@@ -7567,9 +7569,13 @@ static void update_blocked_averages(int cpu)
 	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		struct sched_entity *se;
 
-		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
+		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) {
 			update_tg_load_avg(cfs_rq, 0);
 
+			if (cfs_rq == &rq->cfs)
+				decayed = 1;
+		}
+
 		/* Propagate pending load changes to the parent, if any: */
 		se = cfs_rq->tg->se[cpu];
 		if (se && !skip_blocked_update(se))
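For background on the `decayed` condition all of these hunks test: PELT tracks load and utilization as a geometric series whose decay factor is chosen so that a signal halves every 32 periods of 1024us. The sketch below is a deliberately rough, integer-only model of that half-life behaviour — it is not the kernel's fixed-point implementation, and it ignores partial 32-period blocks:

```c
#include <assert.h>

/* Illustrative PELT-style decay: the kernel picks y so that
 * y^32 = 0.5, i.e. a signal halves every 32 periods of 1024us.
 * Here we only model whole half-life blocks and drop the remainder. */
static long decay_halflife(long val, int periods)
{
	for (int i = 0; i < periods / 32; i++)
		val /= 2;
	return val;
}
```

The point for this thread: a blocked signal keeps decaying period after period even with no task activity, which is why update_blocked_averages() keeps producing `decayed != 0` passes that want to reach the frequency governor.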