All of lore.kernel.org
 help / color / mirror / Atom feed
* RE: [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive
@ 2019-02-01 16:54 Doug Smythies
  2019-02-05 12:04 ` Rafael J. Wysocki
  2019-02-11 20:14 ` Doug Smythies
  0 siblings, 2 replies; 4+ messages in thread
From: Doug Smythies @ 2019-02-01 16:54 UTC (permalink / raw)
  To: 'Rafael J. Wysocki'
  Cc: 'Srinivas Pandruvada', 'LKML', 'Linux PM',
	Doug Smythies

On 2019.01.30 16:05 Rafael J. Wysocki wrote:

> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> The current iowait boosting mechanism in intel_pstate_update_util()
> is quite aggressive, as it goes to the maximum P-state right away,
> and may cause excessive amounts of energy to be used, which is not
> desirable and arguably isn't necessary too.
>
> Follow commit a5a0809bc58e ("cpufreq: schedutil: Make iowait boost
> more energy efficient") that reworked the analogous iowait boost
> mechanism in the schedutil governor and make the iowait boosting
> in intel_pstate_update_util() work along the same lines.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/cpufreq/intel_pstate.c |   46 +++++++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 17 deletions(-)
>
> Index: linux-pm/drivers/cpufreq/intel_pstate.c
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/intel_pstate.c
> +++ linux-pm/drivers/cpufreq/intel_pstate.c
> @@ -50,6 +50,8 @@
>  #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
>  #define fp_toint(X) ((X) >> FRAC_BITS)
> 
> +#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))
> +
>  #define EXT_BITS 6
>  #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
>  #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
> @@ -1678,17 +1680,14 @@ static inline int32_t get_avg_pstate(str
>  static inline int32_t get_target_pstate(struct cpudata *cpu)
>  {
> 	struct sample *sample = &cpu->sample;
> -	int32_t busy_frac, boost;
> +	int32_t busy_frac;
> 	int target, avg_pstate;
> 
> 	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
> 			   sample->tsc);
> 
> -	boost = cpu->iowait_boost;
> -	cpu->iowait_boost >>= 1;
> -
> -	if (busy_frac < boost)
> -		busy_frac = boost;
> +	if (busy_frac < cpu->iowait_boost)
> +		busy_frac = cpu->iowait_boost;
> 
> 	sample->busy_scaled = busy_frac * 100;
> 
> @@ -1767,22 +1766,35 @@ static void intel_pstate_update_util(str
> 	if (smp_processor_id() != cpu->cpu)
>  		return;
>
> +	delta_ns = time - cpu->last_update;
> 	if (flags & SCHED_CPUFREQ_IOWAIT) {
> -		cpu->iowait_boost = int_tofp(1);
> -		cpu->last_update = time;
> -		/*
> -		 * The last time the busy was 100% so P-state was max anyway
> -		 * so avoid overhead of computation.
> -		 */
> -		if (fp_toint(cpu->sample.busy_scaled) == 100)
> -			return;
> -
> -		goto set_pstate;
> +		/* Start over if the CPU may have been idle. */
> +		if (delta_ns > TICK_NSEC) {
> +			cpu->iowait_boost = ONE_EIGHTH_FP;
> +		} else if (cpu->iowait_boost) {
> +			cpu->iowait_boost <<= 1;
> +			if (cpu->iowait_boost >= int_tofp(1)) {
> +				cpu->iowait_boost = int_tofp(1);
> +				cpu->last_update = time;
> +				/*
> +				 * The last time the busy was 100% so P-state
> +				 * was max anyway, so avoid the overhead of
> +				 * computation.
> +				 */
> +				if (fp_toint(cpu->sample.busy_scaled) == 100)
> +					return;

Hi Rafael,

By exiting here, the trace, if enabled, is also bypassed.
We want the trace sample.
Also, there is a generic:
"If the target ptstate is the same as before, then don't set it"
later on.
Suggest to delete this test and exit condition. (I see that this early exit was done before also.)

> +
> +				goto set_pstate;
> +			}
> +		} else {
> +			cpu->iowait_boost = ONE_EIGHTH_FP;
> +		}
> 	} else if (cpu->iowait_boost) {
> 		/* Clear iowait_boost if the CPU may have been idle. */
> -		delta_ns = time - cpu->last_update;
> 		if (delta_ns > TICK_NSEC)
>  			cpu->iowait_boost = 0;
> +		else
> +			cpu->iowait_boost >>= 1;
> 	}
> 	cpu->last_update = time;
> 	delta_ns = time - cpu->sample.time;



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive
  2019-02-01 16:54 [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive Doug Smythies
@ 2019-02-05 12:04 ` Rafael J. Wysocki
  2019-02-11 20:14 ` Doug Smythies
  1 sibling, 0 replies; 4+ messages in thread
From: Rafael J. Wysocki @ 2019-02-05 12:04 UTC (permalink / raw)
  To: Doug Smythies
  Cc: 'Srinivas Pandruvada', 'LKML', 'Linux PM'

On Friday, February 1, 2019 5:54:37 PM CET Doug Smythies wrote:
> On 2019.01.30 16:05 Rafael J. Wysocki wrote:
> 
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > The current iowait boosting mechanism in intel_pstate_update_util()
> > is quite aggressive, as it goes to the maximum P-state right away,
> > and may cause excessive amounts of energy to be used, which is not
> > desirable and arguably isn't necessary too.
> >
> > Follow commit a5a0809bc58e ("cpufreq: schedutil: Make iowait boost
> > more energy efficient") that reworked the analogous iowait boost
> > mechanism in the schedutil governor and make the iowait boosting
> > in intel_pstate_update_util() work along the same lines.
> > 
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >  drivers/cpufreq/intel_pstate.c |   46 +++++++++++++++++++++++++----------------
> >  1 file changed, 29 insertions(+), 17 deletions(-)
> >
> > Index: linux-pm/drivers/cpufreq/intel_pstate.c
> > ===================================================================
> > --- linux-pm.orig/drivers/cpufreq/intel_pstate.c
> > +++ linux-pm/drivers/cpufreq/intel_pstate.c
> > @@ -50,6 +50,8 @@
> >  #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
> >  #define fp_toint(X) ((X) >> FRAC_BITS)
> > 
> > +#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))
> > +
> >  #define EXT_BITS 6
> >  #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
> >  #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
> > @@ -1678,17 +1680,14 @@ static inline int32_t get_avg_pstate(str
> >  static inline int32_t get_target_pstate(struct cpudata *cpu)
> >  {
> > 	struct sample *sample = &cpu->sample;
> > -	int32_t busy_frac, boost;
> > +	int32_t busy_frac;
> > 	int target, avg_pstate;
> > 
> > 	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
> > 			   sample->tsc);
> > 
> > -	boost = cpu->iowait_boost;
> > -	cpu->iowait_boost >>= 1;
> > -
> > -	if (busy_frac < boost)
> > -		busy_frac = boost;
> > +	if (busy_frac < cpu->iowait_boost)
> > +		busy_frac = cpu->iowait_boost;
> > 
> > 	sample->busy_scaled = busy_frac * 100;
> > 
> > @@ -1767,22 +1766,35 @@ static void intel_pstate_update_util(str
> > 	if (smp_processor_id() != cpu->cpu)
> >  		return;
> >
> > +	delta_ns = time - cpu->last_update;
> > 	if (flags & SCHED_CPUFREQ_IOWAIT) {
> > -		cpu->iowait_boost = int_tofp(1);
> > -		cpu->last_update = time;
> > -		/*
> > -		 * The last time the busy was 100% so P-state was max anyway
> > -		 * so avoid overhead of computation.
> > -		 */
> > -		if (fp_toint(cpu->sample.busy_scaled) == 100)
> > -			return;
> > -
> > -		goto set_pstate;
> > +		/* Start over if the CPU may have been idle. */
> > +		if (delta_ns > TICK_NSEC) {
> > +			cpu->iowait_boost = ONE_EIGHTH_FP;
> > +		} else if (cpu->iowait_boost) {
> > +			cpu->iowait_boost <<= 1;
> > +			if (cpu->iowait_boost >= int_tofp(1)) {
> > +				cpu->iowait_boost = int_tofp(1);
> > +				cpu->last_update = time;
> > +				/*
> > +				 * The last time the busy was 100% so P-state
> > +				 * was max anyway, so avoid the overhead of
> > +				 * computation.
> > +				 */
> > +				if (fp_toint(cpu->sample.busy_scaled) == 100)
> > +					return;
> 
> Hi Rafael,
> 
> By exiting here, the trace, if enabled, is also bypassed.
> We want the trace sample.

Fair enough, but the return is there regardless of this patch.

Maybe it should be fixed separately?

> Also, there is a generic:
> "If the target ptstate is the same as before, then don't set it"
> later on.
> Suggest to delete this test and exit condition. (I see that this early
> exit was done before also.)

Well, exactly.

It is not unreasonable to boost the frequency right away for an IO-waiter
without waiting for the next sample time IMO.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive
  2019-02-01 16:54 [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive Doug Smythies
  2019-02-05 12:04 ` Rafael J. Wysocki
@ 2019-02-11 20:14 ` Doug Smythies
  1 sibling, 0 replies; 4+ messages in thread
From: Doug Smythies @ 2019-02-11 20:14 UTC (permalink / raw)
  To: 'Rafael J. Wysocki'
  Cc: 'Srinivas Pandruvada', 'LKML', 'Linux PM',
	Doug Smythies

On 2019.02.05 04:04 Rafael J. Wysocki wrote:
> On Friday, February 1, 2019 5:54:37 PM CET Doug Smythies wrote:
>> On 2019.01.30 16:05 Rafael J. Wysocki wrote:
>> 
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> The current iowait boosting mechanism in intel_pstate_update_util()
>>> is quite aggressive, as it goes to the maximum P-state right away,
>>> and may cause excessive amounts of energy to be used, which is not
>>> desirable and arguably isn't necessary too.
>>>
>>> Follow commit a5a0809bc58e ("cpufreq: schedutil: Make iowait boost
>>> more energy efficient") that reworked the analogous iowait boost
>>> mechanism in the schedutil governor and make the iowait boosting
>>> in intel_pstate_update_util() work along the same lines.
>>> 
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>>  drivers/cpufreq/intel_pstate.c |   46 +++++++++++++++++++++++++----------------
>>>  1 file changed, 29 insertions(+), 17 deletions(-)
>>>
>>> Index: linux-pm/drivers/cpufreq/intel_pstate.c
>>> ===================================================================
>>> --- linux-pm.orig/drivers/cpufreq/intel_pstate.c
>>> +++ linux-pm/drivers/cpufreq/intel_pstate.c
>>> @@ -50,6 +50,8 @@
>>>  #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
>>>  #define fp_toint(X) ((X) >> FRAC_BITS)
>>> 
>>> +#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))
>>> +
>>>  #define EXT_BITS 6
>>>  #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
>>>  #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
>>> @@ -1678,17 +1680,14 @@ static inline int32_t get_avg_pstate(str
>>>  static inline int32_t get_target_pstate(struct cpudata *cpu)
>>>  {
>>> 	struct sample *sample = &cpu->sample;
>>> -	int32_t busy_frac, boost;
>>> +	int32_t busy_frac;
>>> 	int target, avg_pstate;
>>> 
>>> 	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
>>> 			   sample->tsc);
>>> 
>>> -	boost = cpu->iowait_boost;
>>> -	cpu->iowait_boost >>= 1;
>>> -
>>> -	if (busy_frac < boost)
>>> -		busy_frac = boost;
>>> +	if (busy_frac < cpu->iowait_boost)
>>> +		busy_frac = cpu->iowait_boost;
>>> 
>>> 	sample->busy_scaled = busy_frac * 100;
>>> 
>>> @@ -1767,22 +1766,35 @@ static void intel_pstate_update_util(str
>>> 	if (smp_processor_id() != cpu->cpu)
>>>  		return;
>>>
>>> +	delta_ns = time - cpu->last_update;
>>> 	if (flags & SCHED_CPUFREQ_IOWAIT) {
>>> -		cpu->iowait_boost = int_tofp(1);
>>> -		cpu->last_update = time;
>>> -		/*
>>> -		 * The last time the busy was 100% so P-state was max anyway
>>> -		 * so avoid overhead of computation.
>>> -		 */
>>> -		if (fp_toint(cpu->sample.busy_scaled) == 100)
>>> -			return;
>>> -
>>> -		goto set_pstate;
>>> +		/* Start over if the CPU may have been idle. */
>>> +		if (delta_ns > TICK_NSEC) {
>>> +			cpu->iowait_boost = ONE_EIGHTH_FP;
>>> +		} else if (cpu->iowait_boost) {
>>> +			cpu->iowait_boost <<= 1;
>>> +			if (cpu->iowait_boost >= int_tofp(1)) {
>>> +				cpu->iowait_boost = int_tofp(1);
>>> +				cpu->last_update = time;
>>> +				/*
>>> +				 * The last time the busy was 100% so P-state
>>> +				 * was max anyway, so avoid the overhead of
>>> +				 * computation.
>>> +				 */
>>> +				if (fp_toint(cpu->sample.busy_scaled) == 100)
>>> +					return;
>> 
>> Hi Rafael,
>> 
>> By exiting here, the trace, if enabled, is also bypassed.
>> We want the trace sample.
>
> Fair enough, but the return is there regardless of this patch.
>
> Maybe it should be fixed separately?

O.K.

>> Also, there is a generic:
>> "If the target ptstate is the same as before, then don't set it"
>> later on.
>> Suggest to delete this test and exit condition. (I see that this early
>> exit was done before also.)
>
> Well, exactly.
>
> It is not unreasonable to boost the frequency right away for an IO-waiter
> without waiting for the next sample time IMO.

I agree, but am just saying that it should include a trace sample, otherwise
it is difficult to understand what happened.

By the way, I forgot to mention before, I tried the patch and it does prevent
CPU frequency spikes to maximum every few seconds in a very idle system.

... Doug



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive
@ 2019-01-31  0:04 Rafael J. Wysocki
  0 siblings, 0 replies; 4+ messages in thread
From: Rafael J. Wysocki @ 2019-01-31  0:04 UTC (permalink / raw)
  To: Linux PM; +Cc: Srinivas Pandruvada, LKML, Doug Smythies

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The current iowait boosting mechanism in intel_pstate_update_util()
is quite aggressive, as it goes to the maximum P-state right away,
and may cause excessive amounts of energy to be used, which is not
desirable and arguably isn't necessary too.

Follow commit a5a0809bc58e ("cpufreq: schedutil: Make iowait boost
more energy efficient") that reworked the analogous iowait boost
mechanism in the schedutil governor and make the iowait boosting
in intel_pstate_update_util() work along the same lines.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c |   46 +++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -50,6 +50,8 @@
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
 
+#define ONE_EIGHTH_FP ((int64_t)1 << (FRAC_BITS - 3))
+
 #define EXT_BITS 6
 #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
 #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
@@ -1678,17 +1680,14 @@ static inline int32_t get_avg_pstate(str
 static inline int32_t get_target_pstate(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
-	int32_t busy_frac, boost;
+	int32_t busy_frac;
 	int target, avg_pstate;
 
 	busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
 			   sample->tsc);
 
-	boost = cpu->iowait_boost;
-	cpu->iowait_boost >>= 1;
-
-	if (busy_frac < boost)
-		busy_frac = boost;
+	if (busy_frac < cpu->iowait_boost)
+		busy_frac = cpu->iowait_boost;
 
 	sample->busy_scaled = busy_frac * 100;
 
@@ -1767,22 +1766,35 @@ static void intel_pstate_update_util(str
 	if (smp_processor_id() != cpu->cpu)
 		return;
 
+	delta_ns = time - cpu->last_update;
 	if (flags & SCHED_CPUFREQ_IOWAIT) {
-		cpu->iowait_boost = int_tofp(1);
-		cpu->last_update = time;
-		/*
-		 * The last time the busy was 100% so P-state was max anyway
-		 * so avoid overhead of computation.
-		 */
-		if (fp_toint(cpu->sample.busy_scaled) == 100)
-			return;
-
-		goto set_pstate;
+		/* Start over if the CPU may have been idle. */
+		if (delta_ns > TICK_NSEC) {
+			cpu->iowait_boost = ONE_EIGHTH_FP;
+		} else if (cpu->iowait_boost) {
+			cpu->iowait_boost <<= 1;
+			if (cpu->iowait_boost >= int_tofp(1)) {
+				cpu->iowait_boost = int_tofp(1);
+				cpu->last_update = time;
+				/*
+				 * The last time the busy was 100% so P-state
+				 * was max anyway, so avoid the overhead of
+				 * computation.
+				 */
+				if (fp_toint(cpu->sample.busy_scaled) == 100)
+					return;
+
+				goto set_pstate;
+			}
+		} else {
+			cpu->iowait_boost = ONE_EIGHTH_FP;
+		}
 	} else if (cpu->iowait_boost) {
 		/* Clear iowait_boost if the CPU may have been idle. */
-		delta_ns = time - cpu->last_update;
 		if (delta_ns > TICK_NSEC)
 			cpu->iowait_boost = 0;
+		else
+			cpu->iowait_boost >>= 1;
 	}
 	cpu->last_update = time;
 	delta_ns = time - cpu->sample.time;


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-02-11 20:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-01 16:54 [PATCH] cpufreq: intel_pstate: Rework iowait boosting to be less aggressive Doug Smythies
2019-02-05 12:04 ` Rafael J. Wysocki
2019-02-11 20:14 ` Doug Smythies
  -- strict thread matches above, loose matches on Subject: below --
2019-01-31  0:04 Rafael J. Wysocki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.