linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* cpufreq: intel_pstate: Support raw epp and energy_efficiency
@ 2020-06-23 18:27 Srinivas Pandruvada
  2020-06-23 18:27 ` [PATCH v2 1/2] cpufreq: intel_pstate: Allow enable/disable energy efficiency Srinivas Pandruvada
  2020-06-23 18:27 ` [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value Srinivas Pandruvada
  0 siblings, 2 replies; 5+ messages in thread
From: Srinivas Pandruvada @ 2020-06-23 18:27 UTC (permalink / raw)
  To: rjw, viresh.kumar, lenb; +Cc: linux-pm, linux-kernel, Srinivas Pandruvada

v2
Remove raw value support for EPB

Srinivas Pandruvada (2):
  cpufreq: intel_pstate: Allow enable/disable energy efficiency
  cpufreq: intel_pstate: Allow raw energy performance preference value

 Documentation/admin-guide/pm/intel_pstate.rst | 13 ++-
 drivers/cpufreq/intel_pstate.c                | 99 ++++++++++++++++---
 2 files changed, 99 insertions(+), 13 deletions(-)

-- 
2.25.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/2] cpufreq: intel_pstate: Allow enable/disable energy efficiency
  2020-06-23 18:27 cpufreq: intel_pstate: Support raw epp and energy_efficiency Srinivas Pandruvada
@ 2020-06-23 18:27 ` Srinivas Pandruvada
  2020-06-23 18:27 ` [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value Srinivas Pandruvada
  1 sibling, 0 replies; 5+ messages in thread
From: Srinivas Pandruvada @ 2020-06-23 18:27 UTC (permalink / raw)
  To: rjw, viresh.kumar, lenb; +Cc: linux-pm, linux-kernel, Srinivas Pandruvada

By default the intel_pstate driver disables energy efficiency by setting
MSR_IA32_POWER_CTL bit 19 for the Kaby Lake desktop CPU model in HWP mode.
This CPU model is also shared by Coffee Lake desktop CPUs. This allows
these systems to reach the maximum possible frequency, but it adds a power
penalty which some customers don't want. They want a way to enable or
disable energy efficiency dynamically.

So, add an additional attribute "energy_efficiency_enable" under
/sys/devices/system/cpu/intel_pstate/ for these CPU models. This allows
reading and writing bit 19 ("Disable Energy Efficiency Optimization") in
the MSR IA32_POWER_CTL.

This attribute is present in both HWP and non-HWP mode as this has an
effect in both modes. Refer to Intel Software Developer's manual for
details. The scope of this bit is package wide.

Suggested-by: Len Brown <lenb@kernel.org>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst |  7 +++
 drivers/cpufreq/intel_pstate.c                | 49 ++++++++++++++++++-
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index 39d80bc29ccd..939bfdc53f4f 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -431,6 +431,13 @@ argument is passed to the kernel in the command line.
 	supported in the current configuration, writes to this attribute will
 	fail with an appropriate error.
 
+``energy_efficiency_enable``
+	This attribute is only present on platforms with CPUs matching the
+	Kaby Lake desktop CPU model. By default "energy_efficiency" is disabled
+	on these CPU models in HWP mode by this driver. Enabling energy
+	efficiency may limit the maximum operating frequency in both HWP and
+	non-HWP mode.
+
 Interpretation of Policy Attributes
 -----------------------------------
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 8e23a698ce04..1cf6d06f2314 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1218,6 +1218,44 @@ static ssize_t store_hwp_dynamic_boost(struct kobject *a,
 	return count;
 }
 
+#define MSR_IA32_POWER_CTL_BIT_EE	19
+
+static ssize_t show_energy_efficiency_enable(struct kobject *kobj,
+					     struct kobj_attribute *attr,
+					     char *buf)
+{
+	u64 power_ctl;
+	int enable;
+
+	rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
+	enable = (power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE)) >> MSR_IA32_POWER_CTL_BIT_EE;
+	return sprintf(buf, "%d\n", !enable);
+}
+
+static ssize_t store_energy_efficiency_enable(struct kobject *a,
+					      struct kobj_attribute *b,
+					      const char *buf, size_t count)
+{
+	u64 power_ctl;
+	u32 input;
+	int ret;
+
+	ret = kstrtouint(buf, 10, &input);
+	if (ret)
+		return ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
+	if (input)
+		power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE);
+	else
+		power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
+	wrmsrl(MSR_IA32_POWER_CTL, power_ctl);
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -1228,6 +1266,7 @@ define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
 define_one_global_rw(hwp_dynamic_boost);
+define_one_global_rw(energy_efficiency_enable);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
@@ -1241,6 +1280,8 @@ static const struct attribute_group intel_pstate_attr_group = {
 	.attrs = intel_pstate_attributes,
 };
 
+static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[];
+
 static void __init intel_pstate_sysfs_expose_params(void)
 {
 	struct kobject *intel_pstate_kobject;
@@ -1273,6 +1314,12 @@ static void __init intel_pstate_sysfs_expose_params(void)
 				       &hwp_dynamic_boost.attr);
 		WARN_ON(rc);
 	}
+
+	if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) {
+		rc = sysfs_create_file(intel_pstate_kobject,
+				       &energy_efficiency_enable.attr);
+		WARN_ON(rc);
+	}
 }
 /************************** sysfs end ************************/
 
@@ -1288,8 +1335,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
 }
 
-#define MSR_IA32_POWER_CTL_BIT_EE	19
-
 /* Disable energy efficiency optimization */
 static void intel_pstate_disable_ee(int cpu)
 {
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value
  2020-06-23 18:27 cpufreq: intel_pstate: Support raw epp and energy_efficiency Srinivas Pandruvada
  2020-06-23 18:27 ` [PATCH v2 1/2] cpufreq: intel_pstate: Allow enable/disable energy efficiency Srinivas Pandruvada
@ 2020-06-23 18:27 ` Srinivas Pandruvada
  2020-06-24 15:37   ` Doug Smythies
  1 sibling, 1 reply; 5+ messages in thread
From: Srinivas Pandruvada @ 2020-06-23 18:27 UTC (permalink / raw)
  To: rjw, viresh.kumar, lenb; +Cc: linux-pm, linux-kernel, Srinivas Pandruvada

Currently, using the attribute "energy_performance_preference", user space
can write one of the four pre-defined preference strings. These preference
strings get mapped to a hard-coded Energy-Performance Preference (EPP) or
Energy-Performance Bias (EPB) knob.

These four values are supposed to cover a broad spectrum of use cases, but
they are not uniformly distributed in the range. There are a number of cases
where this is not enough. For example:

Suppose a user wants more performance when connected to AC. Instead of using
the default "balance performance", the "performance" setting can be used. This
changes the EPP value from 0x80 to 0x00. But setting EPP to 0 results in
electrical and thermal issues on some platforms. This results in aggressive
throttling, which causes a drop in performance. Some value between 0x80 and
0x00 results in better performance, but that value can't be fixed as the
power curve is not linear. In some cases just changing EPP from 0x80 to
0x75 is enough to get a significant performance gain.

Similarly, on battery, EPP 0x80 can be very aggressive in power consumption.
But picking the next choice, "balance power", results in too much loss
of performance, which causes a bad user experience in use cases like "Google
Hangout". It was observed that some value between these two EPP values is
optimal.

This change allows fine-grained EPP tuning for platforms like Chromebooks.
Here, based on the product and use cases, different EPP values can be set.
This change is similar to the change done for:
/sys/devices/system/cpu/cpu*/power/energy_perf_bias
where the user has the choice to write a predefined string or a raw value.

The change itself is trivial. When the user preference doesn't match a
predefined preference string and the value is an unsigned integer in
range, that value is used for EPP. When the EPP feature is not present,
writing a raw value is not supported.

Suggested-by: Len Brown <lenb@kernel.org>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst |  6 ++-
 drivers/cpufreq/intel_pstate.c                | 50 +++++++++++++++----
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index 939bfdc53f4f..5e209926e0ed 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -561,7 +561,11 @@ somewhere between the two extremes:
 Strings written to the ``energy_performance_preference`` attribute are
 internally translated to integer values written to the processor's
 Energy-Performance Preference (EPP) knob (if supported) or its
-Energy-Performance Bias (EPB) knob.
+Energy-Performance Bias (EPB) knob. It is also possible to write an
+integer value between 0 and 255, if the EPP feature is present. If the EPP
+feature is not present, writing an integer value to this attribute is not
+supported. In this case, the user can use the
+ "/sys/devices/system/cpu/cpu*/power/energy_perf_bias" interface.
 
 [Note that tasks may by migrated from one CPU to another by the scheduler's
 load-balancing algorithm and if different energy vs performance hints are
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1cf6d06f2314..d8f195c7a428 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -602,11 +602,12 @@ static const unsigned int epp_values[] = {
 	HWP_EPP_POWERSAVE
 };
 
-static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
+static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp)
 {
 	s16 epp;
 	int index = -EINVAL;
 
+	*raw_epp = 0;
 	epp = intel_pstate_get_epp(cpu_data, 0);
 	if (epp < 0)
 		return epp;
@@ -614,12 +615,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
 		if (epp == HWP_EPP_PERFORMANCE)
 			return 1;
-		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
+		if (epp == HWP_EPP_BALANCE_PERFORMANCE)
 			return 2;
-		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
+		if (epp == HWP_EPP_BALANCE_POWERSAVE)
 			return 3;
-		else
+		if (epp == HWP_EPP_POWERSAVE)
 			return 4;
+		*raw_epp = epp;
+		return 0;
 	} else if (boot_cpu_has(X86_FEATURE_EPB)) {
 		/*
 		 * Range:
@@ -638,7 +641,8 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 }
 
 static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
-					      int pref_index)
+					      int pref_index, bool use_raw,
+					      u32 raw_epp)
 {
 	int epp = -EINVAL;
 	int ret;
@@ -657,6 +661,16 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
 
 		value &= ~GENMASK_ULL(31, 24);
 
+		if (use_raw) {
+			if (raw_epp > 255) {
+				ret = -EINVAL;
+				goto return_pref;
+			}
+			value |= (u64)raw_epp << 24;
+			ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
+			goto return_pref;
+		}
+
 		if (epp == -EINVAL)
 			epp = epp_values[pref_index - 1];
 
@@ -694,6 +708,8 @@ static ssize_t store_energy_performance_preference(
 {
 	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
 	char str_preference[21];
+	bool raw = false;
+	u32 epp;
 	int ret;
 
 	ret = sscanf(buf, "%20s", str_preference);
@@ -701,10 +717,21 @@ static ssize_t store_energy_performance_preference(
 		return -EINVAL;
 
 	ret = match_string(energy_perf_strings, -1, str_preference);
-	if (ret < 0)
+	if (ret < 0) {
+		if (!boot_cpu_has(X86_FEATURE_HWP_EPP))
+			return ret;
+
+		ret = kstrtouint(buf, 10, &epp);
+		if (ret)
+			return ret;
+
+		raw = true;
+	}
+
+	ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw, epp);
+	if (ret)
 		return ret;
 
-	intel_pstate_set_energy_pref_index(cpu_data, ret);
 	return count;
 }
 
@@ -712,13 +739,16 @@ static ssize_t show_energy_performance_preference(
 				struct cpufreq_policy *policy, char *buf)
 {
 	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
-	int preference;
+	int preference, raw_epp;
 
-	preference = intel_pstate_get_energy_pref_index(cpu_data);
+	preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp);
 	if (preference < 0)
 		return preference;
 
-	return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
+	if (raw_epp)
+		return  sprintf(buf, "%d\n", raw_epp);
+	else
+		return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
 }
 
 cpufreq_freq_attr_rw(energy_performance_preference);
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value
  2020-06-23 18:27 ` [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value Srinivas Pandruvada
@ 2020-06-24 15:37   ` Doug Smythies
  2020-06-24 18:02     ` srinivas pandruvada
  0 siblings, 1 reply; 5+ messages in thread
From: Doug Smythies @ 2020-06-24 15:37 UTC (permalink / raw)
  To: 'Srinivas Pandruvada'
  Cc: linux-pm, linux-kernel, rjw, viresh.kumar, lenb

Hi Srinivas,

I have immediate need for this. I have been using a tool
I wrote myself for this which I can now retire.
(it wasn't very good anyway).
Yours remembers for each governor, and is way better.
Thanks.

On 2020.06.23 11:27 Srinivas Pandruvada wrote:

> Currently using attribute "energy_performance_preference", user space can
> write one of the four per-defined preference string. These preference
> strings gets mapped to a hard-coded Energy-Performance Preference (EPP) or
> Energy-Performance Bias (EPB) knob.
> 
> These four values supposed to cover broad spectrum of use cases, but they
> are not uniformly distributed in the range.

Suggest:

These four values are supposed to cover broad spectrum of use cases, but
are not uniformly distributed in the range.

> There are number of cases,
> where this is not enough. For example:
> 
> Suppose user wants more performance when connected to AC. Instead of using
> default "balance performance", the "performance" setting can be used. This
> changes EPP value from 0x80 to 0x00. But setting EPP to 0, results in
> electrical and thermal issues on some platforms.

> This results in CPU to do
> aggressive throttling, which causes drop in performance.

Suggest:

This results in aggressive throttling, which causes a drop in performance.

And:

Tough.
I consider "performance mode" as sacrosanct, and have always
expected these to behave identically and at max CPU freq:

intel_pstate no-hwp / performance
intel_cpufreq no-hwp / performance  (a.k.a. passive)
acpi_cpufreq / performance
intel_pstate hwp / performance
intel_cpufreq hwp / performance (in future)

as was always the case on my i7-2600K (no hwp) based computer
and is not the case on my i5-9600K (hwp capable) computer.

> But some value
> between 0x80 and 0x00 results in better performance. But that value can't
> be fixed as the power curve is not linear. In some cases just changing EPP
> from 0x80 to 0x75 is enough to get significant performance gain.
> 
> Similarly on battery EPP 0x80 can be very aggressive in power consumption.
> But picking up the next choice "balance power" results in too much loss
> of performance, which cause bad user experience in use case like "Google
> Hangout". It was observed that some value between these two EPP is
> optimal.
> 
> This change allows fine grain EPP tuning for platform like Chromebooks.
> Here based on the product and use cases, different EPP values can be set.
> This change is similar to the change done for:
> /sys/devices/system/cpu/cpu*/power/energy_perf_bias
> where user has choice to write a predefined string or raw value.
> 
> The change itself is trivial. When user preference doesn't match
> predefined string preferences and value is an unsigned integer and in
> range, use that value for EPP. When the EPP feature is not prsent
                                                             ^^^^^^
s/prsent/present

> writing raw value is not supported.
> 
> Suggested-by: Len Brown <lenb@kernel.org>
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> ---
>  Documentation/admin-guide/pm/intel_pstate.rst |  6 ++-
>  drivers/cpufreq/intel_pstate.c                | 50 +++++++++++++++----
>  2 files changed, 45 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-
> guide/pm/intel_pstate.rst
> index 939bfdc53f4f..5e209926e0ed 100644
> --- a/Documentation/admin-guide/pm/intel_pstate.rst
> +++ b/Documentation/admin-guide/pm/intel_pstate.rst
> @@ -561,7 +561,11 @@ somewhere between the two extremes:
>  Strings written to the ``energy_performance_preference`` attribute are
>  internally translated to integer values written to the processor's
>  Energy-Performance Preference (EPP) knob (if supported) or its
> -Energy-Performance Bias (EPB) knob.
> +Energy-Performance Bias (EPB) knob. It is also possible to write a positive
> +integer value between 0 to 255, if the EPP feature is present. If the EPP
> +feature is not present, writing integer value to this attribute is not
> +supported. In this case, user can use
> + "/sys/devices/system/cpu/cpu*/power/energy_perf_bias" interface.
> 
>  [Note that tasks may by migrated from one CPU to another by the scheduler's
>  load-balancing algorithm and if different energy vs performance hints are
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 1cf6d06f2314..d8f195c7a428 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -602,11 +602,12 @@ static const unsigned int epp_values[] = {
>  	HWP_EPP_POWERSAVE
>  };
> 
> -static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
> +static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp)
>  {
>  	s16 epp;
>  	int index = -EINVAL;
> 
> +	*raw_epp = 0;
>  	epp = intel_pstate_get_epp(cpu_data, 0);
>  	if (epp < 0)
>  		return epp;
> @@ -614,12 +615,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
>  	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
>  		if (epp == HWP_EPP_PERFORMANCE)
>  			return 1;
> -		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
> +		if (epp == HWP_EPP_BALANCE_PERFORMANCE)
>  			return 2;
> -		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
> +		if (epp == HWP_EPP_BALANCE_POWERSAVE)
>  			return 3;
> -		else
> +		if (epp == HWP_EPP_POWERSAVE)
>  			return 4;
> +		*raw_epp = epp;
> +		return 0;
>  	} else if (boot_cpu_has(X86_FEATURE_EPB)) {
>  		/*
>  		 * Range:
> @@ -638,7 +641,8 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
>  }
> 
>  static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
> -					      int pref_index)
> +					      int pref_index, bool use_raw,
> +					      u32 raw_epp)
>  {
>  	int epp = -EINVAL;
>  	int ret;
> @@ -657,6 +661,16 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
> 
>  		value &= ~GENMASK_ULL(31, 24);
> 
> +		if (use_raw) {
> +			if (raw_epp > 255) {
> +				ret = -EINVAL;
> +				goto return_pref;
> +			}
> +			value |= (u64)raw_epp << 24;
> +			ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
> +			goto return_pref;
> +		}
> +
>  		if (epp == -EINVAL)
>  			epp = epp_values[pref_index - 1];
> 
> @@ -694,6 +708,8 @@ static ssize_t store_energy_performance_preference(
>  {
>  	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
>  	char str_preference[21];
> +	bool raw = false;
> +	u32 epp;
>  	int ret;
> 
>  	ret = sscanf(buf, "%20s", str_preference);
> @@ -701,10 +717,21 @@ static ssize_t store_energy_performance_preference(
>  		return -EINVAL;
> 
>  	ret = match_string(energy_perf_strings, -1, str_preference);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		if (!boot_cpu_has(X86_FEATURE_HWP_EPP))
> +			return ret;
> +
> +		ret = kstrtouint(buf, 10, &epp);
> +		if (ret)
> +			return ret;
> +
> +		raw = true;
> +	}
> +
> +	ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw, epp);
> +	if (ret)
>  		return ret;
> 
> -	intel_pstate_set_energy_pref_index(cpu_data, ret);
>  	return count;
>  }
> 
> @@ -712,13 +739,16 @@ static ssize_t show_energy_performance_preference(
>  				struct cpufreq_policy *policy, char *buf)
>  {
>  	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
> -	int preference;
> +	int preference, raw_epp;
> 
> -	preference = intel_pstate_get_energy_pref_index(cpu_data);
> +	preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp);
>  	if (preference < 0)
>  		return preference;
> 
> -	return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
> +	if (raw_epp)
> +		return  sprintf(buf, "%d\n", raw_epp);
> +	else
> +		return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
>  }
> 
>  cpufreq_freq_attr_rw(energy_performance_preference);
> --
> 2.25.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value
  2020-06-24 15:37   ` Doug Smythies
@ 2020-06-24 18:02     ` srinivas pandruvada
  0 siblings, 0 replies; 5+ messages in thread
From: srinivas pandruvada @ 2020-06-24 18:02 UTC (permalink / raw)
  To: Doug Smythies; +Cc: linux-pm, linux-kernel, rjw, viresh.kumar, lenb

Hi Doug,

On Wed, 2020-06-24 at 08:37 -0700, Doug Smythies wrote:
> Hi Srinivas,
> 
> I have immediate need for this. I have been using a tool
> I wrote myself for this which I can now retire.
> (it wasn't very good anyway).
> Yours remembers for each governor, and is way better.
> Thanks.
> 
I will incorporate your changes and re-post.

Thanks,
Srinivas

> On 2020.06.23 11:27 Srinivas Pandruvada wrote:
> 
> > Currently using attribute "energy_performance_preference", user
> > space can
> > write one of the four per-defined preference string. These
> > preference
> > strings gets mapped to a hard-coded Energy-Performance Preference
> > (EPP) or
> > Energy-Performance Bias (EPB) knob.
> > 
> > These four values supposed to cover broad spectrum of use cases,
> > but they
> > are not uniformly distributed in the range.
> 
> Suggest:
> 
> These four values are supposed to cover broad spectrum of use cases,
> but
> are not uniformly distributed in the range.
> 
> > There are number of cases,
> > where this is not enough. For example:
> > 
> > Suppose user wants more performance when connected to AC. Instead
> > of using
> > default "balance performance", the "performance" setting can be
> > used. This
> > changes EPP value from 0x80 to 0x00. But setting EPP to 0, results
> > in
> > electrical and thermal issues on some platforms.
> > This results in CPU to do
> > aggressive throttling, which causes drop in performance.
> 
> Suggest:
> 
> This results in aggressive throttling, which causes a drop in
> performance.
> 
> And:
> 
> Tough.
> I consider "performance mode" as sacrosanct, and have always
> expected these to behave identically and at max CPU freq:
> 
> intel_pstate no-hwp / performance
> intel_cpufreq no-hwp / performance  (a.k.a. passive)
> acpi_cpufreq / performance
> intel_pstate hwp / performance
> intel_cpufreq hwp / performance (in future)
> 
> as was always the case on my i7-2600K (no hwp) based computer
> and is not the case on my i5-9600K (hwp capable) computer.
> > But some value
> > between 0x80 and 0x00 results in better performance. But that value
> > can't
> > be fixed as the power curve is not linear. In some cases just
> > changing EPP
> > from 0x80 to 0x75 is enough to get significant performance gain.
> > 
> > Similarly on battery EPP 0x80 can be very aggressive in power
> > consumption.
> > But picking up the next choice "balance power" results in too much
> > loss
> > of performance, which cause bad user experience in use case like
> > "Google
> > Hangout". It was observed that some value between these two EPP is
> > optimal.
> > 
> > This change allows fine grain EPP tuning for platform like
> > Chromebooks.
> > Here based on the product and use cases, different EPP values can
> > be set.
> > This change is similar to the change done for:
> > /sys/devices/system/cpu/cpu*/power/energy_perf_bias
> > where user has choice to write a predefined string or raw value.
> > 
> > The change itself is trivial. When user preference doesn't match
> > predefined string preferences and value is an unsigned integer and
> > in
> > range, use that value for EPP. When the EPP feature is not prsent
>                                                              ^^^^^^
> s/prsent/present
> 
> > writing raw value is not supported.
> > 
> > Suggested-by: Len Brown <lenb@kernel.org>
> > Signed-off-by: Srinivas Pandruvada <
> > srinivas.pandruvada@linux.intel.com>
> > ---
> >  Documentation/admin-guide/pm/intel_pstate.rst |  6 ++-
> >  drivers/cpufreq/intel_pstate.c                | 50
> > +++++++++++++++----
> >  2 files changed, 45 insertions(+), 11 deletions(-)
> > 
> > diff --git a/Documentation/admin-guide/pm/intel_pstate.rst
> > b/Documentation/admin-
> > guide/pm/intel_pstate.rst
> > index 939bfdc53f4f..5e209926e0ed 100644
> > --- a/Documentation/admin-guide/pm/intel_pstate.rst
> > +++ b/Documentation/admin-guide/pm/intel_pstate.rst
> > @@ -561,7 +561,11 @@ somewhere between the two extremes:
> >  Strings written to the ``energy_performance_preference`` attribute
> > are
> >  internally translated to integer values written to the processor's
> >  Energy-Performance Preference (EPP) knob (if supported) or its
> > -Energy-Performance Bias (EPB) knob.
> > +Energy-Performance Bias (EPB) knob. It is also possible to write a
> > positive
> > +integer value between 0 to 255, if the EPP feature is present. If
> > the EPP
> > +feature is not present, writing integer value to this attribute is
> > not
> > +supported. In this case, user can use
> > + "/sys/devices/system/cpu/cpu*/power/energy_perf_bias" interface.
> > 
> >  [Note that tasks may by migrated from one CPU to another by the
> > scheduler's
> >  load-balancing algorithm and if different energy vs performance
> > hints are
> > diff --git a/drivers/cpufreq/intel_pstate.c
> > b/drivers/cpufreq/intel_pstate.c
> > index 1cf6d06f2314..d8f195c7a428 100644
> > --- a/drivers/cpufreq/intel_pstate.c
> > +++ b/drivers/cpufreq/intel_pstate.c
> > @@ -602,11 +602,12 @@ static const unsigned int epp_values[] = {
> >  	HWP_EPP_POWERSAVE
> >  };
> > 
> > -static int intel_pstate_get_energy_pref_index(struct cpudata
> > *cpu_data)
> > +static int intel_pstate_get_energy_pref_index(struct cpudata
> > *cpu_data, int *raw_epp)
> >  {
> >  	s16 epp;
> >  	int index = -EINVAL;
> > 
> > +	*raw_epp = 0;
> >  	epp = intel_pstate_get_epp(cpu_data, 0);
> >  	if (epp < 0)
> >  		return epp;
> > @@ -614,12 +615,14 @@ static int
> > intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
> >  	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
> >  		if (epp == HWP_EPP_PERFORMANCE)
> >  			return 1;
> > -		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
> > +		if (epp == HWP_EPP_BALANCE_PERFORMANCE)
> >  			return 2;
> > -		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
> > +		if (epp == HWP_EPP_BALANCE_POWERSAVE)
> >  			return 3;
> > -		else
> > +		if (epp == HWP_EPP_POWERSAVE)
> >  			return 4;
> > +		*raw_epp = epp;
> > +		return 0;
> >  	} else if (boot_cpu_has(X86_FEATURE_EPB)) {
> >  		/*
> >  		 * Range:
> > @@ -638,7 +641,8 @@ static int
> > intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
> >  }
> > 
> >  static int intel_pstate_set_energy_pref_index(struct cpudata
> > *cpu_data,
> > -					      int pref_index)
> > +					      int pref_index, bool
> > use_raw,
> > +					      u32 raw_epp)
> >  {
> >  	int epp = -EINVAL;
> >  	int ret;
> > @@ -657,6 +661,16 @@ static int
> > intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
> > 
> >  		value &= ~GENMASK_ULL(31, 24);
> > 
> > +		if (use_raw) {
> > +			if (raw_epp > 255) {
> > +				ret = -EINVAL;
> > +				goto return_pref;
> > +			}
> > +			value |= (u64)raw_epp << 24;
> > +			ret = wrmsrl_on_cpu(cpu_data->cpu,
> > MSR_HWP_REQUEST, value);
> > +			goto return_pref;
> > +		}
> > +
> >  		if (epp == -EINVAL)
> >  			epp = epp_values[pref_index - 1];
> > 
> > @@ -694,6 +708,8 @@ static ssize_t
> > store_energy_performance_preference(
> >  {
> >  	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
> >  	char str_preference[21];
> > +	bool raw = false;
> > +	u32 epp;
> >  	int ret;
> > 
> >  	ret = sscanf(buf, "%20s", str_preference);
> > @@ -701,10 +717,21 @@ static ssize_t
> > store_energy_performance_preference(
> >  		return -EINVAL;
> > 
> >  	ret = match_string(energy_perf_strings, -1, str_preference);
> > -	if (ret < 0)
> > +	if (ret < 0) {
> > +		if (!boot_cpu_has(X86_FEATURE_HWP_EPP))
> > +			return ret;
> > +
> > +		ret = kstrtouint(buf, 10, &epp);
> > +		if (ret)
> > +			return ret;
> > +
> > +		raw = true;
> > +	}
> > +
> > +	ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw,
> > epp);
> > +	if (ret)
> >  		return ret;
> > 
> > -	intel_pstate_set_energy_pref_index(cpu_data, ret);
> >  	return count;
> >  }
> > 
> > @@ -712,13 +739,16 @@ static ssize_t
> > show_energy_performance_preference(
> >  				struct cpufreq_policy *policy, char
> > *buf)
> >  {
> >  	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
> > -	int preference;
> > +	int preference, raw_epp;
> > 
> > -	preference = intel_pstate_get_energy_pref_index(cpu_data);
> > +	preference = intel_pstate_get_energy_pref_index(cpu_data,
> > &raw_epp);
> >  	if (preference < 0)
> >  		return preference;
> > 
> > -	return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
> > +	if (raw_epp)
> > +		return  sprintf(buf, "%d\n", raw_epp);
> > +	else
> > +		return  sprintf(buf, "%s\n",
> > energy_perf_strings[preference]);
> >  }
> > 
> >  cpufreq_freq_attr_rw(energy_performance_preference);
> > --
> > 2.25.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-06-24 18:02 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-23 18:27 cpufreq: intel_pstate: Support raw epp and energy_efficiency Srinivas Pandruvada
2020-06-23 18:27 ` [PATCH v2 1/2] cpufreq: intel_pstate: Allow enable/disable energy efficiency Srinivas Pandruvada
2020-06-23 18:27 ` [PATCH v2 2/2] cpufreq: intel_pstate: Allow raw energy performance preference value Srinivas Pandruvada
2020-06-24 15:37   ` Doug Smythies
2020-06-24 18:02     ` srinivas pandruvada

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).