Hi, This patch series cleans up the code in intel_pstate somewhat and makes some changes targeted at reducing overhead. Patches [1-9/16] are cleanups mostly getting rid of unnecessary stuff. Patches [10-11/16] make changes to reduce the overhead of utilization update callbacks used in the active mode. Patches [12-16/16] make more cleanups on top of that. Refer to the changelogs for details. The series is based on the linux-next branch of the linux-pm.git tree. Thanks, Rafael
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> After recent changes the purpose of struct perf_limits is not particularly clear any more and the code may be made somewhat easier to follow by eliminating it, so go for that. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 59 +++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 36 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -186,20 +186,6 @@ struct _pid { }; /** - * struct perf_limits - Store user and policy limits - * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct - * This value is used to limit max pstate - * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct - * This value is used to limit min pstate - * - * Storage for policy defined limits. - */ -struct perf_limits { - int32_t max_perf; - int32_t min_perf; -}; - -/** * struct global_params - Global parameters, mostly tunable via sysfs. * @no_turbo: Whether or not to use turbo P-states. * @turbo_disabled: Whethet or not turbo P-states are available at all, @@ -236,7 +222,10 @@ struct global_params { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data - * @perf_limits: Capacity limits unique to this CPU + * @min_perf: Minimum capacity limit as a fraction of the maximum + * turbo P-state capacity. + * @max_perf: Maximum capacity limit as a fraction of the maximum + * turbo P-state capacity. 
* @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @epp_powersave: Last saved HWP energy performance preference @@ -268,7 +257,8 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; - struct perf_limits perf_limits; + int32_t min_perf; + int32_t max_perf; #ifdef CONFIG_ACPI struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; @@ -833,7 +823,6 @@ static void intel_pstate_hwp_set(struct for_each_cpu(cpu, policy->cpus) { struct cpudata *cpu_data = all_cpu_data[cpu]; - struct perf_limits *perf_limits = &cpu_data->perf_limits; s16 epp; rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); @@ -843,11 +832,11 @@ static void intel_pstate_hwp_set(struct else hw_max = HWP_HIGHEST_PERF(cap); - max = fp_ext_toint(hw_max * perf_limits->max_perf); + max = fp_ext_toint(hw_max * cpu_data->max_perf); if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) min = max; else - min = fp_ext_toint(hw_max * perf_limits->min_perf); + min = fp_ext_toint(hw_max * cpu_data->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); @@ -1637,7 +1626,6 @@ static void intel_pstate_get_min_max(str int max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; - struct perf_limits *perf_limits = &cpu->perf_limits; if (global.no_turbo || global.turbo_disabled) max_perf = cpu->pstate.max_pstate; @@ -1647,11 +1635,11 @@ static void intel_pstate_get_min_max(str * policy, or by cpu specific default values determined through * experimentation. 
*/ - max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf); + max_perf_adj = fp_ext_toint(max_perf * cpu->max_perf); *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - min_perf = fp_ext_toint(max_perf * perf_limits->min_perf); + min_perf = fp_ext_toint(max_perf * cpu->min_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } @@ -2030,7 +2018,6 @@ static int intel_pstate_get_max_freq(str static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct cpudata *cpu) { - struct perf_limits *limits = &cpu->perf_limits; int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; @@ -2046,8 +2033,8 @@ static void intel_pstate_update_perf_lim /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { - limits->min_perf = min_policy_perf; - limits->max_perf = max_policy_perf; + cpu->min_perf = min_policy_perf; + cpu->max_perf = max_policy_perf; } else { int32_t global_min, global_max; @@ -2064,21 +2051,21 @@ static void intel_pstate_update_perf_lim } global_min = clamp_t(int32_t, global_min, 0, global_max); - limits->min_perf = max(min_policy_perf, global_min); - limits->min_perf = min(limits->min_perf, max_policy_perf); - limits->max_perf = min(max_policy_perf, global_max); - limits->max_perf = max(min_policy_perf, limits->max_perf); + cpu->min_perf = max(min_policy_perf, global_min); + cpu->min_perf = min(cpu->min_perf, max_policy_perf); + cpu->max_perf = min(max_policy_perf, global_max); + cpu->max_perf = max(min_policy_perf, cpu->max_perf); /* Make sure min_perf <= max_perf */ - limits->min_perf = min(limits->min_perf, limits->max_perf); + cpu->min_perf = min(cpu->min_perf, cpu->max_perf); } - limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); - limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); + cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS); + cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS); pr_debug("cpu:%d 
max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, - fp_ext_toint(limits->max_perf * 100), - fp_ext_toint(limits->min_perf * 100)); + fp_ext_toint(cpu->max_perf * 100), + fp_ext_toint(cpu->min_perf * 100)); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) @@ -2181,8 +2168,8 @@ static int __intel_pstate_cpu_init(struc cpu = all_cpu_data[policy->cpu]; - cpu->perf_limits.max_perf = int_ext_tofp(1); - cpu->perf_limits.min_perf = 0; + cpu->max_perf = int_ext_tofp(1); + cpu->min_perf = 0; policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> The P-state selection algorithm used by intel_pstate for Atom processors is not based on the PID controller and the initialization of PID parameters for those processors is pointless and confusing, so drop it. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -1540,14 +1540,6 @@ static struct cpu_defaults core_params = }; static const struct cpu_defaults silvermont_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, .funcs = { .get_max = atom_get_max_pstate, .get_max_physical = atom_get_max_pstate, @@ -1561,14 +1553,6 @@ static const struct cpu_defaults silverm }; static const struct cpu_defaults airmont_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, .funcs = { .get_max = atom_get_max_pstate, .get_max_physical = atom_get_max_pstate, @@ -1602,14 +1586,6 @@ static const struct cpu_defaults knl_par }; static const struct cpu_defaults bxt_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, .funcs = { .get_max = core_get_max_pstate, .get_max_physical = core_get_max_pstate_physical, @@ -2637,9 +2613,9 @@ static int __init intel_pstate_init(void return -ENODEV; cpu_def = (struct cpu_defaults *)id->driver_data; - - copy_pid_params(&cpu_def->pid_policy); copy_cpu_funcs(&cpu_def->funcs); + if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) + 
copy_pid_params(&cpu_def->pid_policy); } if (intel_pstate_msrs_not_valid())
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Notice that both the existing struct cpu_defaults instances in which PID parameters are actually initialized use the same values of those parameters, so it is not really necessary to copy them over to pid_params dynamically. Instead, initialize pid_params statically with those values and drop the unused pid_policy member from struct cpu_defaults along with copy_pid_params() used for initializing it. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 42 +++++++++-------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -321,19 +321,26 @@ struct pstate_funcs { /** * struct cpu_defaults- Per CPU model default config data - * @pid_policy: PID config data * @funcs: Callback function data */ struct cpu_defaults { - struct pstate_adjust_policy pid_policy; struct pstate_funcs funcs; }; static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); -static struct pstate_adjust_policy pid_params __read_mostly; static struct pstate_funcs pstate_funcs __read_mostly; +static struct pstate_adjust_policy pid_params __read_mostly = { + .sample_rate_ms = 10, + .sample_rate_ns = 10 * NSEC_PER_MSEC, + .deadband = 0, + .setpoint = 97, + .p_gain_pct = 20, + .d_gain_pct = 0, + .i_gain_pct = 0, +}; + static int hwp_active __read_mostly; static bool per_cpu_limits __read_mostly; @@ -1520,14 +1527,6 @@ static int knl_get_turbo_pstate(void) } static struct cpu_defaults core_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 97, - .p_gain_pct = 20, - .d_gain_pct = 0, - .i_gain_pct = 0, - }, .funcs = { .get_max = core_get_max_pstate, 
.get_max_physical = core_get_max_pstate_physical, @@ -1566,14 +1565,6 @@ static const struct cpu_defaults airmont }; static const struct cpu_defaults knl_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 97, - .p_gain_pct = 20, - .d_gain_pct = 0, - .i_gain_pct = 0, - }, .funcs = { .get_max = core_get_max_pstate, .get_max_physical = core_get_max_pstate_physical, @@ -2412,17 +2403,6 @@ static int __init intel_pstate_msrs_not_ return 0; } -static void __init copy_pid_params(struct pstate_adjust_policy *policy) -{ - pid_params.sample_rate_ms = policy->sample_rate_ms; - pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; - pid_params.p_gain_pct = policy->p_gain_pct; - pid_params.i_gain_pct = policy->i_gain_pct; - pid_params.d_gain_pct = policy->d_gain_pct; - pid_params.deadband = policy->deadband; - pid_params.setpoint = policy->setpoint; -} - #ifdef CONFIG_ACPI static void intel_pstate_use_acpi_profile(void) { @@ -2614,8 +2594,6 @@ static int __init intel_pstate_init(void cpu_def = (struct cpu_defaults *)id->driver_data; copy_cpu_funcs(&cpu_def->funcs); - if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) - copy_pid_params(&cpu_def->pid_policy); } if (intel_pstate_msrs_not_valid())
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> There is only one caller of intel_pstate_reset_all_pid(), which is pid_param_set() used in the debugfs interface only, and having that code split does not make it particularly convenient to follow. For this reason, move the body of intel_pstate_reset_all_pid() into its caller and drop that function. Also change the loop from for_each_online_cpu() (which is obviously racy with respect to CPU offline/online) to for_each_possible_cpu(), so that all PID parameters are reset for all CPUs regardless of their online/offline status (to prevent, for example, a previously offline CPU from going online with a stale set of PID parameters). Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -576,16 +576,6 @@ static inline void intel_pstate_busy_pid pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); } -static inline void intel_pstate_reset_all_pid(void) -{ - unsigned int cpu; - - for_each_online_cpu(cpu) { - if (all_cpu_data[cpu]) - intel_pstate_busy_pid_reset(all_cpu_data[cpu]); - } -} - static inline void update_turbo_state(void) { u64 misc_en; @@ -941,9 +931,14 @@ static void intel_pstate_update_policies /************************** debugfs begin ************************/ static int pid_param_set(void *data, u64 val) { + unsigned int cpu; + *(u32 *)data = val; pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; - intel_pstate_reset_all_pid(); + for_each_possible_cpu(cpu) + if (all_cpu_data[cpu]) + intel_pstate_busy_pid_reset(all_cpu_data[cpu]); + return 0; }
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> intel_pstate_busy_pid_reset() is the only caller of pid_reset(), pid_p_gain_set(), pid_i_gain_set(), and pid_d_gain_set(). Moreover, it passes constants as two parameters of pid_reset() and all of the other routines above essentially contain the same code, so fold all of them into the caller and drop unnecessary computations. Introduce percent_fp() for converting integer values in percent to fixed-point fractions and use it in the above code cleanup. Finally, rename intel_pstate_busy_pid_reset() to intel_pstate_pid_reset() as it also is used for the initialization of PID parameters for every CPU and the meaning of the "busy" part of the name is not particularly clear. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 46 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 30 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -74,6 +74,11 @@ static inline int ceiling_fp(int32_t x) return ret; } +static inline int32_t percent_fp(int percent) +{ + return div_fp(percent, 100); +} + static inline u64 mul_ext_fp(u64 x, u64 y) { return (x * y) >> EXT_FRAC_BITS; @@ -507,29 +512,6 @@ static inline void intel_pstate_exit_per } #endif -static inline void pid_reset(struct _pid *pid, int setpoint, int busy, - int deadband, int integral) { - pid->setpoint = int_tofp(setpoint); - pid->deadband = int_tofp(deadband); - pid->integral = int_tofp(integral); - pid->last_err = int_tofp(setpoint) - int_tofp(busy); -} - -static inline void pid_p_gain_set(struct _pid *pid, int percent) -{ - pid->p_gain = div_fp(percent, 100); -} - -static inline void pid_i_gain_set(struct _pid *pid, int percent) -{ - pid->i_gain = div_fp(percent, 100); -} - -static inline void pid_d_gain_set(struct _pid *pid, int 
percent) -{ - pid->d_gain = div_fp(percent, 100); -} - static signed int pid_calc(struct _pid *pid, int32_t busy) { signed int result; @@ -567,13 +549,17 @@ static signed int pid_calc(struct _pid * return (signed int)fp_toint(result); } -static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) +static inline void intel_pstate_pid_reset(struct cpudata *cpu) { - pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct); - pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); - pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); + struct _pid *pid = &cpu->pid; - pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); + pid->p_gain = percent_fp(pid_params.p_gain_pct); + pid->d_gain = percent_fp(pid_params.d_gain_pct); + pid->i_gain = percent_fp(pid_params.i_gain_pct); + pid->setpoint = int_tofp(pid_params.setpoint); + pid->last_err = pid->setpoint - int_tofp(100); + pid->deadband = int_tofp(pid_params.deadband); + pid->integral = 0; } static inline void update_turbo_state(void) @@ -937,7 +923,7 @@ static int pid_param_set(void *data, u64 pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; for_each_possible_cpu(cpu) if (all_cpu_data[cpu]) - intel_pstate_busy_pid_reset(all_cpu_data[cpu]); + intel_pstate_pid_reset(all_cpu_data[cpu]); return 0; } @@ -1931,7 +1917,7 @@ static int intel_pstate_init_cpu(unsigne intel_pstate_get_cpu_pstates(cpu); - intel_pstate_busy_pid_reset(cpu); + intel_pstate_pid_reset(cpu); pr_debug("controlling: cpu %d\n", cpunum);
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> In the HWP enabled case pid_params.sample_rate_ns only needs to be updated once, because it is global, so do that when setting hwp_active instead of doing it during the initialization of every CPU. Moreover, pid_params.sample_rate_ms is never used if HWP is enabled, so do not update it at all then. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -1911,8 +1911,6 @@ static int intel_pstate_init_cpu(unsigne intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); - pid_params.sample_rate_ms = 50; - pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; } intel_pstate_get_cpu_pstates(cpu); @@ -2563,6 +2561,7 @@ static int __init intel_pstate_init(void } else { hwp_active++; intel_pstate.attr = hwp_cpufreq_attrs; + pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; goto hwp_cpu_matched; } } else {
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> PID controller parameters only need to be initialized if the get_target_pstate_use_performance() P-state selection routine is going to be used. It is not necessary to initialize them otherwise, so don't do that. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -1911,12 +1911,12 @@ static int intel_pstate_init_cpu(unsigne intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); + } else if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) { + intel_pstate_pid_reset(cpu); } intel_pstate_get_cpu_pstates(cpu); - intel_pstate_pid_reset(cpu); - pr_debug("controlling: cpu %d\n", cpunum); return 0;
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> The driver_registered variable in intel_pstate is used for checking whether or not the driver has been registered, but intel_pstate_driver can be used for that too (with the rule that the driver is not registered as long as it is NULL). That is a bit more straightforward and the code may be simplified a bit this way, so modify the driver accordingly. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 46 ++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -349,7 +349,7 @@ static struct pstate_adjust_policy pid_p static int hwp_active __read_mostly; static bool per_cpu_limits __read_mostly; -static bool driver_registered __read_mostly; +static struct cpufreq_driver *intel_pstate_driver __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; @@ -1035,7 +1035,7 @@ static ssize_t show_turbo_pct(struct kob mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1060,7 +1060,7 @@ static ssize_t show_num_pstates(struct k mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1080,7 +1080,7 @@ static ssize_t show_no_turbo(struct kobj mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1108,7 +1108,7 @@ static ssize_t store_no_turbo(struct kob mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1155,7 +1155,7 @@ 
static ssize_t store_max_perf_pct(struct mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1185,7 +1185,7 @@ static ssize_t store_min_perf_pct(struct mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -2255,7 +2255,7 @@ static struct cpufreq_driver intel_cpufr .name = "intel_cpufreq", }; -static struct cpufreq_driver *intel_pstate_driver = &intel_pstate; +static struct cpufreq_driver *default_driver = &intel_pstate; static void intel_pstate_driver_cleanup(void) { @@ -2272,15 +2272,17 @@ static void intel_pstate_driver_cleanup( } } put_online_cpus(); + intel_pstate_driver = NULL; } -static int intel_pstate_register_driver(void) +static int intel_pstate_register_driver(struct cpufreq_driver *driver) { int ret; memset(&global, 0, sizeof(global)); global.max_perf_pct = 100; + intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { intel_pstate_driver_cleanup(); @@ -2289,10 +2291,6 @@ static int intel_pstate_register_driver( global.min_perf_pct = min_perf_pct_min(); - mutex_lock(&intel_pstate_limits_lock); - driver_registered = true; - mutex_unlock(&intel_pstate_limits_lock); - if (intel_pstate_driver == &intel_pstate && !hwp_active && pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) intel_pstate_debug_expose_params(); @@ -2309,10 +2307,6 @@ static int intel_pstate_unregister_drive pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) intel_pstate_debug_hide_params(); - mutex_lock(&intel_pstate_limits_lock); - driver_registered = false; - mutex_unlock(&intel_pstate_limits_lock); - cpufreq_unregister_driver(intel_pstate_driver); intel_pstate_driver_cleanup(); @@ -2321,7 +2315,7 @@ static int intel_pstate_unregister_drive static ssize_t intel_pstate_show_status(char *buf) { - if 
(!driver_registered) + if (!intel_pstate_driver) return sprintf(buf, "off\n"); return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ? @@ -2333,11 +2327,11 @@ static int intel_pstate_update_status(co int ret; if (size == 3 && !strncmp(buf, "off", size)) - return driver_registered ? + return intel_pstate_driver ? intel_pstate_unregister_driver() : -EINVAL; if (size == 6 && !strncmp(buf, "active", size)) { - if (driver_registered) { + if (intel_pstate_driver) { if (intel_pstate_driver == &intel_pstate) return 0; @@ -2346,12 +2340,11 @@ static int intel_pstate_update_status(co return ret; } - intel_pstate_driver = &intel_pstate; - return intel_pstate_register_driver(); + return intel_pstate_register_driver(&intel_pstate); } if (size == 7 && !strncmp(buf, "passive", size)) { - if (driver_registered) { + if (intel_pstate_driver) { if (intel_pstate_driver != &intel_pstate) return 0; @@ -2360,8 +2353,7 @@ static int intel_pstate_update_status(co return ret; } - intel_pstate_driver = &intel_cpufreq; - return intel_pstate_register_driver(); + return intel_pstate_register_driver(&intel_cpufreq); } return -EINVAL; @@ -2601,7 +2593,7 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); mutex_lock(&intel_pstate_driver_lock); - rc = intel_pstate_register_driver(); + rc = intel_pstate_register_driver(default_driver); mutex_unlock(&intel_pstate_driver_lock); if (rc) return rc; @@ -2622,7 +2614,7 @@ static int __init intel_pstate_setup(cha no_load = 1; } else if (!strcmp(str, "passive")) { pr_info("Passive mode enabled\n"); - intel_pstate_driver = &intel_cpufreq; + default_driver = &intel_cpufreq; no_hwp = 1; } if (!strcmp(str, "no_hwp")) {
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> One of the checks in intel_pstate_update_status() implicitly relies on the information that there are only two struct cpufreq_driver objects available, but it is better to do it directly against the value it really is about (to make the code easier to follow if nothing else). Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -2345,7 +2345,7 @@ static int intel_pstate_update_status(co if (size == 7 && !strncmp(buf, "passive", size)) { if (intel_pstate_driver) { - if (intel_pstate_driver != &intel_pstate) + if (intel_pstate_driver == &intel_cpufreq) return 0; ret = intel_pstate_unregister_driver();
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Notice that some overhead in the utilization update callbacks registered by intel_pstate in the active mode can be avoided if those callbacks are tailored to specific configurations of the driver. For example, the utilization update callback for the HWP enabled case only needs to update the average CPU performance periodically whereas the utilization update callback for the PID-based algorithm does not need to take IO-wait boosting into account and so on. With that in mind, define three utilization update callbacks for three different use cases: HWP enabled, the CPU load "powersave" P-state selection algorithm and the PID-based "powersave" P-state selection algorithm and modify the driver initialization to choose the callback matching its current configuration. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 79 ++++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 25 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -37,6 +37,9 @@ #include <asm/cpufeature.h> #include <asm/intel-family.h> +#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC) +#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL (50 * NSEC_PER_MSEC) + #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 #ifdef CONFIG_ACPI @@ -1676,7 +1679,11 @@ static inline bool intel_pstate_sample(s * that sample.time will always be reset before setting the utilization * update hook and make the caller skip the sample then. 
*/ - return !!cpu->last_sample_time; + if (cpu->last_sample_time) { + intel_pstate_calc_avg_perf(cpu); + return true; + } + return false; } static inline int32_t get_avg_frequency(struct cpudata *cpu) @@ -1783,7 +1790,7 @@ static void intel_pstate_update_pstate(s wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } -static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +static void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { int from, target_pstate; struct sample *sample; @@ -1811,36 +1818,56 @@ static inline void intel_pstate_adjust_b fp_toint(cpu->iowait_boost * 100)); } +static void intel_pstate_update_util_hwp(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; + + if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL) + intel_pstate_sample(cpu, time); +} + +static void intel_pstate_update_util_pid(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; + + if ((s64)delta_ns < pid_params.sample_rate_ns) + return; + + if (intel_pstate_sample(cpu, time)) + intel_pstate_adjust_busy_pstate(cpu); +} + static void intel_pstate_update_util(struct update_util_data *data, u64 time, unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns; - if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) { - if (flags & SCHED_CPUFREQ_IOWAIT) { - cpu->iowait_boost = int_tofp(1); - } else if (cpu->iowait_boost) { - /* Clear iowait_boost if the CPU may have been idle. 
*/ - delta_ns = time - cpu->last_update; - if (delta_ns > TICK_NSEC) - cpu->iowait_boost = 0; - } - cpu->last_update = time; + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. */ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; } - + cpu->last_update = time; delta_ns = time - cpu->sample.time; - if ((s64)delta_ns >= pid_params.sample_rate_ns) { - bool sample_taken = intel_pstate_sample(cpu, time); + if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL) + return; - if (sample_taken) { - intel_pstate_calc_avg_perf(cpu); - if (!hwp_active) - intel_pstate_adjust_busy_pstate(cpu); - } - } + if (intel_pstate_sample(cpu, time)) + intel_pstate_adjust_busy_pstate(cpu); } +/* Utilization update callback to register in the active mode. */ +static void (*update_util_cb)(struct update_util_data *data, u64 time, + unsigned int flags) = intel_pstate_update_util; + #define ICPU(model, policy) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ (unsigned long)&policy } @@ -1938,8 +1965,7 @@ static void intel_pstate_set_update_util /* Prevent intel_pstate_update_util() from using stale data. 
*/ cpu->sample.time = 0; - cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, - intel_pstate_update_util); + cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, update_util_cb); cpu->update_util_set = true; } @@ -2405,6 +2431,9 @@ static void __init copy_cpu_funcs(struct pstate_funcs.get_target_pstate = funcs->get_target_pstate; intel_pstate_use_acpi_profile(); + + if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) + update_util_cb = intel_pstate_update_util_pid; } #ifdef CONFIG_ACPI @@ -2549,11 +2578,11 @@ static int __init intel_pstate_init(void if (x86_match_cpu(hwp_support_ids)) { copy_cpu_funcs(&core_params.funcs); if (no_hwp) { - pstate_funcs.get_target_pstate = get_target_pstate_use_cpu_load; + update_util_cb = intel_pstate_update_util; } else { hwp_active++; intel_pstate.attr = hwp_cpufreq_attrs; - pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + update_util_cb = intel_pstate_update_util_hwp; goto hwp_cpu_matched; } } else {
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Avoid using extra function pointers during P-state selection by dropping the get_target_pstate member from struct pstate_funcs, adding a new update_util callback to it (to be registered with the CPU scheduler as the utilization update callback in the active mode) and reworking the utilization update callback routines to invoke specific P-state selection functions directly. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 81 +++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 38 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -311,7 +311,7 @@ struct pstate_adjust_policy { * @get_scaling: Callback to get frequency scaling factor * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms - * @get_target_pstate: Callback to a function to calculate next P state to use + * @update_util: Active mode utilization update callback. * * Core and Atom CPU models have different way to get P State limits. This * structure is used to store those callbacks. 
@@ -324,7 +324,8 @@ struct pstate_funcs { int (*get_scaling)(void); u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); - int32_t (*get_target_pstate)(struct cpudata *); + void (*update_util)(struct update_util_data *data, u64 time, + unsigned int flags); }; /** @@ -335,9 +336,6 @@ struct cpu_defaults { struct pstate_funcs funcs; }; -static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); -static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); - static struct pstate_funcs pstate_funcs __read_mostly; static struct pstate_adjust_policy pid_params __read_mostly = { .sample_rate_ms = 10, @@ -1510,6 +1508,11 @@ static int knl_get_turbo_pstate(void) return ret; } +static void intel_pstate_update_util_pid(struct update_util_data *data, + u64 time, unsigned int flags); +static void intel_pstate_update_util(struct update_util_data *data, u64 time, + unsigned int flags); + static struct cpu_defaults core_params = { .funcs = { .get_max = core_get_max_pstate, @@ -1518,7 +1521,7 @@ static struct cpu_defaults core_params = .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_performance, + .update_util = intel_pstate_update_util_pid, }, }; @@ -1531,7 +1534,7 @@ static const struct cpu_defaults silverm .get_val = atom_get_val, .get_scaling = silvermont_get_scaling, .get_vid = atom_get_vid, - .get_target_pstate = get_target_pstate_use_cpu_load, + .update_util = intel_pstate_update_util, }, }; @@ -1544,7 +1547,7 @@ static const struct cpu_defaults airmont .get_val = atom_get_val, .get_scaling = airmont_get_scaling, .get_vid = atom_get_vid, - .get_target_pstate = get_target_pstate_use_cpu_load, + .update_util = intel_pstate_update_util, }, }; @@ -1556,7 +1559,7 @@ static const struct cpu_defaults knl_par .get_turbo = knl_get_turbo_pstate, .get_scaling = core_get_scaling, .get_val = core_get_val, - .get_target_pstate = 
get_target_pstate_use_performance, + .update_util = intel_pstate_update_util_pid, }, }; @@ -1568,7 +1571,7 @@ static const struct cpu_defaults bxt_par .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_cpu_load, + .update_util = intel_pstate_update_util, }, }; @@ -1704,6 +1707,9 @@ static inline int32_t get_target_pstate_ int32_t busy_frac, boost; int target, avg_pstate; + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) + return cpu->pstate.turbo_pstate; + busy_frac = div_fp(sample->mperf, sample->tsc); boost = cpu->iowait_boost; @@ -1740,6 +1746,9 @@ static inline int32_t get_target_pstate_ int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; u64 duration_ns; + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) + return cpu->pstate.turbo_pstate; + /* * perf_scaled is the ratio of the average P-state during the last * sampling period to the P-state requested last time (in percent). @@ -1790,16 +1799,11 @@ static void intel_pstate_update_pstate(s wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } -static void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate) { - int from, target_pstate; + int from = cpu->pstate.current_pstate; struct sample *sample; - from = cpu->pstate.current_pstate; - - target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? 
- cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); - update_turbo_state(); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); @@ -1837,8 +1841,12 @@ static void intel_pstate_update_util_pid if ((s64)delta_ns < pid_params.sample_rate_ns) return; - if (intel_pstate_sample(cpu, time)) - intel_pstate_adjust_busy_pstate(cpu); + if (intel_pstate_sample(cpu, time)) { + int target_pstate; + + target_pstate = get_target_pstate_use_performance(cpu); + intel_pstate_adjust_pstate(cpu, target_pstate); + } } static void intel_pstate_update_util(struct update_util_data *data, u64 time, @@ -1860,13 +1868,13 @@ static void intel_pstate_update_util(str if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL) return; - if (intel_pstate_sample(cpu, time)) - intel_pstate_adjust_busy_pstate(cpu); -} + if (intel_pstate_sample(cpu, time)) { + int target_pstate; -/* Utilization update callback to register in the active mode. */ -static void (*update_util_cb)(struct update_util_data *data, u64 time, - unsigned int flags) = intel_pstate_update_util; + target_pstate = get_target_pstate_use_cpu_load(cpu); + intel_pstate_adjust_pstate(cpu, target_pstate); + } +} #define ICPU(model, policy) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ @@ -1938,7 +1946,7 @@ static int intel_pstate_init_cpu(unsigne intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); - } else if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) { + } else if (pstate_funcs.update_util == intel_pstate_update_util_pid) { intel_pstate_pid_reset(cpu); } @@ -1965,7 +1973,8 @@ static void intel_pstate_set_update_util /* Prevent intel_pstate_update_util() from using stale data. 
*/ cpu->sample.time = 0; - cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, update_util_cb); + cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, + pstate_funcs.update_util); cpu->update_util_set = true; } @@ -2318,7 +2327,7 @@ static int intel_pstate_register_driver( global.min_perf_pct = min_perf_pct_min(); if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + pstate_funcs.update_util == intel_pstate_update_util_pid) intel_pstate_debug_expose_params(); return 0; @@ -2329,8 +2338,8 @@ static int intel_pstate_unregister_drive if (hwp_active) return -EBUSY; - if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + if (intel_pstate_driver == &intel_pstate && + pstate_funcs.update_util == intel_pstate_update_util_pid) intel_pstate_debug_hide_params(); cpufreq_unregister_driver(intel_pstate_driver); @@ -2409,8 +2418,7 @@ static void intel_pstate_use_acpi_profil case PM_APPLIANCE_PC: case PM_DESKTOP: case PM_WORKSTATION: - pstate_funcs.get_target_pstate = - get_target_pstate_use_cpu_load; + pstate_funcs.update_util = intel_pstate_update_util; } } #else @@ -2428,12 +2436,9 @@ static void __init copy_cpu_funcs(struct pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.get_val = funcs->get_val; pstate_funcs.get_vid = funcs->get_vid; - pstate_funcs.get_target_pstate = funcs->get_target_pstate; + pstate_funcs.update_util = funcs->update_util; intel_pstate_use_acpi_profile(); - - if (pstate_funcs.get_target_pstate == get_target_pstate_use_performance) - update_util_cb = intel_pstate_update_util_pid; } #ifdef CONFIG_ACPI @@ -2578,11 +2583,11 @@ static int __init intel_pstate_init(void if (x86_match_cpu(hwp_support_ids)) { copy_cpu_funcs(&core_params.funcs); if (no_hwp) { - update_util_cb = intel_pstate_update_util; + pstate_funcs.update_util = intel_pstate_update_util; } else { hwp_active++; intel_pstate.attr 
= hwp_cpufreq_attrs; - update_util_cb = intel_pstate_update_util_hwp; + pstate_funcs.update_util = intel_pstate_update_util_hwp; goto hwp_cpu_matched; } } else {
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Move the definitions of the cpu_defaults structures after the definitions of utilization update callback routines to avoid extra declarations of the latter. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 129 +++++++++++++++++++---------------------- 1 file changed, 62 insertions(+), 67 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -1508,73 +1508,6 @@ static int knl_get_turbo_pstate(void) return ret; } -static void intel_pstate_update_util_pid(struct update_util_data *data, - u64 time, unsigned int flags); -static void intel_pstate_update_util(struct update_util_data *data, u64 time, - unsigned int flags); - -static struct cpu_defaults core_params = { - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .update_util = intel_pstate_update_util_pid, - }, -}; - -static const struct cpu_defaults silvermont_params = { - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = silvermont_get_scaling, - .get_vid = atom_get_vid, - .update_util = intel_pstate_update_util, - }, -}; - -static const struct cpu_defaults airmont_params = { - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = airmont_get_scaling, - .get_vid = atom_get_vid, - .update_util = intel_pstate_update_util, - }, -}; - -static const struct cpu_defaults 
knl_params = { - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = knl_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .update_util = intel_pstate_update_util_pid, - }, -}; - -static const struct cpu_defaults bxt_params = { - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .update_util = intel_pstate_update_util, - }, -}; - static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; @@ -1876,6 +1809,68 @@ static void intel_pstate_update_util(str } } +static struct cpu_defaults core_params = { + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, + }, +}; + +static const struct cpu_defaults silvermont_params = { + .funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, + }, +}; + +static const struct cpu_defaults airmont_params = { + .funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = airmont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, + }, +}; + +static const struct cpu_defaults knl_params = { + .funcs = { + .get_max = core_get_max_pstate, + 
.get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, + }, +}; + +static const struct cpu_defaults bxt_params = { + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util, + }, +}; + #define ICPU(model, policy) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ (unsigned long)&policy }
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> The cpu_defaults structure is redundant, because it only contains one member of type struct pstate_funcs which can be used directly instead of struct cpu_defaults. For this reason, drop struct cpu_defaults, use struct pstate_funcs directly instead of it where applicable and rename all of the variables of that type accordingly. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 170 ++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 95 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -328,14 +328,6 @@ struct pstate_funcs { unsigned int flags); }; -/** - * struct cpu_defaults- Per CPU model default config data - * @funcs: Callback function data - */ -struct cpu_defaults { - struct pstate_funcs funcs; -}; - static struct pstate_funcs pstate_funcs __read_mostly; static struct pstate_adjust_policy pid_params __read_mostly = { .sample_rate_ms = 10, @@ -1809,66 +1801,56 @@ static void intel_pstate_update_util(str } } -static struct cpu_defaults core_params = { - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .update_util = intel_pstate_update_util_pid, - }, -}; - -static const struct cpu_defaults silvermont_params = { - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = silvermont_get_scaling, - .get_vid = atom_get_vid, - .update_util = intel_pstate_update_util, - }, -}; - -static const struct cpu_defaults airmont_params = { - .funcs = { - 
.get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = airmont_get_scaling, - .get_vid = atom_get_vid, - .update_util = intel_pstate_update_util, - }, -}; - -static const struct cpu_defaults knl_params = { - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = knl_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .update_util = intel_pstate_update_util_pid, - }, -}; - -static const struct cpu_defaults bxt_params = { - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .update_util = intel_pstate_update_util, - }, +static struct pstate_funcs core_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, +}; + +static const struct pstate_funcs silvermont_funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, +}; + +static const struct pstate_funcs airmont_funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = airmont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, +}; + +static const struct 
pstate_funcs knl_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, +}; + +static const struct pstate_funcs bxt_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util, }; #define ICPU(model, policy) \ @@ -1876,38 +1858,38 @@ static const struct cpu_defaults bxt_par (unsigned long)&policy } static const struct x86_cpu_id intel_pstate_cpu_ids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE, core_params), - ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params), - ICPU(INTEL_FAM6_IVYBRIDGE, core_params), - ICPU(INTEL_FAM6_HASWELL_CORE, core_params), - ICPU(INTEL_FAM6_BROADWELL_CORE, core_params), - ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_HASWELL_X, core_params), - ICPU(INTEL_FAM6_HASWELL_ULT, core_params), - ICPU(INTEL_FAM6_HASWELL_GT3E, core_params), - ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params), - ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params), - ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params), - ICPU(INTEL_FAM6_BROADWELL_X, core_params), - ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), - ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), - ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), + ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), + ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs), + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs), + ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), + ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs), + ICPU(INTEL_FAM6_IVYBRIDGE_X, 
core_funcs), + ICPU(INTEL_FAM6_HASWELL_X, core_funcs), + ICPU(INTEL_FAM6_HASWELL_ULT, core_funcs), + ICPU(INTEL_FAM6_HASWELL_GT3E, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_GT3E, core_funcs), + ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_funcs), + ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), + ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), + ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { - ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), - ICPU(INTEL_FAM6_BROADWELL_X, core_params), - ICPU(INTEL_FAM6_SKYLAKE_X, core_params), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), {} }; static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { - ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params), + ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs), {} }; @@ -2576,7 +2558,7 @@ static int __init intel_pstate_init(void return -ENODEV; if (x86_match_cpu(hwp_support_ids)) { - copy_cpu_funcs(&core_params.funcs); + copy_cpu_funcs(&core_funcs); if (no_hwp) { pstate_funcs.update_util = intel_pstate_update_util; } else { @@ -2587,14 +2569,12 @@ static int __init intel_pstate_init(void } } else { const struct x86_cpu_id *id; - struct cpu_defaults *cpu_def; id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) return -ENODEV; - cpu_def = (struct cpu_defaults *)id->driver_data; - copy_cpu_funcs(&cpu_def->funcs); + copy_cpu_funcs((struct pstate_funcs *)id->driver_data); } if (intel_pstate_msrs_not_valid())
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Add a new function pid_in_use() to return the information on whether or not the PID-based P-state selection algorithm is in use. That allows a couple of complicated conditions in the code to be reduced to simple checks against the new function's return value. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -1893,6 +1893,8 @@ static const struct x86_cpu_id intel_pst {} }; +static bool pid_in_use(void); + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -1923,7 +1925,7 @@ static int intel_pstate_init_cpu(unsigne intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); - } else if (pstate_funcs.update_util == intel_pstate_update_util_pid) { + } else if (pid_in_use()) { intel_pstate_pid_reset(cpu); } @@ -2269,6 +2271,12 @@ static struct cpufreq_driver intel_cpufr static struct cpufreq_driver *default_driver = &intel_pstate; +static bool pid_in_use(void) +{ + return intel_pstate_driver == &intel_pstate && + pstate_funcs.update_util == intel_pstate_update_util_pid; +} + static void intel_pstate_driver_cleanup(void) { unsigned int cpu; @@ -2303,8 +2311,7 @@ static int intel_pstate_register_driver( global.min_perf_pct = min_perf_pct_min(); - if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.update_util == intel_pstate_update_util_pid) + if (pid_in_use()) intel_pstate_debug_expose_params(); return 0; @@ -2315,8 +2322,7 @@ static int intel_pstate_unregister_drive if (hwp_active) return -EBUSY; - if (intel_pstate_driver == &intel_pstate && - pstate_funcs.update_util == intel_pstate_update_util_pid) + if (pid_in_use()) 
intel_pstate_debug_hide_params(); cpufreq_unregister_driver(intel_pstate_driver);
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> intel_pstate_hwp_set() is the only function walking policy->cpus in intel_pstate. The rest of the code simply assumes one CPU per policy, including the initialization code. Therefore it doesn't make sense for intel_pstate_hwp_set() to walk policy->cpus as it is guaranteed to have only one bit set for policy->cpu. For this reason, rearrange intel_pstate_hwp_set() to take the CPU number as the argument and drop the loop over policy->cpus from it. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 132 +++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 68 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -792,84 +792,80 @@ static struct freq_attr *hwp_cpufreq_att NULL, }; -static void intel_pstate_hwp_set(struct cpufreq_policy *policy) +static void intel_pstate_hwp_set(unsigned int cpu) { - int min, hw_min, max, hw_max, cpu; + struct cpudata *cpu_data = all_cpu_data[cpu]; + int min, hw_min, max, hw_max; u64 value, cap; + s16 epp; - for_each_cpu(cpu, policy->cpus) { - struct cpudata *cpu_data = all_cpu_data[cpu]; - s16 epp; - - rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - hw_min = HWP_LOWEST_PERF(cap); - if (global.no_turbo) - hw_max = HWP_GUARANTEED_PERF(cap); - else - hw_max = HWP_HIGHEST_PERF(cap); - - max = fp_ext_toint(hw_max * cpu_data->max_perf); - if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) - min = max; - else - min = fp_ext_toint(hw_max * cpu_data->min_perf); - - rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); + rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + hw_min = HWP_LOWEST_PERF(cap); + if (global.no_turbo) + hw_max = HWP_GUARANTEED_PERF(cap); + else + hw_max = HWP_HIGHEST_PERF(cap); - value &= ~HWP_MIN_PERF(~0L); - value |= HWP_MIN_PERF(min); + max 
= fp_ext_toint(hw_max * cpu_data->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * cpu_data->min_perf); - value &= ~HWP_MAX_PERF(~0L); - value |= HWP_MAX_PERF(max); + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - if (cpu_data->epp_policy == cpu_data->policy) - goto skip_epp; + value &= ~HWP_MIN_PERF(~0L); + value |= HWP_MIN_PERF(min); - cpu_data->epp_policy = cpu_data->policy; + value &= ~HWP_MAX_PERF(~0L); + value |= HWP_MAX_PERF(max); - if (cpu_data->epp_saved >= 0) { - epp = cpu_data->epp_saved; - cpu_data->epp_saved = -EINVAL; - goto update_epp; - } + if (cpu_data->epp_policy == cpu_data->policy) + goto skip_epp; - if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { - epp = intel_pstate_get_epp(cpu_data, value); - cpu_data->epp_powersave = epp; - /* If EPP read was failed, then don't try to write */ - if (epp < 0) - goto skip_epp; - - - epp = 0; - } else { - /* skip setting EPP, when saved value is invalid */ - if (cpu_data->epp_powersave < 0) - goto skip_epp; - - /* - * No need to restore EPP when it is not zero. This - * means: - * - Policy is not changed - * - user has manually changed - * - Error reading EPB - */ - epp = intel_pstate_get_epp(cpu_data, value); - if (epp) - goto skip_epp; + cpu_data->epp_policy = cpu_data->policy; - epp = cpu_data->epp_powersave; - } + if (cpu_data->epp_saved >= 0) { + epp = cpu_data->epp_saved; + cpu_data->epp_saved = -EINVAL; + goto update_epp; + } + + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { + epp = intel_pstate_get_epp(cpu_data, value); + cpu_data->epp_powersave = epp; + /* If EPP read was failed, then don't try to write */ + if (epp < 0) + goto skip_epp; + + epp = 0; + } else { + /* skip setting EPP, when saved value is invalid */ + if (cpu_data->epp_powersave < 0) + goto skip_epp; + + /* + * No need to restore EPP when it is not zero. 
This + * means: + * - Policy is not changed + * - user has manually changed + * - Error reading EPB + */ + epp = intel_pstate_get_epp(cpu_data, value); + if (epp) + goto skip_epp; + + epp = cpu_data->epp_powersave; + } update_epp: - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - } else { - intel_pstate_set_epb(cpu, epp); - } -skip_epp: - wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); + if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + } else { + intel_pstate_set_epb(cpu, epp); } +skip_epp: + wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) @@ -892,7 +888,7 @@ static int intel_pstate_resume(struct cp mutex_lock(&intel_pstate_limits_lock); all_cpu_data[policy->cpu]->epp_policy = 0; - intel_pstate_hwp_set(policy); + intel_pstate_hwp_set(policy->cpu); mutex_unlock(&intel_pstate_limits_lock); @@ -2057,7 +2053,7 @@ static int intel_pstate_set_policy(struc intel_pstate_set_update_util_hook(policy->cpu); if (hwp_active) - intel_pstate_hwp_set(policy); + intel_pstate_hwp_set(policy->cpu); mutex_unlock(&intel_pstate_limits_lock);
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Some computations in intel_pstate_get_min_max() are not necessary and one of its two callers doesn't even use the full result. First off, the fixed-point value of cpu->max_perf represents a non-negative number between 0 and 1 inclusive and cpu->min_perf cannot be greater than cpu->max_perf. It is not necessary to check those conditions every time the numbers in question are used. Moreover, since intel_pstate_max_within_limits() only needs the upper boundary, it doesn't make sense to compute the lower one in there and returning min and max from intel_pstate_get_min_max() via pointers doesn't look particularly nice. For the above reasons, drop intel_pstate_get_min_max(), add a helper to get the base P-state for min/max computations and carry them out directly in the previous callers of intel_pstate_get_min_max(). Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/intel_pstate.c | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) Index: linux-pm/drivers/cpufreq/intel_pstate.c =================================================================== --- linux-pm.orig/drivers/cpufreq/intel_pstate.c +++ linux-pm/drivers/cpufreq/intel_pstate.c @@ -1496,26 +1496,10 @@ static int knl_get_turbo_pstate(void) return ret; } -static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +static int intel_pstate_get_base_pstate(struct cpudata *cpu) { - int max_perf = cpu->pstate.turbo_pstate; - int max_perf_adj; - int min_perf; - - if (global.no_turbo || global.turbo_disabled) - max_perf = cpu->pstate.max_pstate; - - /* - * performance can be limited by user through sysfs, by cpufreq - * policy, or by cpu specific default values determined through - * experimentation.
- */ - max_perf_adj = fp_ext_toint(max_perf * cpu->max_perf); - *max = clamp_t(int, max_perf_adj, - cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - - min_perf = fp_ext_toint(max_perf * cpu->min_perf); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + return global.no_turbo || global.turbo_disabled ? + cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) @@ -1538,11 +1522,13 @@ static void intel_pstate_set_min_pstate( static void intel_pstate_max_within_limits(struct cpudata *cpu) { - int min_pstate, max_pstate; + int pstate; update_turbo_state(); - intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); - intel_pstate_set_pstate(cpu, max_pstate); + pstate = intel_pstate_get_base_pstate(cpu); + pstate = max(cpu->pstate.min_pstate, + fp_ext_toint(pstate * cpu->max_perf)); + intel_pstate_set_pstate(cpu, pstate); } static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) @@ -1704,11 +1690,13 @@ static inline int32_t get_target_pstate_ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) { - int max_perf, min_perf; + int max_pstate = intel_pstate_get_base_pstate(cpu); + int min_pstate; - intel_pstate_get_min_max(cpu, &min_perf, &max_perf); - pstate = clamp_t(int, pstate, min_perf, max_perf); - return pstate; + min_pstate = max(cpu->pstate.min_pstate, + fp_ext_toint(max_pstate * cpu->min_perf)); + max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf)); + return clamp_t(int, pstate, min_pstate, max_pstate); } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)