From: "Rafael J. Wysocki" <rjw@rjwysocki.net>
To: Francisco Jerez <currojerez@riseup.net>
Cc: Peter Zijlstra <peterz@infradead.org>,
intel-gfx@lists.freedesktop.org, "Pandruvada,
Srinivas" <srinivas.pandruvada@intel.com>,
linux-pm@vger.kernel.org
Subject: Re: [Intel-gfx] [PATCH 05/10] cpufreq: intel_pstate: Implement VLP controller statistics and status calculation.
Date: Thu, 19 Mar 2020 12:06:02 +0100 [thread overview]
Message-ID: <3789314.5IfZyTANZo@kreacher> (raw)
In-Reply-To: <20200310214203.26459-6-currojerez@riseup.net>
On Tuesday, March 10, 2020 10:41:58 PM CET Francisco Jerez wrote:
> The goal of the helper code introduced here is to compute two
> informational data structures: struct vlp_input_stats aggregating
> various scheduling and PM statistics gathered in every call of the
> update_util() hook, and struct vlp_status_sample which contains status
> information derived from the former indicating whether the system is
> likely to have an IO or CPU bottleneck. This will be used as main
> heuristic input by the new variably low-pass filtering controller (AKA
> VLP)
I'm not sure how widely used this is.
It would be good to provide a pointer to a definition of it where all of
the maths is described and the foundation of it is explained. Alternatively,
document it in the kernel source.
> that will assist the HWP at finding a reasonably energy-efficient
> P-state given the additional information available to the kernel about
> I/O utilization and scheduling behavior.
>
> Signed-off-by: Francisco Jerez <currojerez@riseup.net>
> ---
> drivers/cpufreq/intel_pstate.c | 230 +++++++++++++++++++++++++++++++++
> 1 file changed, 230 insertions(+)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 8cb5bf419b40..12ee350db2a9 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -19,6 +19,7 @@
> #include <linux/list.h>
> #include <linux/cpu.h>
> #include <linux/cpufreq.h>
> +#include <linux/debugfs.h>
> #include <linux/sysfs.h>
> #include <linux/types.h>
> #include <linux/fs.h>
> @@ -33,6 +34,8 @@
> #include <asm/cpufeature.h>
> #include <asm/intel-family.h>
>
> +#include "../../kernel/sched/sched.h"
> +
> #define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
>
> #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
> @@ -59,6 +62,11 @@ static inline int32_t mul_fp(int32_t x, int32_t y)
> return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
> }
>
> +static inline int rnd_fp(int32_t x)
What does rnd stand for?
> +{
> + return (x + (1 << (FRAC_BITS - 1))) >> FRAC_BITS;
> +}
> +
> static inline int32_t div_fp(s64 x, s64 y)
> {
> return div64_s64((int64_t)x << FRAC_BITS, y);
> @@ -169,6 +177,49 @@ struct vid_data {
> int32_t ratio;
> };
>
> +/**
> + * Scheduling and PM statistics gathered by update_vlp_sample() at
> + * every call of the VLP update_state() hook, used as heuristic
> + * inputs.
> + */
> +struct vlp_input_stats {
> + int32_t realtime_count;
> + int32_t io_wait_count;
> + uint32_t max_response_frequency_hz;
> + uint32_t last_response_frequency_hz;
> +};
> +
> +enum vlp_status {
> + VLP_BOTTLENECK_IO = 1 << 0,
> + /*
> + * XXX - Add other status bits here indicating a CPU or TDP
> + * bottleneck.
> + */
> +};
> +
> +/**
> + * Heuristic status information calculated by get_vlp_status_sample()
> + * from struct vlp_input_stats above, indicating whether the system
> + * has a potential IO or latency bottleneck.
> + */
> +struct vlp_status_sample {
> + enum vlp_status value;
> + int32_t realtime_avg;
> +};
> +
> +/**
> + * struct vlp_data - VLP controller parameters and state.
> + * @sample_interval_ns: Update interval in ns.
> + * @sample_frequency_hz: Reciprocal of the update interval in Hz.
> + */
> +struct vlp_data {
> + s64 sample_interval_ns;
> + int32_t sample_frequency_hz;
> +
> + struct vlp_input_stats stats;
> + struct vlp_status_sample status;
> +};
> +
> /**
> * struct global_params - Global parameters, mostly tunable via sysfs.
> * @no_turbo: Whether or not to use turbo P-states.
> @@ -239,6 +290,7 @@ struct cpudata {
>
> struct pstate_data pstate;
> struct vid_data vid;
> + struct vlp_data vlp;
>
> u64 last_update;
> u64 last_sample_time;
> @@ -268,6 +320,18 @@ struct cpudata {
>
> static struct cpudata **all_cpu_data;
>
> +/**
> + * struct vlp_params - VLP controller static configuration
> + * @sample_interval_ms: Update interval in ms.
> + * @avg*_hz: Exponential averaging frequencies of the various
> + * low-pass filters as an integer in Hz.
> + */
> +struct vlp_params {
> + int sample_interval_ms;
> + int avg_hz;
> + int debug;
> +};
> +
> /**
> * struct pstate_funcs - Per CPU model specific callbacks
> * @get_max: Callback to get maximum non turbo effective P state
> @@ -296,6 +360,11 @@ struct pstate_funcs {
> };
>
> static struct pstate_funcs pstate_funcs __read_mostly;
> +static struct vlp_params vlp_params __read_mostly = {
> + .sample_interval_ms = 10,
> + .avg_hz = 2,
> + .debug = 0,
> +};
>
> static int hwp_active __read_mostly;
> static int hwp_mode_bdw __read_mostly;
> @@ -1793,6 +1862,167 @@ static inline int32_t get_target_pstate(struct cpudata *cpu)
> return target;
> }
>
> +/**
> + * Initialize the struct vlp_data of the specified CPU to the defaults
> + * calculated from @vlp_params.
> + */
Nit: All of the function header comments need to be in the canonical kerneldoc
format, i.e. with arguments listed etc.
> +static void intel_pstate_reset_vlp(struct cpudata *cpu)
> +{
> + struct vlp_data *vlp = &cpu->vlp;
> +
> + vlp->sample_interval_ns = vlp_params.sample_interval_ms * NSEC_PER_MSEC;
> + vlp->sample_frequency_hz = max(1u, (uint32_t)MSEC_PER_SEC /
> + vlp_params.sample_interval_ms);
> + vlp->stats.last_response_frequency_hz = vlp_params.avg_hz;
> +}
> +
> +/**
> + * Fixed point representation with twice the usual number of
> + * fractional bits.
> + */
> +#define DFRAC_BITS 16
> +#define DFRAC_ONE (1 << DFRAC_BITS)
> +#define DFRAC_MAX_INT (0u - (uint32_t)DFRAC_ONE)
> +
> +/**
> + * Fast but rather inaccurate piecewise-linear approximation of a
> + * fixed-point inverse exponential:
> + *
> + * exp2n(p) = int_tofp(1) * 2 ^ (-p / DFRAC_ONE) + O(1)
> + *
> + * The error term should be lower in magnitude than 0.044.
> + */
> +static int32_t exp2n(uint32_t p)
> +{
> + if (p < 32 * DFRAC_ONE) {
> + /* Interpolate between 2^-floor(p) and 2^-ceil(p). */
> + const uint32_t floor_p = p >> DFRAC_BITS;
> + const uint32_t ceil_p = (p + DFRAC_ONE - 1) >> DFRAC_BITS;
> + const uint64_t frac_p = p - (floor_p << DFRAC_BITS);
> +
> + return ((int_tofp(1) >> floor_p) * (DFRAC_ONE - frac_p) +
> + (ceil_p >= 32 ? 0 : int_tofp(1) >> ceil_p) * frac_p) >>
> + DFRAC_BITS;
> + }
> +
> + /* Short-circuit to avoid overflow. */
> + return 0;
> +}
> +
> +/**
> + * Calculate the exponential averaging weight for a new sample based
> + * on the requested averaging frequency @hz and the delay since the
> + * last update.
> + */
> +static int32_t get_last_sample_avg_weight(struct cpudata *cpu, unsigned int hz)
> +{
> + /*
> + * Approximate, but saves several 64-bit integer divisions
> + * below and should be fully evaluated at compile-time.
> + * Causes the exponential averaging to have an effective base
> + * of 1.90702343749, which has little functional implications
> + * as long as the hz parameter is scaled accordingly.
> + */
> + const uint32_t ns_per_s_shift = order_base_2(NSEC_PER_SEC);
> + const uint64_t delta_ns = cpu->sample.time - cpu->last_sample_time;
> +
> + return exp2n(min((uint64_t)DFRAC_MAX_INT,
> + (hz * delta_ns) >> (ns_per_s_shift - DFRAC_BITS)));
> +}
> +
> +/**
> + * Calculate some status information heuristically based on the struct
> + * vlp_input_stats statistics gathered by the update_state() hook.
> + */
> +static const struct vlp_status_sample *get_vlp_status_sample(
> + struct cpudata *cpu, const int32_t po)
> +{
> + struct vlp_data *vlp = &cpu->vlp;
> + struct vlp_input_stats *stats = &vlp->stats;
> + struct vlp_status_sample *last_status = &vlp->status;
> +
> + /*
> + * Calculate the VLP_BOTTLENECK_IO state bit, which indicates
> + * whether some IO device driver has requested a PM response
> + * frequency bound, typically due to the device being under
> + * close to full utilization, which should cause the
> + * controller to make a more conservative trade-off between
> + * latency and energy usage, since performance isn't
> + * guaranteed to scale further with increasing CPU frequency
> + * whenever the system is close to IO-bound.
> + *
> + * Note that the maximum achievable response frequency is
> + * limited by the sampling frequency of the controller,
> + * response frequency requests greater than that will be
> + * promoted to infinity (i.e. no low-pass filtering) in order
> + * to avoid violating the response frequency constraint
> + * provided via PM QoS.
> + */
> + const bool bottleneck_io = stats->max_response_frequency_hz <
> + vlp->sample_frequency_hz;
> +
> + /*
> + * Calculate the realtime statistic that tracks the
> + * exponentially-averaged rate of occurrence of
> + * latency-sensitive events (like wake-ups from IO wait).
> + */
> + const uint64_t delta_ns = cpu->sample.time - cpu->last_sample_time;
> + const int32_t realtime_sample =
> + div_fp((uint64_t)(stats->realtime_count +
> + (bottleneck_io ? 0 : stats->io_wait_count)) *
> + NSEC_PER_SEC,
> + 100 * delta_ns);
> + const int32_t alpha = get_last_sample_avg_weight(cpu,
> + vlp_params.avg_hz);
> + const int32_t realtime_avg = realtime_sample +
> + mul_fp(alpha, last_status->realtime_avg - realtime_sample);
> +
> + /* Consume the input statistics. */
> + stats->io_wait_count = 0;
> + stats->realtime_count = 0;
> + if (bottleneck_io)
> + stats->last_response_frequency_hz =
> + stats->max_response_frequency_hz;
> + stats->max_response_frequency_hz = 0;
> +
> + /* Update the state of the controller. */
> + last_status->realtime_avg = realtime_avg;
> + last_status->value = (bottleneck_io ? VLP_BOTTLENECK_IO : 0);
> +
> + /* Update state used for tracing. */
> + cpu->sample.busy_scaled = int_tofp(stats->max_response_frequency_hz);
> + cpu->iowait_boost = realtime_avg;
> +
> + return last_status;
> +}
> +
> +/**
> + * Collect some scheduling and PM statistics in response to an
> + * update_state() call.
> + */
> +static bool update_vlp_sample(struct cpudata *cpu, u64 time, unsigned int flags)
> +{
> + struct vlp_input_stats *stats = &cpu->vlp.stats;
> +
> + /* Update PM QoS request. */
> + const uint32_t resp_hz = cpu_response_frequency_qos_limit();
> +
> + stats->max_response_frequency_hz = !resp_hz ? UINT_MAX :
> + max(stats->max_response_frequency_hz, resp_hz);
> +
> + /* Update scheduling statistics. */
> + if ((flags & SCHED_CPUFREQ_IOWAIT))
> + stats->io_wait_count++;
> +
> + if (cpu_rq(cpu->cpu)->rt.rt_nr_running)
> + stats->realtime_count++;
> +
> + /* Return whether a P-state update is due. */
> + return smp_processor_id() == cpu->cpu &&
> + time - cpu->sample.time >= cpu->vlp.sample_interval_ns &&
> + intel_pstate_sample(cpu, time);
> +}
> +
> static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
> {
> int min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2020-03-19 11:06 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-10 21:41 [Intel-gfx] [RFC] GPU-bound energy efficiency improvements for the intel_pstate driver (v2) Francisco Jerez
2020-03-10 21:41 ` [Intel-gfx] [PATCH 01/10] PM: QoS: Add CPU_RESPONSE_FREQUENCY global PM QoS limit Francisco Jerez
2020-03-11 12:42 ` Peter Zijlstra
2020-03-11 19:23 ` Francisco Jerez
2020-03-11 19:23 ` [Intel-gfx] [PATCHv2 " Francisco Jerez
2020-03-19 10:25 ` Rafael J. Wysocki
2020-03-10 21:41 ` [Intel-gfx] [PATCH 02/10] drm/i915: Adjust PM QoS response frequency based on GPU load Francisco Jerez
2020-03-10 22:26 ` Chris Wilson
2020-03-11 0:34 ` Francisco Jerez
2020-03-18 19:42 ` Francisco Jerez
2020-03-20 2:46 ` Francisco Jerez
2020-03-20 10:06 ` Chris Wilson
2020-03-11 10:00 ` Tvrtko Ursulin
2020-03-11 10:21 ` Chris Wilson
2020-03-11 19:54 ` Francisco Jerez
2020-03-12 11:52 ` Tvrtko Ursulin
2020-03-13 7:39 ` Francisco Jerez
2020-03-16 20:54 ` Francisco Jerez
2020-03-10 21:41 ` [Intel-gfx] [PATCH 03/10] OPTIONAL: drm/i915: Expose PM QoS control parameters via debugfs Francisco Jerez
2020-03-10 21:41 ` [Intel-gfx] [PATCH 04/10] Revert "cpufreq: intel_pstate: Drop ->update_util from pstate_funcs" Francisco Jerez
2020-03-19 10:45 ` Rafael J. Wysocki
2020-03-10 21:41 ` [Intel-gfx] [PATCH 05/10] cpufreq: intel_pstate: Implement VLP controller statistics and status calculation Francisco Jerez
2020-03-19 11:06 ` Rafael J. Wysocki [this message]
2020-03-10 21:41 ` [Intel-gfx] [PATCH 06/10] cpufreq: intel_pstate: Implement VLP controller target P-state range estimation Francisco Jerez
2020-03-19 11:12 ` Rafael J. Wysocki
2020-03-10 21:42 ` [Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts Francisco Jerez
2020-03-17 23:59 ` Pandruvada, Srinivas
2020-03-18 19:51 ` Francisco Jerez
2020-03-18 20:10 ` Pandruvada, Srinivas
2020-03-18 20:22 ` Francisco Jerez
2020-03-23 20:13 ` Pandruvada, Srinivas
2020-03-10 21:42 ` [Intel-gfx] [PATCH 08/10] cpufreq: intel_pstate: Enable VLP controller based on ACPI FADT profile and CPUID Francisco Jerez
2020-03-19 11:20 ` Rafael J. Wysocki
2020-03-10 21:42 ` [Intel-gfx] [PATCH 09/10] OPTIONAL: cpufreq: intel_pstate: Add tracing of VLP controller status Francisco Jerez
2020-03-10 21:42 ` [Intel-gfx] [PATCH 10/10] OPTIONAL: cpufreq: intel_pstate: Expose VLP controller parameters via debugfs Francisco Jerez
2020-03-11 2:35 ` [Intel-gfx] [RFC] GPU-bound energy efficiency improvements for the intel_pstate driver (v2) Pandruvada, Srinivas
2020-03-11 3:55 ` Francisco Jerez
2020-03-11 4:25 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
2020-03-12 2:31 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for GPU-bound energy efficiency improvements for the intel_pstate driver (v2). (rev2) Patchwork
2020-03-12 2:32 ` Patchwork
2020-03-23 23:29 ` [Intel-gfx] [RFC] GPU-bound energy efficiency improvements for the intel_pstate driver (v2) Pandruvada, Srinivas
2020-03-24 0:23 ` Francisco Jerez
2020-03-24 19:16 ` Francisco Jerez
2020-03-24 20:03 ` Pandruvada, Srinivas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3789314.5IfZyTANZo@kreacher \
--to=rjw@rjwysocki.net \
--cc=currojerez@riseup.net \
--cc=intel-gfx@lists.freedesktop.org \
--cc=linux-pm@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=srinivas.pandruvada@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).