From: "Rafael J. Wysocki" <rjw@rjwysocki.net>
To: Linux PM <linux-pm@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>,
	Viresh Kumar <viresh.kumar@linaro.org>,
	Juri Lelli <juri.lelli@arm.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Patrick Bellasi <patrick.bellasi@arm.com>,
	Joel Fernandes <joelaf@google.com>,
	Morten Rasmussen <morten.rasmussen@arm.com>,
	Ingo Molnar <mingo@redhat.com>
Subject: [RFC][PATCH 2/2] cpufreq: schedutil: Force max frequency on busy CPUs
Date: Sun, 19 Mar 2017 14:34:32 +0100
Message-ID: <2185243.flNrap3qq1@aspire.rjw.lan>
In-Reply-To: <4366682.tsferJN35u@aspire.rjw.lan>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The PELT metric used by the schedutil governor underestimates CPU
utilization in some cases.  The likely reason is time spent in
interrupt handlers and similar contexts that is not accounted for by
PELT.

That can easily be demonstrated by running a kernel compilation on
a Sandy Bridge Intel processor, running turbostat in parallel with
it, and looking at the values written to the MSR_IA32_PERF_CTL
register.  The expected result would be that, with all CPUs 100%
busy, all of them would be requested to run in the maximum P-state,
but observation shows that this clearly is not the case.  The CPUs
run in the maximum P-state for a while, are then requested to run
slower, and return to the maximum P-state again after a while.  As a
result, the actual frequency of the processor visibly oscillates
below the sustainable maximum in a jittery fashion, which clearly is
not desirable.
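
For reference only, the requested P-state can also be read from user
space through the msr driver.  The sketch below is not part of the
patch; it assumes an Intel CPU with CONFIG_X86_MSR (the
/dev/cpu/*/msr interface), root privileges, and that bits 15:8 of
MSR_IA32_PERF_CTL (0x199) hold the requested frequency ratio, as on
the processors discussed here:

/* Hypothetical observer, for illustration only. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	for (;;) {
		/* The msr driver maps the file offset to the MSR address. */
		if (pread(fd, &val, sizeof(val), 0x199) != sizeof(val))
			break;
		printf("PERF_CTL requested ratio: %llu\n",
		       (unsigned long long)((val >> 8) & 0xff));
		sleep(1);
	}
	close(fd);
	return 0;
}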

To work around this issue, use the observation that, from the
schedutil governor's perspective, CPUs that are never idle should
always run at the maximum frequency, and make that happen.

To that end, add a counter of idle calls to struct sugov_cpu and
modify cpuidle_idle_call() to increment that counter every time it
is about to put the given CPU into an idle state.  Next, make the
schedutil governor check that counter for the current CPU every time
it is about to start its heavy computations.  If the counter has not
changed for longer than SUGOV_BUSY_THRESHOLD (50 ms), the CPU has not
been idle for at least that long, and the governor will choose the
maximum frequency for it without evaluating the PELT metric at all.
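
For illustration only, the following standalone user-space model
mirrors that bookkeeping (the names follow the patch, but this is
not the kernel code itself): the CPU is reported as busy once its
idle-call counter has not moved for longer than the 50 ms threshold.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_MSEC		1000000ULL
#define SUGOV_BUSY_THRESHOLD	(50 * NSEC_PER_MSEC)

struct sugov_cpu {
	unsigned long idle_calls;	/* bumped on every idle entry */
	unsigned long saved_idle_calls;	/* snapshot from the last update */
	uint64_t busy_start;		/* when the counter stopped moving */
	uint64_t last_update;		/* timestamp of the current update */
};

static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	if (sg_cpu->idle_calls != sg_cpu->saved_idle_calls) {
		sg_cpu->busy_start = 0;	/* the CPU went idle recently */
		return false;
	}
	if (!sg_cpu->busy_start) {
		sg_cpu->busy_start = sg_cpu->last_update;
		return false;
	}
	return sg_cpu->last_update - sg_cpu->busy_start > SUGOV_BUSY_THRESHOLD;
}

static void sugov_save_idle_calls(struct sugov_cpu *sg_cpu)
{
	if (!sg_cpu->busy_start)
		sg_cpu->saved_idle_calls = sg_cpu->idle_calls;
}

int main(void)
{
	struct sugov_cpu cpu = { .idle_calls = 7, .saved_idle_calls = 7 };
	uint64_t t;

	/* Simulate governor updates every 10 ms with no idle entries. */
	for (t = 10 * NSEC_PER_MSEC; t <= 80 * NSEC_PER_MSEC;
	     t += 10 * NSEC_PER_MSEC) {
		cpu.last_update = t;
		printf("t=%2llu ms busy=%d\n",
		       (unsigned long long)(t / NSEC_PER_MSEC),
		       sugov_cpu_is_busy(&cpu));
		sugov_save_idle_calls(&cpu);
	}
	return 0;	/* busy flips to 1 after the counter has been static for > 50 ms */
}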

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/sched/cpufreq.h    |    6 ++++++
 kernel/sched/cpufreq_schedutil.c |   38 ++++++++++++++++++++++++++++++++++++--
 kernel/sched/idle.c              |    3 +++
 3 files changed, 45 insertions(+), 2 deletions(-)

Index: linux-pm/kernel/sched/cpufreq_schedutil.c
===================================================================
--- linux-pm.orig/kernel/sched/cpufreq_schedutil.c
+++ linux-pm/kernel/sched/cpufreq_schedutil.c
@@ -20,6 +20,7 @@
 #include "sched.h"
 
 #define SUGOV_KTHREAD_PRIORITY	50
+#define SUGOV_BUSY_THRESHOLD	(50 * NSEC_PER_MSEC)
 
 struct sugov_tunables {
 	struct gov_attr_set attr_set;
@@ -55,6 +56,9 @@ struct sugov_cpu {
 
 	unsigned long iowait_boost;
 	unsigned long iowait_boost_max;
+	unsigned long idle_calls;
+	unsigned long saved_idle_calls;
+	u64 busy_start;
 	u64 last_update;
 
 	/* The fields below are only needed when sharing a policy. */
@@ -192,6 +196,34 @@ static void sugov_iowait_boost(struct su
 	sg_cpu->iowait_boost >>= 1;
 }
 
+void cpufreq_schedutil_idle_call(void)
+{
+	struct sugov_cpu *sg_cpu = this_cpu_ptr(&sugov_cpu);
+
+	sg_cpu->idle_calls++;
+}
+
+static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+{
+	if (sg_cpu->idle_calls != sg_cpu->saved_idle_calls) {
+		sg_cpu->busy_start = 0;
+		return false;
+	}
+
+	if (!sg_cpu->busy_start) {
+		sg_cpu->busy_start = sg_cpu->last_update;
+		return false;
+	}
+
+	return sg_cpu->last_update - sg_cpu->busy_start > SUGOV_BUSY_THRESHOLD;
+}
+
+static void sugov_save_idle_calls(struct sugov_cpu *sg_cpu)
+{
+	if (!sg_cpu->busy_start)
+		sg_cpu->saved_idle_calls = sg_cpu->idle_calls;
+}
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
 				unsigned int flags)
 {
@@ -207,7 +239,7 @@ static void sugov_update_single(struct u
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
-	if (flags & SCHED_CPUFREQ_RT_DL) {
+	if ((flags & SCHED_CPUFREQ_RT_DL) || sugov_cpu_is_busy(sg_cpu)) {
 		next_f = policy->cpuinfo.max_freq;
 	} else {
 		sugov_get_util(&util, &max);
@@ -215,6 +247,7 @@ static void sugov_update_single(struct u
 		next_f = get_next_freq(sg_policy, util, max);
 	}
 	sugov_update_commit(sg_policy, time, next_f);
+	sugov_save_idle_calls(sg_cpu);
 }
 
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
@@ -278,12 +311,13 @@ static void sugov_update_shared(struct u
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		if (flags & SCHED_CPUFREQ_RT_DL)
+		if ((flags & SCHED_CPUFREQ_RT_DL) || sugov_cpu_is_busy(sg_cpu))
 			next_f = sg_policy->policy->cpuinfo.max_freq;
 		else
 			next_f = sugov_next_freq_shared(sg_cpu);
 
 		sugov_update_commit(sg_policy, time, next_f);
+		sugov_save_idle_calls(sg_cpu);
 	}
 
 	raw_spin_unlock(&sg_policy->update_lock);
Index: linux-pm/kernel/sched/idle.c
===================================================================
--- linux-pm.orig/kernel/sched/idle.c
+++ linux-pm/kernel/sched/idle.c
@@ -151,6 +151,9 @@ static void cpuidle_idle_call(void)
 	 */
 	rcu_idle_enter();
 
+	/* Notify the schedutil cpufreq governor that we are entering idle. */
+	cpufreq_schedutil_idle_call();
+
 	if (cpuidle_not_available(drv, dev)) {
 		default_idle_call();
 		goto exit_idle;
Index: linux-pm/include/linux/sched/cpufreq.h
===================================================================
--- linux-pm.orig/include/linux/sched/cpufreq.h
+++ linux-pm/include/linux/sched/cpufreq.h
@@ -24,4 +24,10 @@ void cpufreq_add_update_util_hook(int cp
 void cpufreq_remove_update_util_hook(int cpu);
 #endif /* CONFIG_CPU_FREQ */
 
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+extern void cpufreq_schedutil_idle_call(void);
+#else
+static inline void cpufreq_schedutil_idle_call(void) {}
+#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+
 #endif /* _LINUX_SCHED_CPUFREQ_H */
