All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, linux@arm.linux.org.uk,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will.deacon@arm.com>,
	Juri Lelli <juri.lelli@arm.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Morten Rasmussen <morten.rasmussen@arm.com>,
	Viresh Kumar <viresh.kumar@linaro.org>,
	"Rafael J . Wysocki" <rjw@rjwysocki.net>
Subject: [PATCH v2 10/10] drivers base/arch_topology: inline cpu- and frequency-invariant accounting
Date: Thu,  6 Jul 2017 10:49:48 +0100	[thread overview]
Message-ID: <20170706094948.8779-11-dietmar.eggemann@arm.com> (raw)
In-Reply-To: <20170706094948.8779-1-dietmar.eggemann@arm.com>

To speed up the cpu- and frequency-invariant accounting of the task
scheduler, make sure that the CIE (topology_get_cpu_scale()) and FIE
(topology_get_freq_scale()) get completely inlined into the task
scheduler consumer functions (e.g. __update_load_avg_se()).

This patch changes the interface for CIE and FIE from:

drivers/base/arch_topology.c:

 static DEFINE_PER_CPU(unsigned long, item);

 unsigned long topology_get_item_scale(...)
 {
 	return per_cpu(item, cpu)
 }

include/linux/arch_topology.h:

 unsigned long topology_get_item_scale(...);

to:

drivers/base/arch_topology.c:

 DEFINE_PER_CPU(unsigned long, item);

include/linux/arch_topology.h:

 DECLARE_PER_CPU(unsigned long, item);

 static inline
 unsigned long topology_get_item_scale(...)
 {
 	return per_cpu(item, cpu)
 }

An uplift in performance could be detected running the kernel with the
following test patch on top (on JUNO R0 (arm64)):

@@ -2812,10 +2812,18 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
	unsigned long scale_freq, scale_cpu;
	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
	u64 periods;
+       u64 t1, t2;
+
+       t1 = sched_clock_cpu(cpu);

	scale_freq = arch_scale_freq_capacity(NULL, cpu);
	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);

+       t2 = sched_clock_cpu(cpu);
+
+       trace_printk("cpu=%d t1=%llu t2=%llu diff=%llu\n",
+                    cpu, t1, t2, t2 - t1);
+
	delta += sa->period_contrib;
	periods = delta / 1024; /* A period is * 1024us * (~1ms) */

The following test results (3 test runs each) have been obtained by
tracing the output of this trace_printk() (diff=x) for Cortex A-53
(LITTLE) and Cortex A-57 (big) cpus w/ (inline) and w/o (non-inline)
this patch.

         mean  max  min

A-57 inline:

	119.6  300   60
	 96.8  280   60
	110.2  660   60

A-57 non-inline:

	142.8  460   80
	157.6  680   80
	153.4  720   80

A-53 inline:

	141.6  360  100
	118.8  500  100
	148.6  380  100

A-53 non-inline:

	293    840  120
	253.2  840  120
	299.6 1060  140

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
---
 drivers/base/arch_topology.c  | 14 ++------------
 include/linux/arch_topology.h | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 63fb3f945d21..b4481cff14bf 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -22,12 +22,7 @@
 #include <linux/string.h>
 #include <linux/sched/topology.h>
 
-static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
-
-unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu)
-{
-	return per_cpu(freq_scale, cpu);
-}
+DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
 
 void topology_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 			     unsigned long max_freq)
@@ -43,12 +38,7 @@ void topology_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 
 
 static DEFINE_MUTEX(cpu_scale_mutex);
-static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
-
-unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
-{
-	return per_cpu(cpu_scale, cpu);
-}
+DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
 
 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
 {
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 168104d2d2cf..361e85a30151 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,12 +11,23 @@ void topology_normalize_cpu_scale(void);
 struct device_node;
 int topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
+DECLARE_PER_CPU(unsigned long, cpu_scale);
+DECLARE_PER_CPU(unsigned long, freq_scale);
+
 struct sched_domain;
-unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu);
+static inline
+unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(cpu_scale, cpu);
+}
 
 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
 
-unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu);
+static inline
+unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(freq_scale, cpu);
+}
 
 void topology_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 			     unsigned long max_freq);
-- 
2.11.0

  parent reply	other threads:[~2017-07-06  9:50 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-06  9:49 [PATCH v2 00/10] arm, arm64: frequency- and cpu-invariant accounting support for task scheduler Dietmar Eggemann
2017-07-06  9:49 ` [PATCH v2 01/10] drivers base/arch_topology: free cpumask cpus_to_visit Dietmar Eggemann
2017-07-06 10:22   ` Viresh Kumar
2017-07-06 10:59     ` Juri Lelli
2017-07-06 11:15       ` Viresh Kumar
2017-07-07 15:50         ` Dietmar Eggemann
2017-07-06  9:49 ` [PATCH v2 02/10] cpufreq: provide data for frequency-invariant load-tracking support Dietmar Eggemann
2017-07-06 10:40   ` Viresh Kumar
2017-07-06 22:38     ` Rafael J. Wysocki
2017-07-07 16:01     ` Dietmar Eggemann
2017-07-07 16:18       ` Rafael J. Wysocki
2017-07-07 17:06         ` Dietmar Eggemann
2017-07-08 12:09           ` Rafael J. Wysocki
2017-07-10  6:54             ` Viresh Kumar
2017-07-10 12:46               ` Rafael J. Wysocki
2017-07-11  6:39                 ` Viresh Kumar
2017-07-11 15:21                   ` Dietmar Eggemann
2017-07-13 12:40                     ` Sudeep Holla
2017-07-13 13:08                       ` Dietmar Eggemann
2017-07-13 14:06                         ` Sudeep Holla
2017-07-10  9:30             ` Peter Zijlstra
2017-07-10  9:42               ` Viresh Kumar
2017-07-10 10:31                 ` Dietmar Eggemann
2017-07-10 12:02             ` Dietmar Eggemann
2017-07-11  6:01               ` Viresh Kumar
2017-07-11 15:06                 ` Dietmar Eggemann
2017-07-11 14:59                   ` Rafael J. Wysocki
2017-07-11 15:12                     ` Dietmar Eggemann
2017-07-12  4:09                   ` Viresh Kumar
2017-07-12  8:31                     ` Peter Zijlstra
2017-07-12  9:27                       ` Viresh Kumar
2017-07-12 11:14                         ` Peter Zijlstra
2017-07-12 23:13                           ` Rafael J. Wysocki
2017-07-13  7:49                             ` Peter Zijlstra
2017-07-13  8:48                             ` Viresh Kumar
2017-07-13 11:15                               ` Peter Zijlstra
2017-07-13 14:04                           ` Sudeep Holla
2017-07-13 14:42                             ` Peter Zijlstra
2017-07-13 15:00                               ` Sudeep Holla
2017-07-13 12:54                         ` Sudeep Holla
2017-07-13 12:49                     ` Sudeep Holla
2017-07-10  6:40       ` Viresh Kumar
2017-07-06  9:49 ` [PATCH v2 03/10] drivers base/arch_topology: " Dietmar Eggemann
2017-07-06 10:45   ` Viresh Kumar
2017-07-07 16:51     ` Dietmar Eggemann
2017-07-06  9:49 ` [PATCH v2 04/10] arm: wire cpufreq input data for frequency-invariant accounting up to the arch Dietmar Eggemann
2017-07-06 10:42   ` Viresh Kumar
2017-07-10 15:13     ` Dietmar Eggemann
2017-07-11  6:32       ` Viresh Kumar
2017-07-06  9:49 ` [PATCH v2 05/10] arm: wire frequency-invariant accounting support up to the task scheduler Dietmar Eggemann
2017-07-06 10:46   ` Viresh Kumar
2017-07-06  9:49 ` [PATCH v2 06/10] arm: wire cpu-invariant " Dietmar Eggemann
2017-07-06 10:47   ` Viresh Kumar
2017-07-06  9:49 ` [PATCH v2 07/10] arm64: wire cpufreq input data for frequency-invariant accounting up to the arch Dietmar Eggemann
2017-07-06 10:48   ` Viresh Kumar
2017-07-06  9:49 ` [PATCH v2 08/10] arm64: wire frequency-invariant accounting support up to the task scheduler Dietmar Eggemann
2017-07-06 10:48   ` Viresh Kumar
2017-07-06  9:49 ` [PATCH v2 09/10] arm64: wire cpu-invariant " Dietmar Eggemann
2017-07-06 10:49   ` Viresh Kumar
2017-07-06  9:49 ` Dietmar Eggemann [this message]
2017-07-06 10:57   ` [PATCH v2 10/10] drivers base/arch_topology: inline cpu- and frequency-invariant accounting Viresh Kumar
2017-07-10 15:17     ` Dietmar Eggemann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170706094948.8779-11-dietmar.eggemann@arm.com \
    --to=dietmar.eggemann@arm.com \
    --cc=catalin.marinas@arm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=juri.lelli@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=linux@arm.linux.org.uk \
    --cc=morten.rasmussen@arm.com \
    --cc=peterz@infradead.org \
    --cc=rjw@rjwysocki.net \
    --cc=rmk+kernel@armlinux.org.uk \
    --cc=vincent.guittot@linaro.org \
    --cc=viresh.kumar@linaro.org \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.