LKML Archive on lore.kernel.org
 help / Atom feed
From: Quentin Perret <quentin.perret@arm.com>
To: peterz@infradead.org, rjw@rjwysocki.net,
	linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Cc: gregkh@linuxfoundation.org, mingo@redhat.com,
	dietmar.eggemann@arm.com, morten.rasmussen@arm.com,
	chris.redpath@arm.com, patrick.bellasi@arm.com,
	valentin.schneider@arm.com, vincent.guittot@linaro.org,
	thara.gopinath@linaro.org, viresh.kumar@linaro.org,
	tkjos@google.com, joel@joelfernandes.org, smuckle@google.com,
	adharmap@quicinc.com, skannan@quicinc.com,
	pkondeti@codeaurora.org, juri.lelli@redhat.com,
	edubezval@gmail.com, srinivas.pandruvada@linux.intel.com,
	currojerez@riseup.net, javi.merino@kernel.org,
	quentin.perret@arm.com
Subject: [PATCH v5 05/14] sched/topology: Reference the Energy Model of CPUs when available
Date: Tue, 24 Jul 2018 13:25:12 +0100
Message-ID: <20180724122521.22109-6-quentin.perret@arm.com> (raw)
In-Reply-To: <20180724122521.22109-1-quentin.perret@arm.com>

The existing scheduling domain hierarchy is defined to map to the cache
topology of the system. However, Energy Aware Scheduling (EAS) requires
more knowledge about the platform, and specifically needs to know about
the span of Frequency Domains (FD), which do not always align with
caches.

To address this issue, use the Energy Model (EM) of the system to extend
the scheduler topology code with a representation of the FDs, alongside
the scheduling domains. More specifically, a linked list of FDs is
attached to each root domain. When multiple root domains are in use,
each list contains only the FDs covering the CPUs of its root domain. If
an FD spans CPUs of two different root domains, it will be
duplicated in both lists.

The lists are fully maintained by the scheduler from
partition_sched_domains() in order to cope with hotplug and cpuset
changes. As for scheduling domains, the lists are protected by RCU to
ensure safe concurrent updates.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
 kernel/sched/sched.h    |  23 +++++++
 kernel/sched/topology.c | 139 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 158 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2a72f1b9be0f..fdf6924d53e7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -44,6 +44,7 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/delayacct.h>
+#include <linux/energy_model.h>
 #include <linux/init_task.h>
 #include <linux/kprobes.h>
 #include <linux/kthread.h>
@@ -700,6 +701,12 @@ static inline bool sched_asym_prefer(int a, int b)
 	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
 }
 
+struct freq_domain {
+	struct em_freq_domain *obj;
+	struct freq_domain *next;
+	struct rcu_head rcu;
+};
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
@@ -748,6 +755,14 @@ struct root_domain {
 	struct cpupri		cpupri;
 
 	unsigned long		max_cpu_capacity;
+
+#ifdef CONFIG_ENERGY_MODEL
+	/*
+	 * NULL-terminated list of frequency domains intersecting with the
+	 * CPUs of the rd. Protected by RCU.
+	 */
+	struct freq_domain *fd;
+#endif
 };
 
 extern struct root_domain def_root_domain;
@@ -2203,3 +2218,11 @@ static inline unsigned long cpu_util_irq(struct rq *rq)
 
 #endif
 #endif
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_ENERGY_MODEL
+#define freq_domain_span(fd) (to_cpumask(((fd)->obj->cpus)))
+#else
+#define freq_domain_span(fd) NULL
+#endif
+#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05a831427bc7..ade1eae9d21b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -201,6 +201,121 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+#ifdef CONFIG_ENERGY_MODEL
+static void free_fd(struct freq_domain *fd)
+{
+	struct freq_domain *tmp;
+
+	while (fd) {
+		tmp = fd->next;
+		kfree(fd);
+		fd = tmp;
+	}
+}
+
+static void free_rd_fd(struct root_domain *rd)
+{
+	free_fd(rd->fd);
+}
+
+static struct freq_domain *find_fd(struct freq_domain *fd, int cpu)
+{
+	while (fd) {
+		if (cpumask_test_cpu(cpu, freq_domain_span(fd)))
+			return fd;
+		fd = fd->next;
+	}
+
+	return NULL;
+}
+
+static struct freq_domain *fd_init(int cpu)
+{
+	struct em_freq_domain *obj = em_cpu_get(cpu);
+	struct freq_domain *fd;
+
+	if (!obj) {
+		if (sched_debug())
+			pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
+		return NULL;
+	}
+
+	fd = kzalloc(sizeof(*fd), GFP_KERNEL);
+	if (!fd)
+		return NULL;
+	fd->obj = obj;
+
+	return fd;
+}
+
+static void freq_domain_debug(const struct cpumask *cpu_map,
+						struct freq_domain *fd)
+{
+	if (!sched_debug() || !fd)
+		return;
+
+	printk(KERN_DEBUG "root_domain %*pbl: fd:", cpumask_pr_args(cpu_map));
+
+	while (fd) {
+		printk(KERN_CONT " { fd%d cpus=%*pbl nr_cstate=%d }",
+				cpumask_first(freq_domain_span(fd)),
+				cpumask_pr_args(freq_domain_span(fd)),
+				em_fd_nr_cap_states(fd->obj));
+		fd = fd->next;
+	}
+
+	printk(KERN_CONT "\n");
+}
+
+static void destroy_freq_domain_rcu(struct rcu_head *rp)
+{
+	struct freq_domain *fd;
+
+	fd = container_of(rp, struct freq_domain, rcu);
+	free_fd(fd);
+}
+
+static void build_freq_domains(const struct cpumask *cpu_map)
+{
+	struct freq_domain *fd = NULL, *tmp;
+	int cpu = cpumask_first(cpu_map);
+	struct root_domain *rd = cpu_rq(cpu)->rd;
+	int i;
+
+	for_each_cpu(i, cpu_map) {
+		/* Skip already covered CPUs. */
+		if (find_fd(fd, i))
+			continue;
+
+		/* Create the new fd and add it to the local list. */
+		tmp = fd_init(i);
+		if (!tmp)
+			goto free;
+		tmp->next = fd;
+		fd = tmp;
+	}
+
+	freq_domain_debug(cpu_map, fd);
+
+	/* Attach the new list of frequency domains to the root domain. */
+	tmp = rd->fd;
+	rcu_assign_pointer(rd->fd, fd);
+	if (tmp)
+		call_rcu(&tmp->rcu, destroy_freq_domain_rcu);
+
+	return;
+
+free:
+	free_fd(fd);
+	tmp = rd->fd;
+	rcu_assign_pointer(rd->fd, NULL);
+	if (tmp)
+		call_rcu(&tmp->rcu, destroy_freq_domain_rcu);
+}
+#else
+static void free_rd_fd(struct root_domain *rd) { }
+#endif
+
 static void free_rootdomain(struct rcu_head *rcu)
 {
 	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
@@ -211,6 +326,7 @@ static void free_rootdomain(struct rcu_head *rcu)
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
+	free_rd_fd(rd);
 	kfree(rd);
 }
 
@@ -1882,8 +1998,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 	/* Destroy deleted domains: */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(doms_cur[i], doms_new[j])
-			    && dattrs_equal(dattr_cur, i, dattr_new, j))
+			if (cpumask_equal(doms_cur[i], doms_new[j]) &&
+			    dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
 		/* No match - a current sched domain not in new doms_new[] */
@@ -1903,8 +2019,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 	/* Build new domains: */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(doms_new[i], doms_cur[j])
-			    && dattrs_equal(dattr_new, i, dattr_cur, j))
+			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
+			    dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
 		/* No match - add a new doms_new */
@@ -1913,6 +2029,21 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 		;
 	}
 
+#ifdef CONFIG_ENERGY_MODEL
+	/* Build freq domains: */
+	for (i = 0; i < ndoms_new; i++) {
+		for (j = 0; j < n; j++) {
+			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
+			    cpu_rq(cpumask_first(doms_cur[j]))->rd->fd)
+				goto match3;
+		}
+		/* No match - add freq domains for a new rd */
+		build_freq_domains(doms_new[i]);
+match3:
+		;
+	}
+#endif
+
 	/* Remember the new sched domains: */
 	if (doms_cur != &fallback_doms)
 		free_sched_domains(doms_cur, ndoms_cur);
-- 
2.18.0


  parent reply index

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-24 12:25 [PATCH v5 00/14] Energy Aware Scheduling Quentin Perret
2018-07-24 12:25 ` [PATCH v5 01/14] sched: Relocate arch_scale_cpu_capacity Quentin Perret
2018-07-24 12:25 ` [PATCH v5 02/14] sched/cpufreq: Factor out utilization to frequency mapping Quentin Perret
2018-07-24 12:25 ` [PATCH v5 03/14] PM: Introduce an Energy Model management framework Quentin Perret
2018-08-09 21:52   ` Rafael J. Wysocki
2018-08-10  8:15     ` Quentin Perret
2018-08-10  8:41       ` Rafael J. Wysocki
2018-08-10  9:12         ` Quentin Perret
2018-08-10 11:13           ` Rafael J. Wysocki
2018-08-10 12:30             ` Quentin Perret
2018-08-12  9:49               ` Rafael J. Wysocki
2018-07-24 12:25 ` [PATCH v5 04/14] PM / EM: Expose the Energy Model in sysfs Quentin Perret
2018-07-24 12:25 ` Quentin Perret [this message]
2018-07-24 12:25 ` [PATCH v5 06/14] sched/topology: Lowest energy aware balancing sched_domain level pointer Quentin Perret
2018-07-26 16:00   ` Valentin Schneider
2018-07-26 17:01     ` Quentin Perret
2018-07-24 12:25 ` [PATCH v5 07/14] sched/topology: Introduce sched_energy_present static key Quentin Perret
2018-07-24 12:25 ` [PATCH v5 08/14] sched/fair: Clean-up update_sg_lb_stats parameters Quentin Perret
2018-07-24 12:25 ` [PATCH v5 09/14] sched: Add over-utilization/tipping point indicator Quentin Perret
2018-08-02 12:26   ` Peter Zijlstra
2018-08-02 13:03     ` Quentin Perret
2018-08-02 13:08       ` Peter Zijlstra
2018-08-02 13:18         ` Quentin Perret
2018-08-02 13:48           ` Vincent Guittot
2018-08-02 14:14             ` Quentin Perret
2018-08-02 15:14               ` Vincent Guittot
2018-08-02 15:30                 ` Quentin Perret
2018-08-02 15:55                   ` Vincent Guittot
2018-08-02 16:00                     ` Quentin Perret
2018-08-02 16:07                       ` Vincent Guittot
2018-08-02 16:10                         ` Quentin Perret
2018-08-02 16:38                           ` Vincent Guittot
2018-08-02 16:59                             ` Quentin Perret
2018-08-03  7:48                               ` Vincent Guittot
2018-08-03  8:18                                 ` Quentin Perret
2018-08-03 13:49                                   ` Vincent Guittot
2018-08-03 14:21                                     ` Vincent Guittot
2018-08-03 15:55                                     ` Quentin Perret
2018-08-06  8:40                                       ` Vincent Guittot
2018-08-06  9:43                                         ` Quentin Perret
2018-08-06 10:45                                           ` Vincent Guittot
2018-08-06 11:02                                             ` Quentin Perret
2018-08-06 10:08                                         ` Dietmar Eggemann
2018-08-06 10:33                                           ` Vincent Guittot
2018-08-06 12:29                                             ` Dietmar Eggemann
2018-08-06 12:37                                               ` Vincent Guittot
2018-08-06 13:20                                                 ` Dietmar Eggemann
2018-08-09  9:30   ` Vincent Guittot
2018-08-09  9:38     ` Quentin Perret
2018-07-24 12:25 ` [PATCH v5 10/14] sched/cpufreq: Refactor the utilization aggregation method Quentin Perret
2018-07-30 19:35   ` skannan
2018-07-31  7:59     ` Quentin Perret
2018-07-31 19:31       ` skannan
2018-08-01  7:32         ` Rafael J. Wysocki
2018-08-01  8:23           ` Quentin Perret
2018-08-01  8:35             ` Rafael J. Wysocki
2018-08-01  9:23               ` Quentin Perret
2018-08-01  9:40                 ` Rafael J. Wysocki
2018-08-02 13:04                 ` Peter Zijlstra
2018-08-02 15:39                   ` Quentin Perret
2018-08-03 13:04                     ` Quentin Perret
2018-08-02 12:33     ` Peter Zijlstra
2018-08-02 12:45       ` Peter Zijlstra
2018-08-02 15:21         ` Quentin Perret
2018-08-02 17:36           ` Peter Zijlstra
2018-08-03 12:42             ` Quentin Perret
2018-07-24 12:25 ` [PATCH v5 11/14] sched/fair: Introduce an energy estimation helper function Quentin Perret
2018-07-24 12:25 ` [PATCH v5 12/14] sched/fair: Select an energy-efficient CPU on task wake-up Quentin Perret
2018-08-02 13:54   ` Peter Zijlstra
2018-08-02 16:21     ` Quentin Perret
2018-07-24 12:25 ` [PATCH v5 13/14] OPTIONAL: arch_topology: Start Energy Aware Scheduling Quentin Perret
2018-07-24 12:25 ` [PATCH v5 14/14] OPTIONAL: cpufreq: dt: Register an Energy Model Quentin Perret

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180724122521.22109-6-quentin.perret@arm.com \
    --to=quentin.perret@arm.com \
    --cc=adharmap@quicinc.com \
    --cc=chris.redpath@arm.com \
    --cc=currojerez@riseup.net \
    --cc=dietmar.eggemann@arm.com \
    --cc=edubezval@gmail.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=javi.merino@kernel.org \
    --cc=joel@joelfernandes.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=morten.rasmussen@arm.com \
    --cc=patrick.bellasi@arm.com \
    --cc=peterz@infradead.org \
    --cc=pkondeti@codeaurora.org \
    --cc=rjw@rjwysocki.net \
    --cc=skannan@quicinc.com \
    --cc=smuckle@google.com \
    --cc=srinivas.pandruvada@linux.intel.com \
    --cc=thara.gopinath@linaro.org \
    --cc=tkjos@google.com \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    --cc=viresh.kumar@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox