From: Quentin Perret <quentin.perret@arm.com>
To: peterz@infradead.org, rjw@rjwysocki.net, linux-kernel@vger.kernel.org,
	linux-pm@vger.kernel.org
Cc: gregkh@linuxfoundation.org, mingo@redhat.com, dietmar.eggemann@arm.com,
	morten.rasmussen@arm.com, chris.redpath@arm.com, patrick.bellasi@arm.com,
	valentin.schneider@arm.com, vincent.guittot@linaro.org,
	thara.gopinath@linaro.org, viresh.kumar@linaro.org, tkjos@google.com,
	joel@joelfernandes.org, smuckle@google.com, adharmap@codeaurora.org,
	skannan@codeaurora.org, pkondeti@codeaurora.org, juri.lelli@redhat.com,
	edubezval@gmail.com, srinivas.pandruvada@linux.intel.com,
	currojerez@riseup.net, javi.merino@kernel.org, quentin.perret@arm.com
Subject: [PATCH v10 05/15] sched/topology: Reference the Energy Model of CPUs when available
Date: Mon, 3 Dec 2018 09:56:18 +0000
Message-ID: <20181203095628.11858-6-quentin.perret@arm.com>
In-Reply-To: <20181203095628.11858-1-quentin.perret@arm.com>

The existing scheduling domain hierarchy is defined to map to the cache
topology of the system. However, Energy Aware Scheduling (EAS) requires
more knowledge about the platform, and specifically needs to know about
the span of Performance Domains (PD), which do not always align with
caches.

To address this issue, use the Energy Model (EM) of the system to extend
the scheduler topology code with a representation of the PDs, alongside
the scheduling domains. More specifically, a linked list of PDs is
attached to each root domain. When multiple root domains are in use,
each list contains only the PDs covering the CPUs of its root domain. If
a PD spans the CPUs of multiple root domains, it will be duplicated in
all of those lists.

The lists are fully maintained by the scheduler from
partition_sched_domains() in order to cope with hotplug and cpuset
changes. As with scheduling domains, the lists are protected by RCU to
ensure safe concurrent updates.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
 kernel/sched/sched.h    |  21 +++++++
 kernel/sched/topology.c | 134 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 151 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dbbf966baf04..9a75ffedbe64 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -44,6 +44,7 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/delayacct.h>
+#include <linux/energy_model.h>
 #include <linux/init_task.h>
 #include <linux/kprobes.h>
 #include <linux/kthread.h>
@@ -708,6 +709,12 @@ static inline bool sched_asym_prefer(int a, int b)
 	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
 }
 
+struct perf_domain {
+	struct em_perf_domain *em_pd;
+	struct perf_domain *next;
+	struct rcu_head rcu;
+};
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
@@ -760,6 +767,12 @@ struct root_domain {
 	struct cpupri cpupri;
 
 	unsigned long max_cpu_capacity;
+
+	/*
+	 * NULL-terminated list of performance domains intersecting with the
+	 * CPUs of the rd. Protected by RCU.
+	 */
+	struct perf_domain *pd;
 };
 
 extern struct root_domain def_root_domain;
@@ -2278,3 +2291,11 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
 	return util;
 }
 #endif
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_ENERGY_MODEL
+#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
+#else
+#define perf_domain_span(pd) NULL
+#endif
+#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8d7f15ba5916..649d4aad4002 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -201,6 +201,116 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+#ifdef CONFIG_ENERGY_MODEL
+static void free_pd(struct perf_domain *pd)
+{
+	struct perf_domain *tmp;
+
+	while (pd) {
+		tmp = pd->next;
+		kfree(pd);
+		pd = tmp;
+	}
+}
+
+static struct perf_domain *find_pd(struct perf_domain *pd, int cpu)
+{
+	while (pd) {
+		if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
+			return pd;
+		pd = pd->next;
+	}
+
+	return NULL;
+}
+
+static struct perf_domain *pd_init(int cpu)
+{
+	struct em_perf_domain *obj = em_cpu_get(cpu);
+	struct perf_domain *pd;
+
+	if (!obj) {
+		if (sched_debug())
+			pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
+		return NULL;
+	}
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return NULL;
+	pd->em_pd = obj;
+
+	return pd;
+}
+
+static void perf_domain_debug(const struct cpumask *cpu_map,
+						struct perf_domain *pd)
+{
+	if (!sched_debug() || !pd)
+		return;
+
+	printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));
+
+	while (pd) {
+		printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }",
+				cpumask_first(perf_domain_span(pd)),
+				cpumask_pr_args(perf_domain_span(pd)),
+				em_pd_nr_cap_states(pd->em_pd));
+		pd = pd->next;
+	}
+
+	printk(KERN_CONT "\n");
+}
+
+static void destroy_perf_domain_rcu(struct rcu_head *rp)
+{
+	struct perf_domain *pd;
+
+	pd = container_of(rp, struct perf_domain, rcu);
+	free_pd(pd);
+}
+
+static void build_perf_domains(const struct cpumask *cpu_map)
+{
+	struct perf_domain *pd = NULL, *tmp;
+	int cpu = cpumask_first(cpu_map);
+	struct root_domain *rd = cpu_rq(cpu)->rd;
+	int i;
+
+	for_each_cpu(i, cpu_map) {
+		/* Skip already covered CPUs. */
+		if (find_pd(pd, i))
+			continue;
+
+		/* Create the new pd and add it to the local list. */
+		tmp = pd_init(i);
+		if (!tmp)
+			goto free;
+		tmp->next = pd;
+		pd = tmp;
+	}
+
+	perf_domain_debug(cpu_map, pd);
+
+	/* Attach the new list of performance domains to the root domain. */
+	tmp = rd->pd;
+	rcu_assign_pointer(rd->pd, pd);
+	if (tmp)
+		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
+
+	return;
+
+free:
+	free_pd(pd);
+	tmp = rd->pd;
+	rcu_assign_pointer(rd->pd, NULL);
+	if (tmp)
+		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
+}
+#else
+static void free_pd(struct perf_domain *pd) { }
+#endif /* CONFIG_ENERGY_MODEL */
+
 static void free_rootdomain(struct rcu_head *rcu)
 {
 	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
@@ -211,6 +321,7 @@ static void free_rootdomain(struct rcu_head *rcu)
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
+	free_pd(rd->pd);
 	kfree(rd);
 }
 
@@ -1961,8 +2072,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 	/* Destroy deleted domains: */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(doms_cur[i], doms_new[j])
-			    && dattrs_equal(dattr_cur, i, dattr_new, j))
+			if (cpumask_equal(doms_cur[i], doms_new[j]) &&
+			    dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
 		/* No match - a current sched domain not in new doms_new[] */
@@ -1982,8 +2093,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 	/* Build new domains: */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
-			if (cpumask_equal(doms_new[i], doms_cur[j])
-			    && dattrs_equal(dattr_new, i, dattr_cur, j))
+			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
+			    dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
 		/* No match - add a new doms_new */
@@ -1992,6 +2103,21 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 		;
 	}
 
+#ifdef CONFIG_ENERGY_MODEL
+	/* Build perf. domains: */
+	for (i = 0; i < ndoms_new; i++) {
+		for (j = 0; j < n; j++) {
+			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
+			    cpu_rq(cpumask_first(doms_cur[j]))->rd->pd)
+				goto match3;
+		}
+		/* No match - add perf. domains for a new rd */
+		build_perf_domains(doms_new[i]);
+match3:
+		;
+	}
+#endif
+
 	/* Remember the new sched domains: */
 	if (doms_cur != &fallback_doms)
 		free_sched_domains(doms_cur, ndoms_cur);
-- 
2.19.2
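
For illustration only (not part of the patch): a minimal reader-side sketch of
how a scheduler path could look up the performance domain spanning a given CPU
by walking the RCU-protected rd->pd list built above. The helper name
cpu_find_pd() is made up for this sketch; it only assumes the perf_domain_span()
macro and the rd->pd list introduced by this patch.

/*
 * Sketch: return the performance domain spanning @cpu from the list
 * attached to its root domain, or NULL if none is found. Callers must
 * hold rcu_read_lock(); this pairs with the rcu_assign_pointer() /
 * call_rcu() update scheme used in build_perf_domains().
 */
static struct perf_domain *cpu_find_pd(int cpu)
{
	struct perf_domain *pd = rcu_dereference(cpu_rq(cpu)->rd->pd);

	while (pd) {
		if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
			return pd;
		pd = pd->next;
	}

	return NULL;
}

The intent is that EAS code paths consume the list in this fashion, so list
updates from partition_sched_domains() never block readers.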