linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Morten Rasmussen <morten.rasmussen@arm.com>
To: peterz@infradead.org, mingo@redhat.com
Cc: vincent.guittot@linaro.org, daniel.lezcano@linaro.org,
	Dietmar Eggemann <Dietmar.Eggemann@arm.com>,
	yuyang.du@intel.com, mturquette@baylibre.com, rjw@rjwysocki.net,
	Juri Lelli <Juri.Lelli@arm.com>,
	sgurrappadi@nvidia.com, pang.xunlei@zte.com.cn,
	linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Subject: [RFCv5 PATCH 25/46] sched: Add over-utilization/tipping point indicator
Date: Tue,  7 Jul 2015 19:24:08 +0100	[thread overview]
Message-ID: <1436293469-25707-26-git-send-email-morten.rasmussen@arm.com> (raw)
In-Reply-To: <1436293469-25707-1-git-send-email-morten.rasmussen@arm.com>

Energy-aware scheduling is only meant to be active while the system is
_not_ over-utilized. That is, there are spare cycles available to shift
tasks around based on their actual utilization to get a more
energy-efficient task distribution without depriving any tasks. When
above the tipping point task placement is done the traditional way,
spreading the tasks across as many cpus as possible based on priority
scaled load to preserve smp_nice.

The over-utilization condition is conservatively chosen to indicate
over-utilization as soon as one cpu is fully utilized at it's highest
frequency. We don't consider groups as lumping usage and capacity
together for a group of cpus may hide the fact that one or more cpus in
the group are over-utilized while group-siblings are partially idle. The
tasks could be served better if moved to another group with completely
idle cpus. This is particularly problematic if some cpus have a
significantly reduced capacity due to RT/IRQ pressure or if the system
has cpus of different capacity (e.g. ARM big.LITTLE).

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
---
 kernel/sched/fair.c  | 35 +++++++++++++++++++++++++++++++----
 kernel/sched/sched.h |  3 +++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bf1d34c..99e43ee 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4281,6 +4281,8 @@ static inline void hrtick_update(struct rq *rq)
 }
 #endif
 
+static bool cpu_overutilized(int cpu);
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -4291,6 +4293,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int task_new = !(flags & ENQUEUE_WAKEUP);
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
@@ -4325,6 +4328,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!se) {
 		update_rq_runnable_avg(rq, rq->nr_running);
 		add_nr_running(rq, 1);
+		if (!task_new && !rq->rd->overutilized &&
+		    cpu_overutilized(rq->cpu))
+			rq->rd->overutilized = true;
 	}
 	hrtick_update(rq);
 }
@@ -4952,6 +4958,14 @@ static int find_new_capacity(struct energy_env *eenv,
 	return idx;
 }
 
+static unsigned int capacity_margin = 1280; /* ~20% margin */
+
+static bool cpu_overutilized(int cpu)
+{
+	return (capacity_of(cpu) * 1024) <
+				(get_cpu_usage(cpu) * capacity_margin);
+}
+
 /*
  * sched_group_energy(): Returns absolute energy consumption of cpus belonging
  * to the sched_group including shared resources shared only by members of the
@@ -6756,11 +6770,12 @@ static enum group_type group_classify(struct lb_env *env,
  * @local_group: Does group contain this_cpu.
  * @sgs: variable to hold the statistics for this group.
  * @overload: Indicate more than one runnable task for any CPU.
+ * @overutilized: Indicate overutilization for any CPU.
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
 			int local_group, struct sg_lb_stats *sgs,
-			bool *overload)
+			bool *overload, bool *overutilized)
 {
 	unsigned long load;
 	int i;
@@ -6790,6 +6805,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->sum_weighted_load += weighted_cpuload(i);
 		if (idle_cpu(i))
 			sgs->idle_cpus++;
+
+		if (cpu_overutilized(i))
+			*overutilized = true;
 	}
 
 	/* Adjust by relative CPU capacity of the group */
@@ -6895,7 +6913,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats tmp_sgs;
 	int load_idx, prefer_sibling = 0;
-	bool overload = false;
+	bool overload = false, overutilized = false;
 
 	if (child && child->flags & SD_PREFER_SIBLING)
 		prefer_sibling = 1;
@@ -6917,7 +6935,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		}
 
 		update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
-						&overload);
+						&overload, &overutilized);
 
 		if (local_group)
 			goto next_group;
@@ -6959,8 +6977,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		/* update overload indicator if we are at root domain */
 		if (env->dst_rq->rd->overload != overload)
 			env->dst_rq->rd->overload = overload;
-	}
 
+		/* Update over-utilization (tipping point, U >= 0) indicator */
+		if (env->dst_rq->rd->overutilized != overutilized)
+			env->dst_rq->rd->overutilized = overutilized;
+	} else {
+		if (!env->dst_rq->rd->overutilized && overutilized)
+			env->dst_rq->rd->overutilized = true;
+	}
 }
 
 /**
@@ -8324,6 +8348,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		task_tick_numa(rq, curr);
 
 	update_rq_runnable_avg(rq, 1);
+
+	if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
+		rq->rd->overutilized = true;
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8a51692..fbe2da0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -535,6 +535,9 @@ struct root_domain {
 	/* Indicate more than one runnable task for any CPU */
 	bool overload;
 
+	/* Indicate one or more cpus over-utilized (tipping point) */
+	bool overutilized;
+
 	/*
 	 * The bit corresponding to a CPU gets set here if such CPU has more
 	 * than one runnable -deadline task (as it is below for RT tasks).
-- 
1.9.1


  parent reply	other threads:[~2015-07-07 18:24 UTC|newest]

Thread overview: 155+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-07 18:23 [RFCv5 PATCH 00/46] sched: Energy cost model for energy-aware scheduling Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 01/46] arm: Frequency invariant scheduler load-tracking support Morten Rasmussen
2015-07-21 15:41   ` [RFCv5, " Leo Yan
2015-07-22 13:31     ` Morten Rasmussen
2015-07-22 14:59       ` Leo Yan
2015-07-23 11:06         ` Morten Rasmussen
2015-07-23 14:22           ` Leo Yan
2015-07-24  9:43             ` Morten Rasmussen
2015-08-03  9:22   ` [RFCv5 PATCH " Vincent Guittot
2015-08-17 15:59     ` Dietmar Eggemann
2015-08-11  9:27   ` Peter Zijlstra
2015-08-14 16:08     ` Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 02/46] sched: Make load tracking frequency scale-invariant Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 03/46] arm: vexpress: Add CPU clock-frequencies to TC2 device-tree Morten Rasmussen
2015-07-08 12:36   ` Jon Medhurst (Tixy)
2015-07-10 13:35     ` Dietmar Eggemann
2015-07-07 18:23 ` [RFCv5 PATCH 04/46] sched: Convert arch_scale_cpu_capacity() from weak function to #define Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 05/46] arm: Update arch_scale_cpu_capacity() to reflect change to define Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 06/46] sched: Make usage tracking cpu scale-invariant Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 07/46] arm: Cpu invariant scheduler load-tracking support Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 08/46] sched: Get rid of scaling usage by cpu_capacity_orig Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 09/46] sched: Track blocked utilization contributions Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 10/46] sched: Include blocked utilization in usage tracking Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 11/46] sched: Remove blocked load and utilization contributions of dying tasks Morten Rasmussen
2015-07-22  6:51   ` Leo Yan
2015-07-22 13:45     ` Morten Rasmussen
2015-08-11 11:39   ` Peter Zijlstra
2015-08-11 14:58     ` Morten Rasmussen
2015-08-11 17:23       ` Peter Zijlstra
2015-08-12  9:08         ` Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 12/46] sched: Initialize CFS task load and usage before placing task on rq Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 13/46] sched: Documentation for scheduler energy cost model Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 14/46] sched: Make energy awareness a sched feature Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 15/46] sched: Introduce energy data structures Morten Rasmussen
2015-07-07 18:23 ` [RFCv5 PATCH 16/46] sched: Allocate and initialize " Morten Rasmussen
2015-08-12 10:04   ` Peter Zijlstra
2015-08-12 17:08     ` Dietmar Eggemann
2015-08-12 10:17   ` Peter Zijlstra
2015-08-12 17:09     ` Dietmar Eggemann
2015-08-12 17:23       ` Peter Zijlstra
2015-07-07 18:24 ` [RFCv5 PATCH 17/46] sched: Introduce SD_SHARE_CAP_STATES sched_domain flag Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 18/46] arm: topology: Define TC2 energy and provide it to the scheduler Morten Rasmussen
2015-08-12 10:33   ` Peter Zijlstra
2015-08-12 18:47     ` Dietmar Eggemann
2015-08-17  9:19   ` [RFCv5, " Leo Yan
2015-08-20 19:19     ` Dietmar Eggemann
2015-07-07 18:24 ` [RFCv5 PATCH 19/46] sched: Compute cpu capacity available at current frequency Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 20/46] sched: Relocated get_cpu_usage() and change return type Morten Rasmussen
2015-08-12 10:59   ` Peter Zijlstra
2015-08-12 14:40     ` Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 21/46] sched: Highest energy aware balancing sched_domain level pointer Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 22/46] sched: Calculate energy consumption of sched_group Morten Rasmussen
2015-08-13 15:34   ` Peter Zijlstra
2015-08-14 10:28     ` Morten Rasmussen
2015-09-02 17:19   ` Leo Yan
2015-09-17 16:41     ` Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 23/46] sched: Extend sched_group_energy to test load-balancing decisions Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 24/46] sched: Estimate energy impact of scheduling decisions Morten Rasmussen
2015-07-07 18:24 ` Morten Rasmussen [this message]
2015-08-13 17:35   ` [RFCv5 PATCH 25/46] sched: Add over-utilization/tipping point indicator Peter Zijlstra
2015-08-14 13:02     ` Morten Rasmussen
2015-09-29 20:08       ` Steve Muckle
2015-10-09 12:49         ` Morten Rasmussen
2015-08-17 13:10   ` Leo Yan
2015-07-07 18:24 ` [RFCv5 PATCH 26/46] sched: Store system-wide maximum cpu capacity in root domain Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 27/46] sched, cpuidle: Track cpuidle state index in the scheduler Morten Rasmussen
2015-07-21  6:41   ` Leo Yan
2015-07-21 15:16     ` Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 28/46] sched: Count number of shallower idle-states in struct sched_group_energy Morten Rasmussen
2015-08-13 18:10   ` Peter Zijlstra
2015-08-14 19:08     ` Sai Gurrappadi
2015-07-07 18:24 ` [RFCv5 PATCH 29/46] sched: Determine the current sched_group idle-state Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 30/46] sched: Add cpu capacity awareness to wakeup balancing Morten Rasmussen
2015-08-13 18:24   ` Peter Zijlstra
2015-08-14 16:20     ` Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 31/46] sched: Consider spare cpu capacity at task wake-up Morten Rasmussen
2015-07-21  0:37   ` Sai Gurrappadi
2015-07-21 15:12     ` Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 32/46] sched: Energy-aware wake-up task placement Morten Rasmussen
2015-07-17  0:10   ` Sai Gurrappadi
2015-07-20 15:38     ` Morten Rasmussen
2015-08-17 16:23   ` Leo Yan
2015-09-02 17:11   ` Leo Yan
2015-09-18 10:34     ` Dietmar Eggemann
2015-09-20 18:39       ` Steve Muckle
2015-09-20 22:03         ` Leo Yan
2015-09-29  0:15           ` Steve Muckle
2015-07-07 18:24 ` [RFCv5 PATCH 33/46] sched: Consider a not over-utilized energy-aware system as balanced Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 34/46] sched: Enable idle balance to pull single task towards cpu with higher capacity Morten Rasmussen
2015-08-15  9:15   ` Peter Zijlstra
2015-07-07 18:24 ` [RFCv5 PATCH 35/46] sched: Disable energy-unfriendly nohz kicks Morten Rasmussen
2015-08-15  9:33   ` Peter Zijlstra
2015-07-07 18:24 ` [RFCv5 PATCH 36/46] sched: Prevent unnecessary active balance of single task in sched group Morten Rasmussen
2015-08-15  9:46   ` Peter Zijlstra
2015-07-07 18:24 ` [RFCv5 PATCH 37/46] cpufreq: introduce cpufreq_driver_might_sleep Morten Rasmussen
2015-07-08 15:09   ` Michael Turquette
2015-07-07 18:24 ` [RFCv5 PATCH 38/46] sched: scheduler-driven cpu frequency selection Morten Rasmussen
2015-07-08 15:09   ` Michael Turquette
2015-08-11  2:14   ` Leo Yan
2015-08-11  8:59     ` Juri Lelli
2015-08-15 12:35   ` Peter Zijlstra
2015-09-04 13:27     ` Juri Lelli
2015-09-14 15:57       ` Juri Lelli
2015-09-15 13:45         ` Peter Zijlstra
2015-09-15 16:22           ` Juri Lelli
2015-08-15 13:05   ` Peter Zijlstra
2015-08-25 10:45     ` Juri Lelli
2015-10-08  0:14       ` Steve Muckle
2015-10-08  9:41         ` Juri Lelli
2015-09-28 16:48   ` Punit Agrawal
2015-09-29  0:26     ` Steve Muckle
2015-07-07 18:24 ` [RFCv5 PATCH 39/46] sched/cpufreq_sched: use static key for " Morten Rasmussen
2015-07-08 15:19   ` Michael Turquette
2015-07-10  9:50     ` Juri Lelli
2015-08-15 12:40     ` Peter Zijlstra
2015-07-07 18:24 ` [RFCv5 PATCH 40/46] sched/cpufreq_sched: compute freq_new based on capacity_orig_of() Morten Rasmussen
2015-07-08 15:22   ` Michael Turquette
2015-07-09 16:21     ` Juri Lelli
2015-08-15 12:46   ` Peter Zijlstra
2015-08-16  4:03     ` Michael Turquette
2015-08-16 20:24       ` Peter Zijlstra
2015-08-17 12:19         ` Juri Lelli
2015-10-13 19:47           ` Steve Muckle
2015-07-07 18:24 ` [RFCv5 PATCH 41/46] sched/fair: add triggers for OPP change requests Morten Rasmussen
2015-07-08 15:42   ` Michael Turquette
2015-07-09 16:52     ` Juri Lelli
2015-08-04 13:41   ` Vincent Guittot
2015-08-10 13:43     ` Juri Lelli
2015-08-10 15:07       ` Vincent Guittot
2015-08-11  9:08         ` Juri Lelli
2015-08-11 11:41           ` Vincent Guittot
2015-08-11 15:07             ` Juri Lelli
2015-08-11 16:37               ` Vincent Guittot
2015-08-12 15:15                 ` Juri Lelli
2015-08-13 12:08                   ` Vincent Guittot
2015-08-14 11:39                     ` Juri Lelli
2015-08-17  9:43                       ` Vincent Guittot
2015-08-15 12:48   ` Peter Zijlstra
2015-08-16  3:50     ` Michael Turquette
2015-08-17 18:22     ` Rafael J. Wysocki
2015-07-07 18:24 ` [RFCv5 PATCH 42/46] sched/{core,fair}: trigger OPP change request on fork() Morten Rasmussen
2015-07-07 18:24 ` [RFCv5 PATCH 43/46] sched/{fair,cpufreq_sched}: add reset_capacity interface Morten Rasmussen
2015-10-08 20:40   ` Steve Muckle
2015-10-09  9:14     ` Juri Lelli
2015-10-12 19:02       ` Steve Muckle
2015-07-07 18:24 ` [RFCv5 PATCH 44/46] sched/fair: jump to max OPP when crossing UP threshold Morten Rasmussen
2015-07-08 16:40   ` Michael Turquette
2015-07-08 16:47   ` Michael Turquette
2015-07-10 10:17     ` Juri Lelli
2015-07-07 18:24 ` [RFCv5 PATCH 45/46] sched/cpufreq_sched: modify pcpu_capacity handling Morten Rasmussen
2015-07-08 16:42   ` Michael Turquette
2015-07-09 16:55     ` Juri Lelli
2015-08-16 20:35   ` Peter Zijlstra
2015-08-17 11:16     ` Juri Lelli
2015-07-07 18:24 ` [RFCv5 PATCH 46/46] sched/fair: cpufreq_sched triggers for load balancing Morten Rasmussen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1436293469-25707-26-git-send-email-morten.rasmussen@arm.com \
    --to=morten.rasmussen@arm.com \
    --cc=Dietmar.Eggemann@arm.com \
    --cc=Juri.Lelli@arm.com \
    --cc=daniel.lezcano@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=mturquette@baylibre.com \
    --cc=pang.xunlei@zte.com.cn \
    --cc=peterz@infradead.org \
    --cc=rjw@rjwysocki.net \
    --cc=sgurrappadi@nvidia.com \
    --cc=vincent.guittot@linaro.org \
    --cc=yuyang.du@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).