From: vincent.guittot@linaro.org (Vincent Guittot)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v5 11/12] sched: replace capacity_factor by utilization
Date: Tue, 26 Aug 2014 13:06:54 +0200
Message-ID: <1409051215-16788-12-git-send-email-vincent.guittot@linaro.org>
In-Reply-To: <1409051215-16788-1-git-send-email-vincent.guittot@linaro.org>

The scheduler tries to compute how many tasks a group of CPUs can handle by
assuming that a task's load is SCHED_LOAD_SCALE and that a CPU's capacity is
SCHED_CAPACITY_SCALE.
Thanks to the rework of group_capacity_orig and the addition of
group_utilization, we now have a better view of both the capacity of a group
of CPUs and its current utilization, and can deduce how much capacity is
still available.
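
As a rough sketch of the new classification (mirroring the
group_has_free_capacity() and group_is_overloaded() helpers introduced in the
diff below, with the names and parameters simplified for illustration: the
real helpers take the sg_lb_stats and lb_env structures instead of plain
scalars), a group's original capacity is compared against its utilization
scaled by the domain's imbalance_pct margin:

	/*
	 * Illustration only: a group has free capacity when its original
	 * capacity, with the imbalance_pct margin applied, still exceeds
	 * its utilization, or when it runs fewer tasks than it has CPUs.
	 */
	static inline int has_free_capacity(unsigned long capacity_orig,
					    unsigned long utilization,
					    unsigned int nr_running,
					    unsigned int nr_cpus,
					    unsigned int imbalance_pct)
	{
		if (capacity_orig * 100 > utilization * imbalance_pct)
			return 1;

		return nr_running < nr_cpus;
	}

	/*
	 * Conversely, a group is overloaded when it runs more tasks than
	 * CPUs and its utilization, scaled by imbalance_pct, exceeds its
	 * original capacity.
	 */
	static inline int is_overloaded(unsigned long capacity_orig,
					unsigned long utilization,
					unsigned int nr_running,
					unsigned int nr_cpus,
					unsigned int imbalance_pct)
	{
		if (nr_running <= nr_cpus)
			return 0;

		return capacity_orig * 100 < utilization * imbalance_pct;
	}

In the patch, group_classify() uses the overloaded check to return
group_overloaded, and a newly idle CPU whose local group has free capacity
will force a balance towards a busiest group that is out of capacity.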

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c | 121 ++++++++++++++++++++++------------------------------
 1 file changed, 51 insertions(+), 70 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2f95d1c..80bd64e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5673,13 +5673,13 @@ struct sg_lb_stats {
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long load_per_task;
 	unsigned long group_capacity;
+	unsigned long group_capacity_orig;
 	unsigned long group_utilization; /* Total utilization of the group */
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
-	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
 	unsigned int group_weight;
 	enum group_type group_type;
-	int group_has_free_capacity;
+	int group_out_of_capacity;
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -5901,31 +5901,6 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 }
 
 /*
- * Try and fix up capacity for tiny siblings, this is needed when
- * things like SD_ASYM_PACKING need f_b_g to select another sibling
- * which on its own isn't powerful enough.
- *
- * See update_sd_pick_busiest() and check_asym_packing().
- */
-static inline int
-fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
-{
-	/*
-	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
-	 */
-	if (!(sd->flags & SD_SHARE_CPUCAPACITY))
-		return 0;
-
-	/*
-	 * If ~90% of the cpu_capacity is still there, we're good.
-	 */
-	if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
-		return 1;
-
-	return 0;
-}
-
-/*
  * Group imbalance indicates (and tries to solve) the problem where balancing
  * groups is inadequate due to tsk_cpus_allowed() constraints.
  *
@@ -5959,38 +5934,37 @@ static inline int sg_imbalanced(struct sched_group *group)
 	return group->sgc->imbalance;
 }
 
-/*
- * Compute the group capacity factor.
- *
- * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
- * first dividing out the smt factor and computing the actual number of cores
- * and limit unit capacity with that.
- */
-static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
+static inline int group_has_free_capacity(struct sg_lb_stats *sgs,
+			struct lb_env *env)
 {
-	unsigned int capacity_factor, smt, cpus;
-	unsigned int capacity, capacity_orig;
+	if ((sgs->group_capacity_orig * 100) >
+			(sgs->group_utilization * env->sd->imbalance_pct))
+		return 1;
+
+	if (sgs->sum_nr_running < sgs->group_weight)
+		return 1;
 
-	capacity = group->sgc->capacity;
-	capacity_orig = group->sgc->capacity_orig;
-	cpus = group->group_weight;
+	return 0;
+}
 
-	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
-	smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
-	capacity_factor = cpus / smt; /* cores */
+static inline int group_is_overloaded(struct sg_lb_stats *sgs,
+			struct lb_env *env)
+{
+	if (sgs->sum_nr_running <= sgs->group_weight)
+		return 0;
 
-	capacity_factor = min_t(unsigned,
-		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
-	if (!capacity_factor)
-		capacity_factor = fix_small_capacity(env->sd, group);
+	if ((sgs->group_capacity_orig * 100) <
+			(sgs->group_utilization * env->sd->imbalance_pct))
+		return 1;
 
-	return capacity_factor;
+	return 0;
 }
 
 static enum group_type
-group_classify(struct sched_group *group, struct sg_lb_stats *sgs)
+group_classify(struct sched_group *group, struct sg_lb_stats *sgs,
+			struct lb_env *env)
 {
-	if (sgs->sum_nr_running > sgs->group_capacity_factor)
+	if (group_is_overloaded(sgs, env))
 		return group_overloaded;
 
 	if (sg_imbalanced(group))
@@ -6043,6 +6017,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			sgs->idle_cpus++;
 	}
 
+	sgs->group_capacity_orig = group->sgc->capacity_orig;
 	/* Adjust by relative CPU capacity of the group */
 	sgs->group_capacity = group->sgc->capacity;
 	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
@@ -6051,11 +6026,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
 	sgs->group_weight = group->group_weight;
-	sgs->group_capacity_factor = sg_capacity_factor(env, group);
-	sgs->group_type = group_classify(group, sgs);
 
-	if (sgs->group_capacity_factor > sgs->sum_nr_running)
-		sgs->group_has_free_capacity = 1;
+	sgs->group_type = group_classify(group, sgs, env);
+
+	sgs->group_out_of_capacity = group_is_overloaded(sgs, env);
 }
 
 /**
@@ -6185,17 +6159,21 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
-		 * first, lower the sg capacity factor to one so that we'll try
+		 * first, lower the sg capacity to one so that we'll try
 		 * and move all the excess tasks away. We lower the capacity
 		 * of a group only if the local group has the capacity to fit
-		 * these excess tasks, i.e. nr_running < group_capacity_factor. The
+		 * these excess tasks, i.e. group_capacity > 0. The
 		 * extra check prevents the case where you always pull from the
 		 * heaviest group when it is already under-utilized (possible
 		 * with a large weight task outweighs the tasks on the system).
 		 */
 		if (prefer_sibling && sds->local &&
-		    sds->local_stat.group_has_free_capacity)
-			sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
+		    group_has_free_capacity(&sds->local_stat, env)) {
+			if (sgs->sum_nr_running > 1)
+				sgs->group_out_of_capacity = 1;
+			sgs->group_capacity = min(sgs->group_capacity,
+						SCHED_CAPACITY_SCALE);
+		}
 
 		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
 			sds->busiest = sg;
@@ -6373,11 +6351,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	 */
 	if (busiest->group_type == group_overloaded &&
 	    local->group_type   == group_overloaded) {
-		load_above_capacity =
-			(busiest->sum_nr_running - busiest->group_capacity_factor);
-
-		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
-		load_above_capacity /= busiest->group_capacity;
+		load_above_capacity = busiest->sum_nr_running *
+					SCHED_LOAD_SCALE;
+		if (load_above_capacity > busiest->group_capacity)
+			load_above_capacity -= busiest->group_capacity;
+		else
+			load_above_capacity = ~0UL;
 	}
 
 	/*
@@ -6440,6 +6419,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	local = &sds.local_stat;
 	busiest = &sds.busiest_stat;
 
+	/* ASYM feature bypasses nice load balance check */
 	if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
 	    check_asym_packing(env, &sds))
 		return sds.busiest;
@@ -6460,8 +6440,9 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 		goto force_balance;
 
 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-	if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
-	    !busiest->group_has_free_capacity)
+	if (env->idle == CPU_NEWLY_IDLE &&
+			group_has_free_capacity(local, env) &&
+			busiest->group_out_of_capacity)
 		goto force_balance;
 
 	/*
@@ -6519,7 +6500,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 	int i;
 
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
-		unsigned long capacity, capacity_factor, wl;
+		unsigned long capacity, wl;
 		enum fbq_type rt;
 
 		rq = cpu_rq(i);
@@ -6548,9 +6529,6 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 			continue;
 
 		capacity = capacity_of(i);
-		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
-		if (!capacity_factor)
-			capacity_factor = fix_small_capacity(env->sd, group);
 
 		wl = weighted_cpuload(i);
 
@@ -6558,7 +6536,10 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		 * When comparing with imbalance, use weighted_cpuload()
 		 * which is not scaled with the cpu capacity.
 		 */
-		if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
+
+		if (rq->nr_running == 1 && wl > env->imbalance &&
+		    ((capacity * env->sd->imbalance_pct) >=
+				(rq->cpu_capacity_orig * 100)))
 			continue;
 
 		/*
-- 
1.9.1
