[v4,05/11] sched/fair: use rq->nr_running when balancing load
diff mbox series

Message ID 1571405198-27570-6-git-send-email-vincent.guittot@linaro.org
State New, archived
Headers show
Series
  • sched/fair: rework the CFS load balance
Related show

Commit Message

Vincent Guittot Oct. 18, 2019, 1:26 p.m. UTC
cfs load_balance only takes care of CFS tasks whereas CPUs can be used by
other scheduling classes. Typically, a CFS task preempted by an RT or deadline
task will not get a chance to be pulled to another CPU because
load_balance doesn't take into account tasks from other classes.
Add the sum of nr_running to the statistics and use it to detect such
situations.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

Comments

Mel Gorman Oct. 30, 2019, 3:54 p.m. UTC | #1
On Fri, Oct 18, 2019 at 03:26:32PM +0200, Vincent Guittot wrote:
> cfs load_balance only takes care of CFS tasks whereas CPUs can be used by
> other scheduling class. Typically, a CFS task preempted by a RT or deadline
> task will not get a chance to be pulled on another CPU because the
> load_balance doesn't take into account tasks from other classes.
> Add sum of nr_running in the statistics and use it to detect such
> situation.
> 
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

Patch is ok but it'll be easy to mix up sum_nr_running and
sum_h_nr_running in the future. Might be best to rename sum_nr_running to
sum_any_running and the hierarchy one to sum_cfs_running. I don't feel
strongly either way, because it's almost certainly due to the fact that I
almost never care about non-cfs tasks when thinking about the scheduler.

Patch
diff mbox series

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5ae5281..e09fe12b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7704,6 +7704,7 @@  struct sg_lb_stats {
 	unsigned long group_load; /* Total load over the CPUs of the group */
 	unsigned long group_capacity;
 	unsigned long group_util; /* Total utilization of the group */
+	unsigned int sum_nr_running; /* Nr of tasks running in the group */
 	unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */
 	unsigned int idle_cpus;
 	unsigned int group_weight;
@@ -7938,7 +7939,7 @@  static inline int sg_imbalanced(struct sched_group *group)
 static inline bool
 group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-	if (sgs->sum_h_nr_running < sgs->group_weight)
+	if (sgs->sum_nr_running < sgs->group_weight)
 		return true;
 
 	if ((sgs->group_capacity * 100) >
@@ -7959,7 +7960,7 @@  group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 static inline bool
 group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-	if (sgs->sum_h_nr_running <= sgs->group_weight)
+	if (sgs->sum_nr_running <= sgs->group_weight)
 		return false;
 
 	if ((sgs->group_capacity * 100) <
@@ -8063,6 +8064,8 @@  static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->sum_h_nr_running += rq->cfs.h_nr_running;
 
 		nr_running = rq->nr_running;
+		sgs->sum_nr_running += nr_running;
+
 		if (nr_running > 1)
 			*sg_status |= SG_OVERLOAD;
 
@@ -8420,13 +8423,13 @@  static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		}
 
 		if (busiest->group_weight == 1 || sds->prefer_sibling) {
-			unsigned int nr_diff = busiest->sum_h_nr_running;
+			unsigned int nr_diff = busiest->sum_nr_running;
 			/*
 			 * When prefer sibling, evenly spread running tasks on
 			 * groups.
 			 */
 			env->migration_type = migrate_task;
-			lsub_positive(&nr_diff, local->sum_h_nr_running);
+			lsub_positive(&nr_diff, local->sum_nr_running);
 			env->imbalance = nr_diff >> 1;
 			return;
 		}
@@ -8590,7 +8593,7 @@  static struct sched_group *find_busiest_group(struct lb_env *env)
 
 	/* Try to move all excess tasks to child's sibling domain */
 	if (sds.prefer_sibling && local->group_type == group_has_spare &&
-	    busiest->sum_h_nr_running > local->sum_h_nr_running + 1)
+	    busiest->sum_nr_running > local->sum_nr_running + 1)
 		goto force_balance;
 
 	if (busiest->group_type != group_overloaded &&