* [PATCH] sched/fair: Introduce priority load balance to reduce interference from IDLE tasks
@ 2022-08-09 13:29 zhangsong
From: zhangsong @ 2022-08-09 13:29 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, vincent.guittot
  Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	linux-kernel, zhangsong

When NORMAL and IDLE tasks are co-located on the same CPU and CFS triggers
load balance, it is preferable to migrate NORMAL (latency-sensitive) tasks
from the busy src CPU to the dst CPU first, and to migrate IDLE tasks last.

This ordering is important for reducing the interference that IDLE tasks
cause to NORMAL tasks. The CFS load-balance path can therefore be reworked
as follows (a simplified model follows the list):

1. The `cfs_tasks` list of a CPU rq holds NORMAL tasks only.
2. A new `cfs_idle_tasks` list on the rq holds the IDLE tasks.
3. Load balance first tries to migrate NORMAL tasks from `cfs_tasks` to the dst CPU.
4. Only when that is insufficient does it migrate IDLE tasks from `cfs_idle_tasks` to the dst CPU.
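
As an illustration only, here is a minimal userspace sketch of the intended
pick order. This is hedged: struct task, struct rq, enqueue() and
pick_for_migration() below are simplified stand-ins invented for this
example, not the kernel's types and not the patch itself.

  /*
   * Hedged sketch, not the patch: a tiny userspace model of the two-list
   * pick order. struct task and struct rq are simplified stand-ins.
   */
  #include <stdbool.h>
  #include <stdio.h>

  struct task {
          const char *name;
          bool idle_policy;               /* SCHED_IDLE task or idle cgroup */
          struct task *next;
  };

  struct rq {
          struct task *cfs_tasks;         /* NORMAL tasks only */
          struct task *cfs_idle_tasks;    /* IDLE tasks only */
  };

  /* Models adjust_rq_cfs_tasks(): enqueue on the list matching the policy. */
  static void enqueue(struct rq *rq, struct task *t)
  {
          struct task **head = t->idle_policy ? &rq->cfs_idle_tasks
                                              : &rq->cfs_tasks;
          t->next = *head;
          *head = t;
  }

  /*
   * Models the two-pass scan in detach_one_task()/detach_tasks(): take a
   * NORMAL task if one exists, fall back to an IDLE task only afterwards.
   */
  static struct task *pick_for_migration(struct rq *rq)
  {
          return rq->cfs_tasks ? rq->cfs_tasks : rq->cfs_idle_tasks;
  }

  int main(void)
  {
          struct rq rq = { 0 };
          struct task idle = { "idle-batch", true, 0 };
          struct task normal = { "normal-service", false, 0 };

          enqueue(&rq, &idle);
          enqueue(&rq, &normal);
          /* Prints "normal-service": NORMAL tasks are migrated first. */
          printf("migrate first: %s\n", pick_for_migration(&rq)->name);
          return 0;
  }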

This was tested with the following reproduction:
- a small number of NORMAL tasks co-located with a large number of IDLE tasks

With this patch, NORMAL task latency is reduced by about 5~10% compared
with the unpatched kernel.

Signed-off-by: zhangsong <zhangsong34@huawei.com>
---
 kernel/sched/core.c  |  1 +
 kernel/sched/fair.c  | 43 +++++++++++++++++++++++++++++++++++++++----
 kernel/sched/sched.h |  1 +
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ee28253c9ac0..7325c6e552d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9733,6 +9733,7 @@ void __init sched_init(void)
 		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
 		INIT_LIST_HEAD(&rq->cfs_tasks);
+		INIT_LIST_HEAD(&rq->cfs_idle_tasks);
 
 		rq_attach_root(rq, &def_root_domain);
 #ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 914096c5b1ae..5860d34a8a52 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3034,6 +3034,19 @@ static inline void update_scan_period(struct task_struct *p, int new_cpu)
 
 #endif /* CONFIG_NUMA_BALANCING */
 
+static void
+adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
+	struct rq *rq,
+	struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (task_has_idle_policy(task_of(se)) || tg_is_idle(cfs_rq->tg))
+		(*list_op)(&se->group_node, &rq->cfs_idle_tasks);
+	else
+		(*list_op)(&se->group_node, &rq->cfs_tasks);
+}
+
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -3043,7 +3056,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		struct rq *rq = rq_of(cfs_rq);
 
 		account_numa_enqueue(rq, task_of(se));
-		list_add(&se->group_node, &rq->cfs_tasks);
+		adjust_rq_cfs_tasks(list_add, rq, se);
 	}
 #endif
 	cfs_rq->nr_running++;
@@ -7465,7 +7478,7 @@ done: __maybe_unused;
 	 * the list, so our cfs_tasks list becomes MRU
 	 * one.
 	 */
-	list_move(&p->se.group_node, &rq->cfs_tasks);
+	adjust_rq_cfs_tasks(list_move, rq, &p->se);
 #endif
 
 	if (hrtick_enabled_fair(rq))
@@ -7788,6 +7801,9 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 	if (unlikely(task_has_idle_policy(p)))
 		return 0;
 
+	if (tg_is_idle(cfs_rq_of(&p->se)->tg))
+		return 0;
+
 	/* SMT siblings share cache */
 	if (env->sd->flags & SD_SHARE_CPUCAPACITY)
 		return 0;
@@ -7800,6 +7816,11 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 			 &p->se == cfs_rq_of(&p->se)->last))
 		return 1;
 
+	/* Preempting an all-idle dst CPU that shares cache: ignore migration cost */
+	if (cpus_share_cache(env->src_cpu, env->dst_cpu) &&
+	    sched_idle_cpu(env->dst_cpu))
+		return 0;
+
 	if (sysctl_sched_migration_cost == -1)
 		return 1;
 
@@ -7990,11 +8011,14 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 static struct task_struct *detach_one_task(struct lb_env *env)
 {
 	struct task_struct *p;
+	struct list_head *tasks = &env->src_rq->cfs_tasks;
+	int loop = 0;
 
 	lockdep_assert_rq_held(env->src_rq);
 
+again:
 	list_for_each_entry_reverse(p,
-			&env->src_rq->cfs_tasks, se.group_node) {
+			tasks, se.group_node) {
 		if (!can_migrate_task(p, env))
 			continue;
 
@@ -8009,6 +8033,10 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 		schedstat_inc(env->sd->lb_gained[env->idle]);
 		return p;
 	}
+	if (++loop == 1) {
+		tasks = &env->src_rq->cfs_idle_tasks;
+		goto again;
+	}
 	return NULL;
 }
 
@@ -8026,6 +8054,7 @@ static int detach_tasks(struct lb_env *env)
 	unsigned long util, load;
 	struct task_struct *p;
 	int detached = 0;
+	int loop = 0;
 
 	lockdep_assert_rq_held(env->src_rq);
 
@@ -8041,6 +8070,7 @@ static int detach_tasks(struct lb_env *env)
 	if (env->imbalance <= 0)
 		return 0;
 
+again:
 	while (!list_empty(tasks)) {
 		/*
 		 * We don't want to steal all, otherwise we may be treated likewise,
@@ -8142,6 +8172,11 @@ static int detach_tasks(struct lb_env *env)
 		list_move(&p->se.group_node, tasks);
 	}
 
+	if (env->imbalance > 0 && ++loop == 1) {
+		tasks = &env->src_rq->cfs_idle_tasks;
+		goto again;
+	}
+
 	/*
 	 * Right now, this is one of only two places we collect this stat
 	 * so we can safely collect detach_one_task() stats here rather
@@ -11643,7 +11678,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
 		 * Move the next running task to the front of the list, so our
 		 * cfs_tasks list becomes MRU one.
 		 */
-		list_move(&se->group_node, &rq->cfs_tasks);
+		adjust_rq_cfs_tasks(list_move, rq, se);
 	}
 #endif
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e26688d387ae..accb4eea9769 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1068,6 +1068,7 @@ struct rq {
 	int			online;
 
 	struct list_head cfs_tasks;
+	struct list_head cfs_idle_tasks;
 
 	struct sched_avg	avg_rt;
 	struct sched_avg	avg_dl;
-- 
2.27.0



* Re: [PATCH] sched/fair: Introduce priority load balance to reduce interference from IDLE tasks
From: kernel test robot @ 2022-08-09 16:14 UTC (permalink / raw)
  To: zhangsong, mingo, peterz, juri.lelli, vincent.guittot
  Cc: kbuild-all, dietmar.eggemann, rostedt, bsegall, mgorman, bristot,
	vschneid, linux-kernel, zhangsong

Hi zhangsong,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on linus/master v5.19 next-20220809]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
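
For example (one common invocation; '--base=auto' derives the base commit
from the upstream tracking branch of the current branch):

        git format-patch --base=auto -1 HEAD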

url:    https://github.com/intel-lab-lkp/linux/commits/zhangsong/sched-fair-Introduce-priority-load-balance-to-reduce-interference-from-IDLE-tasks/20220809-213204
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 8648f92a66a323ed01903d2cbb248cdbe2f312d9
config: um-x86_64_defconfig (https://download.01.org/0day-ci/archive/20220810/202208100012.psioE872-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/20ac252fc4280e5b1a45070d722c7edc0695088b
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review zhangsong/sched-fair-Introduce-priority-load-balance-to-reduce-interference-from-IDLE-tasks/20220809-213204
        git checkout 20ac252fc4280e5b1a45070d722c7edc0695088b
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=um SUBARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   kernel/sched/fair.c:672:5: warning: no previous prototype for 'sched_update_scaling' [-Wmissing-prototypes]
     672 | int sched_update_scaling(void)
         |     ^~~~~~~~~~~~~~~~~~~~
   kernel/sched/fair.c: In function 'adjust_rq_cfs_tasks':
>> kernel/sched/fair.c:3045:48: error: 'struct rq' has no member named 'cfs_idle_tasks'
    3045 |                 (*list_op)(&se->group_node, &rq->cfs_idle_tasks);
         |                                                ^~
>> kernel/sched/fair.c:3047:48: error: 'struct rq' has no member named 'cfs_tasks'
    3047 |                 (*list_op)(&se->group_node, &rq->cfs_tasks);
         |                                                ^~
   At top level:
   kernel/sched/fair.c:3038:1: warning: 'adjust_rq_cfs_tasks' defined but not used [-Wunused-function]
    3038 | adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
         | ^~~~~~~~~~~~~~~~~~~


vim +3045 kernel/sched/fair.c

  3036	
  3037	static void
  3038	adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
  3039		struct rq *rq,
  3040		struct sched_entity *se)
  3041	{
  3042		struct cfs_rq *cfs_rq = cfs_rq_of(se);
  3043	
  3044		if (task_has_idle_policy(task_of(se)) || tg_is_idle(cfs_rq->tg))
> 3045			(*list_op)(&se->group_node, &rq->cfs_idle_tasks);
  3046		else
> 3047			(*list_op)(&se->group_node, &rq->cfs_tasks);
  3048	}
  3049	
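
The errors are consistent with a !CONFIG_SMP build (the um defconfig does
not enable SMP): in kernel/sched/sched.h, rq->cfs_tasks (and the new
rq->cfs_idle_tasks beside it) lives inside the CONFIG_SMP section of
struct rq, while the patch defines adjust_rq_cfs_tasks() outside any
CONFIG_SMP guard. A possible fix, assuming that diagnosis, is to compile
the helper only for SMP, matching the #ifdef that already surrounds the
list manipulation in account_entity_enqueue(). A sketch, untested:

  /* Sketch of a possible fix, untested: guard the helper with CONFIG_SMP. */
  #ifdef CONFIG_SMP
  static void
  adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
  	struct rq *rq,
  	struct sched_entity *se)
  {
  	struct cfs_rq *cfs_rq = cfs_rq_of(se);

  	/* IDLE-policy tasks and tasks in idle cgroups go on the idle list. */
  	if (task_has_idle_policy(task_of(se)) || tg_is_idle(cfs_rq->tg))
  		(*list_op)(&se->group_node, &rq->cfs_idle_tasks);
  	else
  		(*list_op)(&se->group_node, &rq->cfs_tasks);
  }
  #endif /* CONFIG_SMP */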

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

