* [PATCH V2] sched/fair: Make sched-idle CPU selection consistent throughout
@ 2019-11-14 10:49 Viresh Kumar
2019-11-15 8:42 ` Vincent Guittot
0 siblings, 1 reply; 2+ messages in thread
From: Viresh Kumar @ 2019-11-14 10:49 UTC (permalink / raw)
To: Ingo Molnar, Peter Zijlstra, Vincent Guittot, Juri Lelli,
Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman
Cc: Viresh Kumar, linux-kernel, Parth Shah
There are instances where we keep searching for an idle CPU despite
already having a sched-idle CPU (in find_idlest_group_cpu(),
select_idle_smt() and select_idle_cpu()), and then there are places where
we don't necessarily do that and return a sched-idle CPU as soon as we
find one (in select_idle_sibling()). This looks a bit inconsistent and
it may be worth having the same policy everywhere.
On the other hand, choosing a sched-idle CPU over an idle one shall be
beneficial from performance and power point of view as well, as we don't
need to get the CPU online from a deep idle state which wastes quite a
lot of time and energy and delays the scheduling of the newly woken up
task.
This patch tries to simplify code around sched-idle CPU selection and
make it consistent throughout.
Testing is done with the help of rt-app on hikey board (ARM64 octa-core,
2 clusters, 0-3 and 4-7). The cpufreq governor was set to performance to
avoid any side effects from CPU frequency. Following are the tests
performed:
Test 1: 1-cfs-task:
A single SCHED_NORMAL task is pinned to CPU5 which runs for 2333 us
out of 7777 us (so gives time for the cluster to go in deep idle
state).
Test 2: 1-cfs-1-idle-task:
A single SCHED_NORMAL task is pinned on CPU5 and single SCHED_IDLE
task is pinned on CPU6 (to make sure cluster 1 doesn't go in deep idle
state).
Test 3: 1-cfs-8-idle-task:
A single SCHED_NORMAL task is pinned on CPU5 and eight SCHED_IDLE
tasks are created which run forever (not pinned anywhere, so they run
on all CPUs). Checked with kernelshark that as soon as NORMAL task
sleeps, the SCHED_IDLE task starts running on CPU5.
And here are the results on mean latency (in us), using the "st" tool.
$ st 1-cfs-task/rt-app-cfs_thread-0.log
N min max sum mean stddev
642 90 592 197180 307.134 109.906
$ st 1-cfs-1-idle-task/rt-app-cfs_thread-0.log
N min max sum mean stddev
642 67 311 113850 177.336 41.4251
$ st 1-cfs-8-idle-task/rt-app-cfs_thread-0.log
N min max sum mean stddev
643 29 173 41364 64.3297 13.2344
The mean latency when we need to:
- wakeup from deep idle state is 307 us.
- wakeup from shallow idle state is 177 us.
- preempt a SCHED_IDLE task is 64 us.
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
V1->V2:
- Updated commit log with the numbers received from rt-app tests.
kernel/sched/fair.c | 34 ++++++++++++----------------------
1 file changed, 12 insertions(+), 22 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a81c36472822..bb367f48c1ef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5545,7 +5545,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
unsigned int min_exit_latency = UINT_MAX;
u64 latest_idle_timestamp = 0;
int least_loaded_cpu = this_cpu;
- int shallowest_idle_cpu = -1, si_cpu = -1;
+ int shallowest_idle_cpu = -1;
int i;
/* Check if we have any choice: */
@@ -5554,6 +5554,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+ if (sched_idle_cpu(i))
+ return i;
+
if (available_idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
@@ -5576,12 +5579,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
}
- } else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
- if (sched_idle_cpu(i)) {
- si_cpu = i;
- continue;
- }
-
+ } else if (shallowest_idle_cpu == -1) {
load = cpu_load(cpu_rq(i));
if (load < min_load) {
min_load = load;
@@ -5590,11 +5588,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
}
}
- if (shallowest_idle_cpu != -1)
- return shallowest_idle_cpu;
- if (si_cpu != -1)
- return si_cpu;
- return least_loaded_cpu;
+ return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
}
static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
@@ -5747,7 +5741,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
*/
static int select_idle_smt(struct task_struct *p, int target)
{
- int cpu, si_cpu = -1;
+ int cpu;
if (!static_branch_likely(&sched_smt_present))
return -1;
@@ -5755,13 +5749,11 @@ static int select_idle_smt(struct task_struct *p, int target)
for_each_cpu(cpu, cpu_smt_mask(target)) {
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
- if (available_idle_cpu(cpu))
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
return cpu;
- if (si_cpu == -1 && sched_idle_cpu(cpu))
- si_cpu = cpu;
}
- return si_cpu;
+ return -1;
}
#else /* CONFIG_SCHED_SMT */
@@ -5790,7 +5782,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
u64 time, cost;
s64 delta;
int this = smp_processor_id();
- int cpu, nr = INT_MAX, si_cpu = -1;
+ int cpu, nr = INT_MAX;
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -5818,13 +5810,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
if (!--nr)
- return si_cpu;
+ return -1;
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
- if (available_idle_cpu(cpu))
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
break;
- if (si_cpu == -1 && sched_idle_cpu(cpu))
- si_cpu = cpu;
}
time = cpu_clock(this) - time;
--
2.21.0.rc0.269.g1a574e7a288b
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH V2] sched/fair: Make sched-idle CPU selection consistent throughout
2019-11-14 10:49 [PATCH V2] sched/fair: Make sched-idle CPU selection consistent throughout Viresh Kumar
@ 2019-11-15 8:42 ` Vincent Guittot
0 siblings, 0 replies; 2+ messages in thread
From: Vincent Guittot @ 2019-11-15 8:42 UTC (permalink / raw)
To: Viresh Kumar
Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Dietmar Eggemann,
Steven Rostedt, Ben Segall, Mel Gorman, linux-kernel, Parth Shah
On Thu, 14 Nov 2019 at 11:49, Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> There are instances where we keep searching for an idle CPU despite
> already having a sched-idle CPU (in find_idlest_group_cpu(),
> select_idle_smt() and select_idle_cpu()), and then there are places where
> we don't necessarily do that and return a sched-idle CPU as soon as we
> find one (in select_idle_sibling()). This looks a bit inconsistent and
> it may be worth having the same policy everywhere.
>
> On the other hand, choosing a sched-idle CPU over an idle one shall be
> beneficial from performance and power point of view as well, as we don't
> need to get the CPU online from a deep idle state which wastes quite a
> lot of time and energy and delays the scheduling of the newly woken up
> task.
>
> This patch tries to simplify code around sched-idle CPU selection and
> make it consistent throughout.
>
> Testing is done with the help of rt-app on hikey board (ARM64 octa-core,
> 2 clusters, 0-3 and 4-7). The cpufreq governor was set to performance to
> avoid any side effects from CPU frequency. Following are the tests
> performed:
>
> Test 1: 1-cfs-task:
>
> A single SCHED_NORMAL task is pinned to CPU5 which runs for 2333 us
> out of 7777 us (so gives time for the cluster to go in deep idle
> state).
>
> Test 2: 1-cfs-1-idle-task:
>
> A single SCHED_NORMAL task is pinned on CPU5 and single SCHED_IDLE
> task is pinned on CPU6 (to make sure cluster 1 doesn't go in deep idle
> state).
>
> Test 3: 1-cfs-8-idle-task:
>
> A single SCHED_NORMAL task is pinned on CPU5 and eight SCHED_IDLE
> tasks are created which run forever (not pinned anywhere, so they run
> on all CPUs). Checked with kernelshark that as soon as NORMAL task
> sleeps, the SCHED_IDLE task starts running on CPU5.
>
> And here are the results on mean latency (in us), using the "st" tool.
>
> $ st 1-cfs-task/rt-app-cfs_thread-0.log
> N min max sum mean stddev
> 642 90 592 197180 307.134 109.906
>
> $ st 1-cfs-1-idle-task/rt-app-cfs_thread-0.log
> N min max sum mean stddev
> 642 67 311 113850 177.336 41.4251
>
> $ st 1-cfs-8-idle-task/rt-app-cfs_thread-0.log
> N min max sum mean stddev
> 643 29 173 41364 64.3297 13.2344
>
> The mean latency when we need to:
> - wakeup from deep idle state is 307 us.
> - wakeup from shallow idle state is 177 us.
> - preempt a SCHED_IDLE task is 64 us.
Makes sense to me
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
> V1->V2:
> - Updated commit log with the numbers received from rt-app tests.
>
> kernel/sched/fair.c | 34 ++++++++++++----------------------
> 1 file changed, 12 insertions(+), 22 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a81c36472822..bb367f48c1ef 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5545,7 +5545,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
> unsigned int min_exit_latency = UINT_MAX;
> u64 latest_idle_timestamp = 0;
> int least_loaded_cpu = this_cpu;
> - int shallowest_idle_cpu = -1, si_cpu = -1;
> + int shallowest_idle_cpu = -1;
> int i;
>
> /* Check if we have any choice: */
> @@ -5554,6 +5554,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>
> /* Traverse only the allowed CPUs */
> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
> + if (sched_idle_cpu(i))
> + return i;
> +
> if (available_idle_cpu(i)) {
> struct rq *rq = cpu_rq(i);
> struct cpuidle_state *idle = idle_get_state(rq);
> @@ -5576,12 +5579,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
> latest_idle_timestamp = rq->idle_stamp;
> shallowest_idle_cpu = i;
> }
> - } else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
> - if (sched_idle_cpu(i)) {
> - si_cpu = i;
> - continue;
> - }
> -
> + } else if (shallowest_idle_cpu == -1) {
> load = cpu_load(cpu_rq(i));
> if (load < min_load) {
> min_load = load;
> @@ -5590,11 +5588,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
> }
> }
>
> - if (shallowest_idle_cpu != -1)
> - return shallowest_idle_cpu;
> - if (si_cpu != -1)
> - return si_cpu;
> - return least_loaded_cpu;
> + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
> }
>
> static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
> @@ -5747,7 +5741,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
> */
> static int select_idle_smt(struct task_struct *p, int target)
> {
> - int cpu, si_cpu = -1;
> + int cpu;
>
> if (!static_branch_likely(&sched_smt_present))
> return -1;
> @@ -5755,13 +5749,11 @@ static int select_idle_smt(struct task_struct *p, int target)
> for_each_cpu(cpu, cpu_smt_mask(target)) {
> if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> continue;
> - if (available_idle_cpu(cpu))
> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> return cpu;
> - if (si_cpu == -1 && sched_idle_cpu(cpu))
> - si_cpu = cpu;
> }
>
> - return si_cpu;
> + return -1;
> }
>
> #else /* CONFIG_SCHED_SMT */
> @@ -5790,7 +5782,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> u64 time, cost;
> s64 delta;
> int this = smp_processor_id();
> - int cpu, nr = INT_MAX, si_cpu = -1;
> + int cpu, nr = INT_MAX;
>
> this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
> if (!this_sd)
> @@ -5818,13 +5810,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
> for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
> if (!--nr)
> - return si_cpu;
> + return -1;
> if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> continue;
> - if (available_idle_cpu(cpu))
> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> break;
> - if (si_cpu == -1 && sched_idle_cpu(cpu))
> - si_cpu = cpu;
> }
>
> time = cpu_clock(this) - time;
> --
> 2.21.0.rc0.269.g1a574e7a288b
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-11-15 8:42 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-14 10:49 [PATCH V2] sched/fair: Make sched-idle CPU selection consistent throughout Viresh Kumar
2019-11-15 8:42 ` Vincent Guittot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).