All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@techsingularity.net>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Aubrey Li <aubrey.li@linux.intel.com>,
	Barry Song <song.bao.hua@hisilicon.com>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Linux-ARM <linux-arm-kernel@lists.infradead.org>,
	Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 07/10] sched/fair: Account for the idle cpu/smt search cost
Date: Thu,  3 Dec 2020 14:11:21 +0000	[thread overview]
Message-ID: <20201203141124.7391-8-mgorman@techsingularity.net> (raw)
In-Reply-To: <20201203141124.7391-1-mgorman@techsingularity.net>

select_idle_cpu() accounts average search cost for the purposes of
conducting a limited proportional search if SIS_PROP is enabled. The issue
is that select_idle_cpu() does not account for the cost if a candidate
is found, and the cost of select_idle_smt() is ignored.

This patch moves the accounting of avg_cost to cover the cpu/smt search
costs. select_idle_core() costs could be accounted for as well, but it
has its own throttling mechanism that tracks whether idle cores are
expected to exist.

This patch is a bisection hazard because SIS_PROP and how it balances
avg_cost vs avg_idle was probably guided by the fact that avg_cost was
not always accounted for.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 kernel/sched/fair.c | 82 +++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d8f5c4b4936..185fc6e28f8e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,29 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 	return new_cpu;
 }
 
+static int sis_search_depth(struct sched_domain *sd, struct sched_domain *this_sd)
+{
+	u64 avg_cost, avg_idle, span_avg;
+	int nr = INT_MAX;
+
+	if (sched_feat(SIS_PROP)) {
+		/*
+		 * Due to large variance we need a large fuzz factor; hackbench in
+		 * particularly is sensitive here.
+		 */
+		avg_idle = this_rq()->avg_idle / 512;
+		avg_cost = this_sd->avg_scan_cost + 1;
+
+		span_avg = sd->span_weight * avg_idle;
+		if (span_avg > 4*avg_cost)
+			nr = div_u64(span_avg, avg_cost);
+		else
+			nr = 4;
+	}
+
+	return nr;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6151,35 +6174,11 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
  * average idle time for this rq (as found in rq->avg_idle).
  */
-static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd,
+							int target, int nr)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	struct sched_domain *this_sd;
-	u64 avg_cost, avg_idle;
-	u64 time;
-	int this = smp_processor_id();
-	int cpu, nr = INT_MAX;
-
-	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-	if (!this_sd)
-		return -1;
-
-	/*
-	 * Due to large variance we need a large fuzz factor; hackbench in
-	 * particularly is sensitive here.
-	 */
-	avg_idle = this_rq()->avg_idle / 512;
-	avg_cost = this_sd->avg_scan_cost + 1;
-
-	if (sched_feat(SIS_PROP)) {
-		u64 span_avg = sd->span_weight * avg_idle;
-		if (span_avg > 4*avg_cost)
-			nr = div_u64(span_avg, avg_cost);
-		else
-			nr = 4;
-	}
-
-	time = cpu_clock(this);
+	int cpu;
 
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 	__cpumask_clear_cpu(target, cpus);
@@ -6192,9 +6191,6 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			break;
 	}
 
-	time = cpu_clock(this) - time;
-	update_avg(&this_sd->avg_scan_cost, time);
-
 	return cpu;
 }
 
@@ -6245,9 +6241,10 @@ static inline bool asym_fits_capacity(int task_util, int cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
-	struct sched_domain *sd;
+	struct sched_domain *sd, *this_sd;
 	unsigned long task_util;
-	int i, recent_used_cpu;
+	int i, recent_used_cpu, depth;
+	u64 time;
 
 	schedstat_inc(this_rq()->sis_search);
 
@@ -6337,21 +6334,34 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (!sd)
 		return target;
 
+	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+	if (!this_sd)
+		return target;
+
+	depth = sis_search_depth(sd, this_sd);
+
 	schedstat_inc(this_rq()->sis_domain_search);
 	i = select_idle_core(p, sd, target);
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 
-	i = select_idle_cpu(p, sd, target);
+	time = cpu_clock(smp_processor_id());
+	i = select_idle_cpu(p, sd, target, depth);
 	if ((unsigned)i < nr_cpumask_bits)
-		return i;
+		goto acct_cost;
 
 	i = select_idle_smt(p, sd, target);
 	if ((unsigned)i < nr_cpumask_bits)
-		return i;
+		goto acct_cost;
 
 	schedstat_inc(this_rq()->sis_failed);
-	return target;
+	i = target;
+
+acct_cost:
+	time = cpu_clock(smp_processor_id()) - time;
+	update_avg(&this_sd->avg_scan_cost, time);
+
+	return i;
 }
 
 /**
-- 
2.26.2


WARNING: multiple messages have this Message-ID (diff)
From: Mel Gorman <mgorman@techsingularity.net>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Barry Song <song.bao.hua@hisilicon.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Aubrey Li <aubrey.li@linux.intel.com>,
	Ingo Molnar <mingo@redhat.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Linux-ARM <linux-arm-kernel@lists.infradead.org>
Subject: [PATCH 07/10] sched/fair: Account for the idle cpu/smt search cost
Date: Thu,  3 Dec 2020 14:11:21 +0000	[thread overview]
Message-ID: <20201203141124.7391-8-mgorman@techsingularity.net> (raw)
In-Reply-To: <20201203141124.7391-1-mgorman@techsingularity.net>

select_idle_cpu() accounts average search cost for the purposes of
conducting a limited proportional search if SIS_PROP is enabled. The issue
is that select_idle_cpu() does not account for the cost if a candidate
is found, and the cost of select_idle_smt() is ignored.

This patch moves the accounting of avg_cost to cover the cpu/smt search
costs. select_idle_core() costs could be accounted for as well, but it
has its own throttling mechanism that tracks whether idle cores are
expected to exist.

This patch is a bisection hazard because SIS_PROP and how it balances
avg_cost vs avg_idle was probably guided by the fact that avg_cost was
not always accounted for.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 kernel/sched/fair.c | 82 +++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d8f5c4b4936..185fc6e28f8e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,29 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 	return new_cpu;
 }
 
+static int sis_search_depth(struct sched_domain *sd, struct sched_domain *this_sd)
+{
+	u64 avg_cost, avg_idle, span_avg;
+	int nr = INT_MAX;
+
+	if (sched_feat(SIS_PROP)) {
+		/*
+		 * Due to large variance we need a large fuzz factor; hackbench in
+		 * particularly is sensitive here.
+		 */
+		avg_idle = this_rq()->avg_idle / 512;
+		avg_cost = this_sd->avg_scan_cost + 1;
+
+		span_avg = sd->span_weight * avg_idle;
+		if (span_avg > 4*avg_cost)
+			nr = div_u64(span_avg, avg_cost);
+		else
+			nr = 4;
+	}
+
+	return nr;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6151,35 +6174,11 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
  * average idle time for this rq (as found in rq->avg_idle).
  */
-static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd,
+							int target, int nr)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	struct sched_domain *this_sd;
-	u64 avg_cost, avg_idle;
-	u64 time;
-	int this = smp_processor_id();
-	int cpu, nr = INT_MAX;
-
-	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-	if (!this_sd)
-		return -1;
-
-	/*
-	 * Due to large variance we need a large fuzz factor; hackbench in
-	 * particularly is sensitive here.
-	 */
-	avg_idle = this_rq()->avg_idle / 512;
-	avg_cost = this_sd->avg_scan_cost + 1;
-
-	if (sched_feat(SIS_PROP)) {
-		u64 span_avg = sd->span_weight * avg_idle;
-		if (span_avg > 4*avg_cost)
-			nr = div_u64(span_avg, avg_cost);
-		else
-			nr = 4;
-	}
-
-	time = cpu_clock(this);
+	int cpu;
 
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 	__cpumask_clear_cpu(target, cpus);
@@ -6192,9 +6191,6 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			break;
 	}
 
-	time = cpu_clock(this) - time;
-	update_avg(&this_sd->avg_scan_cost, time);
-
 	return cpu;
 }
 
@@ -6245,9 +6241,10 @@ static inline bool asym_fits_capacity(int task_util, int cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
-	struct sched_domain *sd;
+	struct sched_domain *sd, *this_sd;
 	unsigned long task_util;
-	int i, recent_used_cpu;
+	int i, recent_used_cpu, depth;
+	u64 time;
 
 	schedstat_inc(this_rq()->sis_search);
 
@@ -6337,21 +6334,34 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (!sd)
 		return target;
 
+	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+	if (!this_sd)
+		return target;
+
+	depth = sis_search_depth(sd, this_sd);
+
 	schedstat_inc(this_rq()->sis_domain_search);
 	i = select_idle_core(p, sd, target);
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 
-	i = select_idle_cpu(p, sd, target);
+	time = cpu_clock(smp_processor_id());
+	i = select_idle_cpu(p, sd, target, depth);
 	if ((unsigned)i < nr_cpumask_bits)
-		return i;
+		goto acct_cost;
 
 	i = select_idle_smt(p, sd, target);
 	if ((unsigned)i < nr_cpumask_bits)
-		return i;
+		goto acct_cost;
 
 	schedstat_inc(this_rq()->sis_failed);
-	return target;
+	i = target;
+
+acct_cost:
+	time = cpu_clock(smp_processor_id()) - time;
+	update_avg(&this_sd->avg_scan_cost, time);
+
+	return i;
 }
 
 /**
-- 
2.26.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2020-12-03 14:12 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-03 14:11 [RFC PATCH 00/10] Reduce time complexity of select_idle_sibling Mel Gorman
2020-12-03 14:11 ` Mel Gorman
2020-12-03 14:11 ` [PATCH 01/10] sched/fair: Track efficiency " Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 14:11 ` [PATCH 02/10] sched/fair: Track efficiency of task recent_used_cpu Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 14:11 ` [PATCH 03/10] sched/fair: Remove SIS_AVG_CPU Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 14:11 ` [PATCH 04/10] sched/fair: Return an idle cpu if one is found after a failed search for an idle core Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 16:35   ` Vincent Guittot
2020-12-03 16:35     ` Vincent Guittot
2020-12-03 17:50     ` Mel Gorman
2020-12-03 17:50       ` Mel Gorman
2020-12-03 14:11 ` [PATCH 05/10] sched/fair: Do not replace recent_used_cpu with the new target Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 14:11 ` [PATCH 06/10] sched/fair: Clear the target CPU from the cpumask of CPUs searched Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 16:38   ` Vincent Guittot
2020-12-03 16:38     ` Vincent Guittot
2020-12-03 17:52     ` Mel Gorman
2020-12-03 17:52       ` Mel Gorman
2020-12-04 10:56       ` Vincent Guittot
2020-12-04 10:56         ` Vincent Guittot
2020-12-04 11:30         ` Mel Gorman
2020-12-04 11:30           ` Mel Gorman
2020-12-04 13:13           ` Vincent Guittot
2020-12-04 13:13             ` Vincent Guittot
2020-12-04 13:17             ` Vincent Guittot
2020-12-04 13:17               ` Vincent Guittot
2020-12-04 13:40               ` Li, Aubrey
2020-12-04 13:40                 ` Li, Aubrey
2020-12-04 13:47                 ` Li, Aubrey
2020-12-04 13:47                   ` Li, Aubrey
2020-12-04 13:47                 ` Vincent Guittot
2020-12-04 13:47                   ` Vincent Guittot
2020-12-04 14:07                   ` Li, Aubrey
2020-12-04 14:07                     ` Li, Aubrey
2020-12-04 14:31                   ` Mel Gorman
2020-12-04 14:31                     ` Mel Gorman
2020-12-04 15:23                     ` Vincent Guittot
2020-12-04 15:23                       ` Vincent Guittot
2020-12-04 15:40                       ` Mel Gorman
2020-12-04 15:40                         ` Mel Gorman
2020-12-04 15:43                         ` Vincent Guittot
2020-12-04 15:43                           ` Vincent Guittot
2020-12-04 18:41                           ` Mel Gorman
2020-12-04 18:41                             ` Mel Gorman
2020-12-04 14:27               ` Mel Gorman
2020-12-04 14:27                 ` Mel Gorman
2020-12-03 14:11 ` Mel Gorman [this message]
2020-12-03 14:11   ` [PATCH 07/10] sched/fair: Account for the idle cpu/smt search cost Mel Gorman
2020-12-03 14:11 ` [PATCH 08/10] sched/fair: Reintroduce SIS_AVG_CPU but in the context of SIS_PROP to reduce search depth Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 14:11 ` [PATCH 09/10] sched/fair: Limit the search for an idle core Mel Gorman
2020-12-03 14:11   ` Mel Gorman
2020-12-03 14:19 ` Mel Gorman
2020-12-03 14:19   ` Mel Gorman
2020-12-03 14:20 ` [PATCH 10/10] sched/fair: Avoid revisiting CPUs multiple times during select_idle_sibling Mel Gorman
2020-12-03 14:20   ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201203141124.7391-8-mgorman@techsingularity.net \
    --to=mgorman@techsingularity.net \
    --cc=aubrey.li@linux.intel.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=song.bao.hua@hisilicon.com \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.