From: Mel Gorman <mgorman@techsingularity.net>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Aubrey Li <aubrey.li@linux.intel.com>,
	Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 9/9] sched/core: Delete SIS_PROP and rely on the idle cpu mask
Date: Mon, 26 Jul 2021 11:22:47 +0100	[thread overview]
Message-ID: <20210726102247.21437-10-mgorman@techsingularity.net> (raw)
In-Reply-To: <20210726102247.21437-1-mgorman@techsingularity.net>

Now that there is an idle CPU mask that is approximately up to date, the
proportional scan depth (SIS_PROP) can be removed; the scan depth is
instead bounded by the estimated number of idle CPUs in the mask.
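
As an illustration only (a simplified sketch of the code this patch
leaves behind, not the exact kernel source; sds_idle_cpus() is the
accessor introduced earlier in this series), the wakeup scan now
reduces to walking the intersection of the shared idle mask and the
task's allowed CPUs:

	/*
	 * Candidates are the (approximately) idle CPUs that the task
	 * is allowed to run on; at most that many CPUs are visited.
	 */
	cpumask_and(cpus, sds_idle_cpus(sd->shared), p->cpus_ptr);

	for_each_cpu_wrap(cpu, cpus, target + 1) {
		idle_cpu = __select_idle_cpu(cpu, p);
		if ((unsigned int)idle_cpu < nr_cpumask_bits)
			break;
	}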

The plus side of this patch is that the time accounting overhead is gone.
The downside is that, in some circumstances, more CPUs will be scanned
than proportional scanning would have allowed, depending on whether an
idle core is being scanned for and on the accuracy of the idle CPU mask.
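
To make the comparison concrete, SIS_PROP derived its limit from the
block deleted from select_idle_cpu() below, roughly:

	/*
	 * Old SIS_PROP depth, kept here for reference. With
	 * illustrative numbers sd->span_weight = 40,
	 * wake_avg_idle = 500ns and avg_scan_cost = 2000ns, the scan
	 * stopped after nr = 40 * 500 / 2000 = 10 CPUs (multiplied by
	 * sched_smt_weight on SMT systems).
	 */
	span_avg = sd->span_weight * avg_idle;
	if (span_avg > sis_min_cores * avg_cost)
		nr = div_u64(span_avg, avg_cost);
	else
		nr = sis_min_cores;
	nr *= sched_smt_weight;

With this patch, the same wakeup may visit every CPU set in the idle
mask, so a stale or over-full mask can cost more than the old bound.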

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 kernel/sched/core.c     | 22 ++++----------
 kernel/sched/fair.c     | 65 ++---------------------------------------
 kernel/sched/features.h |  5 ----
 kernel/sched/sched.h    |  5 ----
 4 files changed, 8 insertions(+), 89 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2751614ce0cb..9fcf9d1ae21c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3333,9 +3333,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
 		if (rq->avg_idle > max)
 			rq->avg_idle = max;
 
-		rq->wake_stamp = jiffies;
-		rq->wake_avg_idle = rq->avg_idle / 2;
-
 		rq->idle_stamp = 0;
 	}
 #endif
@@ -8648,18 +8645,11 @@ int sched_cpu_activate(unsigned int cpu)
 	balance_push_set(cpu, false);
 
 #ifdef CONFIG_SCHED_SMT
-	do {
-		int weight = cpumask_weight(cpu_smt_mask(cpu));
-
-		if (weight > sched_smt_weight)
-			sched_smt_weight = weight;
-
-		/*
-		 * When going up, increment the number of cores with SMT present.
-		 */
-		if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
-			static_branch_inc_cpuslocked(&sched_smt_present);
-	} while (0);
+	/*
+	 * When going up, increment the number of cores with SMT present.
+	 */
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+		static_branch_inc_cpuslocked(&sched_smt_present);
 #endif
 	set_cpu_active(cpu, true);
 
@@ -9029,8 +9019,6 @@ void __init sched_init(void)
 		rq->online = 0;
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
-		rq->wake_stamp = jiffies;
-		rq->wake_avg_idle = rq->avg_idle;
 		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 		rq->last_idle_state = 1;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fe87af2ccc80..70b6d840426a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6076,8 +6076,6 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p)
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
 
-int __read_mostly sched_smt_weight = 1;
-
 static inline void set_idle_cores(int cpu, int val)
 {
 	struct sched_domain_shared *sds;
@@ -6196,8 +6194,6 @@ static inline bool test_idle_cores(int cpu, bool def)
 	return def;
 }
 
-#define sched_smt_weight 1
-
 static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
 {
 	return __select_idle_cpu(core, p);
@@ -6210,8 +6206,6 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 
 #endif /* CONFIG_SCHED_SMT */
 
-#define sis_min_cores	2
-
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6220,12 +6214,8 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target)
 {
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int i, cpu, idle_cpu = -1, nr = INT_MAX;
-	struct rq *this_rq = this_rq();
-	int this = smp_processor_id();
+	int i, cpu, idle_cpu = -1;
 	struct sched_domain *this_sd;
-	u64 time = 0;
-
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
 		return -1;
@@ -6237,69 +6227,20 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 	 */
 	cpumask_and(cpus, sds_idle_cpus(sd->shared), p->cpus_ptr);
 
-	if (sched_feat(SIS_PROP)) {
-		u64 avg_cost, avg_idle, span_avg;
-		unsigned long now = jiffies;
-
-		/*
-		 * If we're busy, the assumption that the last idle period
-		 * predicts the future is flawed; age away the remaining
-		 * predicted idle time.
-		 */
-		if (unlikely(this_rq->wake_stamp < now)) {
-			while (this_rq->wake_stamp < now && this_rq->wake_avg_idle) {
-				this_rq->wake_stamp++;
-				this_rq->wake_avg_idle >>= 1;
-			}
-		}
-
-		avg_idle = this_rq->wake_avg_idle;
-		avg_cost = this_sd->avg_scan_cost + 1;
-
-		span_avg = sd->span_weight * avg_idle;
-		if (span_avg > sis_min_cores * avg_cost)
-			nr = div_u64(span_avg, avg_cost);
-		else
-			nr = sis_min_cores;
-
-		nr *= sched_smt_weight;
-		time = cpu_clock(this);
-	}
-
 	for_each_cpu_wrap(cpu, cpus, target + 1) {
 		if (has_idle_core) {
 			i = select_idle_core(p, cpu, cpus, &idle_cpu);
 			if ((unsigned int)i < nr_cpumask_bits)
 				break;
-
-			nr -= sched_smt_weight;
 		} else {
 			idle_cpu = __select_idle_cpu(cpu, p);
 			if ((unsigned int)idle_cpu < nr_cpumask_bits)
 				break;
-			nr--;
 		}
-
-		if (nr < 0)
-			break;
 	}
 
-	if ((unsigned int)idle_cpu < nr_cpumask_bits) {
-		if (has_idle_core)
-			set_idle_cores(target, false);
-
-		if (sched_feat(SIS_PROP)) {
-			time = cpu_clock(this) - time;
-
-			/*
-			 * Account for the scan cost of wakeups against the average
-			 * idle time.
-			 */
-			this_rq->wake_avg_idle -= min(this_rq->wake_avg_idle, time);
-
-			update_avg(&this_sd->avg_scan_cost, time);
-		}
-	}
+	if ((unsigned int)idle_cpu < nr_cpumask_bits && has_idle_core)
+		set_idle_cores(target, false);
 
 	return idle_cpu;
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 7f8dace0964c..4bb29c830b9d 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -52,11 +52,6 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 
-/*
- * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
- */
-SCHED_FEAT(SIS_PROP, true)
-
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
  * in a single rq->lock section. Default disabled because the
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2d6456fa15cb..35a0b591a2de 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1024,9 +1024,6 @@ struct rq {
 	u64			idle_stamp;
 	u64			avg_idle;
 
-	unsigned long		wake_stamp;
-	u64			wake_avg_idle;
-
 	/* This is used to determine avg_idle's max value */
 	u64			max_idle_balance_cost;
 
@@ -1352,8 +1349,6 @@ do {						\
 #ifdef CONFIG_SCHED_SMT
 extern void __update_idle_core(struct rq *rq);
 
-extern int sched_smt_weight;
-
 static inline void update_idle_core(struct rq *rq)
 {
 	if (static_branch_unlikely(&sched_smt_present))
-- 
2.26.2

