All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: joel@joelfernandes.org, chris.hyser@oracle.com,
	joshdon@google.com, mingo@kernel.org, vincent.guittot@linaro.org,
	valentin.schneider@arm.com, mgorman@suse.de
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, tglx@linutronix.de
Subject: [PATCH 05/19] sched: Core-wide rq->lock
Date: Thu, 22 Apr 2021 14:05:04 +0200	[thread overview]
Message-ID: <20210422123308.256677625@infradead.org> (raw)
In-Reply-To: 20210422120459.447350175@infradead.org

Introduce the basic infrastructure to have a core wide rq->lock.

This relies on the rq->__lock order being in increasing CPU number. It
is also constrained to SMT8 per lockdep (and SMT256 per preempt_count).

Luckily SMT8 is the max supported SMT count for Linux (Mips, Sparc and
Power are known to have this).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/Kconfig.preempt |    6 ++
 kernel/sched/core.c    |  139 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h   |   37 +++++++++++++
 3 files changed, 182 insertions(+)

--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -99,3 +99,9 @@ config PREEMPT_DYNAMIC
 
 	  Interesting if you want the same pre-built kernel should be used for
 	  both Server and Desktop workloads.
+
+config SCHED_CORE
+	bool "Core Scheduling for SMT"
+	default y
+	depends on SCHED_SMT
+
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -84,6 +84,103 @@ unsigned int sysctl_sched_rt_period = 10
 
 __read_mostly int scheduler_running;
 
+#ifdef CONFIG_SCHED_CORE
+
+DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
+
+/*
+ * Magic required such that:
+ *
+ *	raw_spin_rq_lock(rq);
+ *	...
+ *	raw_spin_rq_unlock(rq);
+ *
+ * ends up locking and unlocking the _same_ lock, and all CPUs
+ * always agree on what rq has what lock.
+ *
+ * XXX entirely possible to selectively enable cores, don't bother for now.
+ */
+
+static DEFINE_MUTEX(sched_core_mutex);
+static int sched_core_count;
+static struct cpumask sched_core_mask;
+
+static void __sched_core_flip(bool enabled)
+{
+	int cpu, t, i;
+
+	cpus_read_lock();
+
+	/*
+	 * Toggle the online cores, one by one.
+	 */
+	cpumask_copy(&sched_core_mask, cpu_online_mask);
+	for_each_cpu(cpu, &sched_core_mask) {
+		const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+
+		i = 0;
+		local_irq_disable();
+		for_each_cpu(t, smt_mask) {
+			/* supports up to SMT8 */
+			raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+		}
+
+		for_each_cpu(t, smt_mask)
+			cpu_rq(t)->core_enabled = enabled;
+
+		for_each_cpu(t, smt_mask)
+			raw_spin_unlock(&cpu_rq(t)->__lock);
+		local_irq_enable();
+
+		cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
+	}
+
+	/*
+	 * Toggle the offline CPUs.
+	 */
+	cpumask_copy(&sched_core_mask, cpu_possible_mask);
+	cpumask_andnot(&sched_core_mask, &sched_core_mask, cpu_online_mask);
+
+	for_each_cpu(cpu, &sched_core_mask)
+		cpu_rq(cpu)->core_enabled = enabled;
+
+	cpus_read_unlock();
+}
+
+static void __sched_core_enable(void)
+{
+	// XXX verify there are no cookie tasks (yet)
+
+	static_branch_enable(&__sched_core_enabled);
+	__sched_core_flip(true);
+}
+
+static void __sched_core_disable(void)
+{
+	// XXX verify there are no cookie tasks (left)
+
+	__sched_core_flip(false);
+	static_branch_disable(&__sched_core_enabled);
+}
+
+void sched_core_get(void)
+{
+	mutex_lock(&sched_core_mutex);
+	if (!sched_core_count++)
+		__sched_core_enable();
+	mutex_unlock(&sched_core_mutex);
+}
+
+void sched_core_put(void)
+{
+	mutex_lock(&sched_core_mutex);
+	if (!--sched_core_count)
+		__sched_core_disable();
+	mutex_unlock(&sched_core_mutex);
+}
+
+#endif /* CONFIG_SCHED_CORE */
+
 /*
  * part of the period that we allow rt tasks to run in us.
  * default: 0.95s
@@ -5042,6 +5139,40 @@ pick_next_task(struct rq *rq, struct tas
 	BUG();
 }
 
+#ifdef CONFIG_SCHED_CORE
+
+static inline void sched_core_cpu_starting(unsigned int cpu)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+	struct rq *rq, *core_rq = NULL;
+	int i;
+
+	core_rq = cpu_rq(cpu)->core;
+
+	if (!core_rq) {
+		for_each_cpu(i, smt_mask) {
+			rq = cpu_rq(i);
+			if (rq->core && rq->core == rq)
+				core_rq = rq;
+		}
+
+		if (!core_rq)
+			core_rq = cpu_rq(cpu);
+
+		for_each_cpu(i, smt_mask) {
+			rq = cpu_rq(i);
+
+			WARN_ON_ONCE(rq->core && rq->core != core_rq);
+			rq->core = core_rq;
+		}
+	}
+}
+#else /* !CONFIG_SCHED_CORE */
+
+static inline void sched_core_cpu_starting(unsigned int cpu) {}
+
+#endif /* CONFIG_SCHED_CORE */
+
 /*
  * __schedule() is the main scheduler function.
  *
@@ -8006,6 +8137,7 @@ static void sched_rq_cpu_starting(unsign
 
 int sched_cpu_starting(unsigned int cpu)
 {
+	sched_core_cpu_starting(cpu);
 	sched_rq_cpu_starting(cpu);
 	sched_tick_start(cpu);
 	return 0;
@@ -8290,6 +8424,11 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
+
+#ifdef CONFIG_SCHED_CORE
+		rq->core = NULL;
+		rq->core_enabled = 0;
+#endif
 	}
 
 	set_load_weight(&init_task, false);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1075,6 +1075,12 @@ struct rq {
 #endif
 	unsigned int		push_busy;
 	struct cpu_stop_work	push_work;
+
+#ifdef CONFIG_SCHED_CORE
+	/* per rq */
+	struct rq		*core;
+	unsigned int		core_enabled;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1113,6 +1119,35 @@ static inline bool is_migration_disabled
 #endif
 }
 
+#ifdef CONFIG_SCHED_CORE
+
+DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
+
+static inline bool sched_core_enabled(struct rq *rq)
+{
+	return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
+}
+
+static inline bool sched_core_disabled(void)
+{
+	return !static_branch_unlikely(&__sched_core_enabled);
+}
+
+static inline raw_spinlock_t *rq_lockp(struct rq *rq)
+{
+	if (sched_core_enabled(rq))
+		return &rq->core->__lock;
+
+	return &rq->__lock;
+}
+
+#else /* !CONFIG_SCHED_CORE */
+
+static inline bool sched_core_enabled(struct rq *rq)
+{
+	return false;
+}
+
 static inline bool sched_core_disabled(void)
 {
 	return true;
@@ -1123,6 +1158,8 @@ static inline raw_spinlock_t *rq_lockp(s
 	return &rq->__lock;
 }
 
+#endif /* CONFIG_SCHED_CORE */
+
 static inline void lockdep_assert_rq_held(struct rq *rq)
 {
 	lockdep_assert_held(rq_lockp(rq));



  parent reply	other threads:[~2021-04-22 12:38 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-22 12:04 [PATCH 00/19] sched: Core Scheduling Peter Zijlstra
2021-04-22 12:05 ` [PATCH 01/19] sched/fair: Add a few assertions Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-05-13  8:56     ` Ning, Hongyu
2021-04-22 12:05 ` [PATCH 02/19] sched: Provide raw_spin_rq_*lock*() helpers Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 03/19] sched: Wrap rq::lock access Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 04/19] sched: Prepare for Core-wide rq->lock Peter Zijlstra
2021-04-24  1:22   ` Josh Don
2021-04-26  8:31     ` Peter Zijlstra
2021-04-26 22:21       ` Josh Don
2021-04-27 17:10         ` Don Hiatt
2021-04-27 23:35           ` Josh Don
2021-04-28  1:03             ` Aubrey Li
2021-04-28  6:05               ` Aubrey Li
2021-04-28 10:57                 ` Aubrey Li
2021-04-28 16:41                   ` Don Hiatt
2021-04-29 20:48                     ` Josh Don
2021-04-29 21:09                       ` Don Hiatt
2021-04-29 23:22                         ` Josh Don
2021-04-30 16:18                           ` Don Hiatt
2021-04-30  8:26                         ` Aubrey Li
2021-04-28 16:04             ` Don Hiatt
2021-04-27 23:30         ` Josh Don
2021-04-28  9:13           ` Peter Zijlstra
2021-04-28 10:35             ` Aubrey Li
2021-04-28 11:03               ` Peter Zijlstra
2021-04-28 14:18                 ` Paul E. McKenney
2021-04-29 20:11             ` Josh Don
2021-05-03 19:17               ` Peter Zijlstra
2021-04-28  7:13         ` Peter Zijlstra
2021-04-28  6:02   ` Aubrey Li
2021-04-29  8:03   ` Aubrey Li
2021-04-29 20:39     ` Josh Don
2021-04-30  8:20       ` Aubrey Li
2021-04-30  8:48         ` Josh Don
2021-04-30 14:15           ` Aubrey Li
2021-05-04  7:38       ` Peter Zijlstra
2021-05-05 16:20         ` Don Hiatt
2021-05-06 10:25           ` Peter Zijlstra
2021-05-07  9:50   ` [PATCH v2 " Peter Zijlstra
2021-05-08  8:07     ` Aubrey Li
2021-05-12  9:07       ` Peter Zijlstra
2021-04-22 12:05 ` Peter Zijlstra [this message]
2021-05-07  9:50   ` [PATCH v2 05/19] sched: " Peter Zijlstra
2021-05-12 10:28     ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 06/19] sched: Optimize rq_lockp() usage Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 07/19] sched: Allow sched_core_put() from atomic context Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 08/19] sched: Introduce sched_class::pick_task() Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 09/19] sched: Basic tracking of matching tasks Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 10/19] sched: Add core wide task selection and scheduling Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 11/19] sched/fair: Fix forced idle sibling starvation corner case Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Vineeth Pillai
2021-04-22 12:05 ` [PATCH 12/19] sched: Fix priority inversion of cookied task with sibling Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Joel Fernandes (Google)
2021-04-22 12:05 ` [PATCH 13/19] sched/fair: Snapshot the min_vruntime of CPUs on force idle Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Joel Fernandes (Google)
2021-04-22 12:05 ` [PATCH 14/19] sched: Trivial forced-newidle balancer Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 15/19] sched: Migration changes for core scheduling Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Aubrey Li
2021-04-22 12:05 ` [PATCH 16/19] sched: Trivial core scheduling cookie management Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 17/19] sched: Inherit task cookie on fork() Peter Zijlstra
2021-05-10 16:06   ` Joel Fernandes
2021-05-10 16:22     ` Chris Hyser
2021-05-10 20:47       ` Joel Fernandes
2021-05-10 21:38         ` Chris Hyser
2021-05-12  9:05           ` Peter Zijlstra
2021-05-12 20:20             ` Josh Don
2021-05-12 21:07               ` Don Hiatt
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 18/19] sched: prctl() core-scheduling interface Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Chris Hyser
2021-06-14 23:36   ` [PATCH 18/19] " Josh Don
2021-06-15 11:31     ` Joel Fernandes
2021-08-05 16:53   ` Eugene Syromiatnikov
2021-08-05 17:00     ` Peter Zijlstra
2021-08-17 15:15   ` Eugene Syromiatnikov
2021-08-17 15:52     ` Peter Zijlstra
2021-08-17 23:17       ` Eugene Syromiatnikov
2021-08-19 11:09         ` [PATCH] sched: Fix Core-wide rq->lock for uninitialized CPUs Peter Zijlstra
2021-08-19 15:50           ` Tao Zhou
2021-08-19 16:19           ` Eugene Syromiatnikov
2021-08-20  0:18           ` Josh Don
2021-08-20 10:02             ` Peter Zijlstra
2021-08-23  9:07           ` [tip: sched/urgent] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 19/19] kselftest: Add test for core sched prctl interface Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Chris Hyser
2021-04-22 16:43 ` [PATCH 00/19] sched: Core Scheduling Don Hiatt
2021-04-22 17:29   ` Peter Zijlstra
2021-04-30  6:47 ` Ning, Hongyu
2021-05-06 10:29   ` Peter Zijlstra
2021-05-06 12:53     ` Ning, Hongyu
2021-05-07 18:02 ` Joel Fernandes
2021-05-10 16:16 ` Vincent Guittot
2021-05-11  7:00   ` Vincent Guittot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210422123308.256677625@infradead.org \
    --to=peterz@infradead.org \
    --cc=chris.hyser@oracle.com \
    --cc=joel@joelfernandes.org \
    --cc=joshdon@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.