LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: joel@joelfernandes.org, chris.hyser@oracle.com,
	joshdon@google.com, mingo@kernel.org, vincent.guittot@linaro.org,
	valentin.schneider@arm.com, mgorman@suse.de
Cc: linux-kernel@vger.kernel.org, tglx@linutronix.de
Subject: [PATCH v2 04/19] sched: Prepare for Core-wide rq->lock
Date: Fri, 7 May 2021 11:50:27 +0200
Message-ID: <YJUNY0dmrJMD/BIm@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20210422123308.196692074@infradead.org>


When switching on core-sched, CPUs need to agree which lock to use for
their RQ.

The new rule will be that rq->core_enabled will be toggled while
holding all rq->__locks that belong to a core. This means we need to
double check the rq->core_enabled value after each lock acquire and
retry if it changed.

This also has implications for those sites that take multiple RQ
locks, they need to be careful that the second lock doesn't end up
being the first lock.

Verify the lock pointer after acquiring the first lock, because if
they're on the same core, holding any of the rq->__lock instances will
pin the core state.

While there, change the rq->__lock order to CPU number, instead of rq
address, this greatly simplifies the next patch.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
---
 kernel/sched/core.c  |   48 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h |   48 +++++++++++++++++-------------------------------
 2 files changed, 63 insertions(+), 33 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -186,12 +186,37 @@ int sysctl_sched_rt_runtime = 950000;
 
 void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
 {
-	raw_spin_lock_nested(rq_lockp(rq), subclass);
+	raw_spinlock_t *lock;
+
+	if (sched_core_disabled()) {
+		raw_spin_lock_nested(&rq->__lock, subclass);
+		return;
+	}
+
+	for (;;) {
+		lock = rq_lockp(rq);
+		raw_spin_lock_nested(lock, subclass);
+		if (likely(lock == rq_lockp(rq)))
+			return;
+		raw_spin_unlock(lock);
+	}
 }
 
 bool raw_spin_rq_trylock(struct rq *rq)
 {
-	return raw_spin_trylock(rq_lockp(rq));
+	raw_spinlock_t *lock;
+	bool ret;
+
+	if (sched_core_disabled())
+		return raw_spin_trylock(&rq->__lock);
+
+	for (;;) {
+		lock = rq_lockp(rq);
+		ret = raw_spin_trylock(lock);
+		if (!ret || (likely(lock == rq_lockp(rq))))
+			return ret;
+		raw_spin_unlock(lock);
+	}
 }
 
 void raw_spin_rq_unlock(struct rq *rq)
@@ -199,6 +224,25 @@ void raw_spin_rq_unlock(struct rq *rq)
 	raw_spin_unlock(rq_lockp(rq));
 }
 
+#ifdef CONFIG_SMP
+/*
+ * double_rq_lock - safely lock two runqueues
+ */
+void double_rq_lock(struct rq *rq1, struct rq *rq2)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (rq_order_less(rq2, rq1))
+		swap(rq1, rq2);
+
+	raw_spin_rq_lock(rq1);
+	if (rq_lockp(rq1) == rq_lockp(rq2))
+		return;
+
+	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+}
+#endif
+
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1113,6 +1113,11 @@ static inline bool is_migration_disabled
 #endif
 }
 
+static inline bool sched_core_disabled(void)
+{
+	return true;
+}
+
 static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 {
 	return &rq->__lock;
@@ -2231,10 +2236,17 @@ unsigned long arch_scale_freq_capacity(i
 }
 #endif
 
+
 #ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPTION
 
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
+static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
+{
+	return rq1->cpu < rq2->cpu;
+}
+
+extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
+#ifdef CONFIG_PREEMPTION
 
 /*
  * fair double_lock_balance: Safely acquires both rq->locks in a fair
@@ -2274,14 +2286,13 @@ static inline int _double_lock_balance(s
 	if (likely(raw_spin_rq_trylock(busiest)))
 		return 0;
 
-	if (rq_lockp(busiest) >= rq_lockp(this_rq)) {
+	if (rq_order_less(this_rq, busiest)) {
 		raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
 		return 0;
 	}
 
 	raw_spin_rq_unlock(this_rq);
-	raw_spin_rq_lock(busiest);
-	raw_spin_rq_lock_nested(this_rq, SINGLE_DEPTH_NESTING);
+	double_rq_lock(this_rq, busiest);
 
 	return 1;
 }
@@ -2334,31 +2345,6 @@ static inline void double_raw_lock(raw_s
 }
 
 /*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
-	__acquires(rq1->lock)
-	__acquires(rq2->lock)
-{
-	BUG_ON(!irqs_disabled());
-	if (rq_lockp(rq1) == rq_lockp(rq2)) {
-		raw_spin_rq_lock(rq1);
-		__acquire(rq2->lock);	/* Fake it out ;) */
-	} else {
-		if (rq_lockp(rq1) < rq_lockp(rq2)) {
-			raw_spin_rq_lock(rq1);
-			raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
-		} else {
-			raw_spin_rq_lock(rq2);
-			raw_spin_rq_lock_nested(rq1, SINGLE_DEPTH_NESTING);
-		}
-	}
-}
-
-/*
  * double_rq_unlock - safely unlock two runqueues
  *
  * Note this does not restore interrupts like task_rq_unlock,
@@ -2368,11 +2354,11 @@ static inline void double_rq_unlock(stru
 	__releases(rq1->lock)
 	__releases(rq2->lock)
 {
-	raw_spin_rq_unlock(rq1);
 	if (rq_lockp(rq1) != rq_lockp(rq2))
 		raw_spin_rq_unlock(rq2);
 	else
 		__release(rq2->lock);
+	raw_spin_rq_unlock(rq1);
 }
 
 extern void set_rq_online (struct rq *rq);

  parent reply index

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-22 12:04 [PATCH 00/19] sched: Core Scheduling Peter Zijlstra
2021-04-22 12:05 ` [PATCH 01/19] sched/fair: Add a few assertions Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-05-13  8:56     ` Ning, Hongyu
2021-04-22 12:05 ` [PATCH 02/19] sched: Provide raw_spin_rq_*lock*() helpers Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 03/19] sched: Wrap rq::lock access Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 04/19] sched: Prepare for Core-wide rq->lock Peter Zijlstra
2021-04-24  1:22   ` Josh Don
2021-04-26  8:31     ` Peter Zijlstra
2021-04-26 22:21       ` Josh Don
2021-04-27 17:10         ` Don Hiatt
2021-04-27 23:35           ` Josh Don
2021-04-28  1:03             ` Aubrey Li
2021-04-28  6:05               ` Aubrey Li
2021-04-28 10:57                 ` Aubrey Li
2021-04-28 16:41                   ` Don Hiatt
2021-04-29 20:48                     ` Josh Don
2021-04-29 21:09                       ` Don Hiatt
2021-04-29 23:22                         ` Josh Don
2021-04-30 16:18                           ` Don Hiatt
2021-04-30  8:26                         ` Aubrey Li
2021-04-28 16:04             ` Don Hiatt
2021-04-27 23:30         ` Josh Don
2021-04-28  9:13           ` Peter Zijlstra
2021-04-28 10:35             ` Aubrey Li
2021-04-28 11:03               ` Peter Zijlstra
2021-04-28 14:18                 ` Paul E. McKenney
2021-04-29 20:11             ` Josh Don
2021-05-03 19:17               ` Peter Zijlstra
2021-04-28  7:13         ` Peter Zijlstra
2021-04-28  6:02   ` Aubrey Li
2021-04-29  8:03   ` Aubrey Li
2021-04-29 20:39     ` Josh Don
2021-04-30  8:20       ` Aubrey Li
2021-04-30  8:48         ` Josh Don
2021-04-30 14:15           ` Aubrey Li
2021-05-04  7:38       ` Peter Zijlstra
2021-05-05 16:20         ` Don Hiatt
2021-05-06 10:25           ` Peter Zijlstra
2021-05-07  9:50   ` Peter Zijlstra [this message]
2021-05-08  8:07     ` [PATCH v2 " Aubrey Li
2021-05-12  9:07       ` Peter Zijlstra
2021-04-22 12:05 ` [PATCH 05/19] sched: " Peter Zijlstra
2021-05-07  9:50   ` [PATCH v2 " Peter Zijlstra
2021-05-12 10:28     ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 06/19] sched: Optimize rq_lockp() usage Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 07/19] sched: Allow sched_core_put() from atomic context Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 08/19] sched: Introduce sched_class::pick_task() Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 09/19] sched: Basic tracking of matching tasks Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 10/19] sched: Add core wide task selection and scheduling Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 11/19] sched/fair: Fix forced idle sibling starvation corner case Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Vineeth Pillai
2021-04-22 12:05 ` [PATCH 12/19] sched: Fix priority inversion of cookied task with sibling Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Joel Fernandes (Google)
2021-04-22 12:05 ` [PATCH 13/19] sched/fair: Snapshot the min_vruntime of CPUs on force idle Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Joel Fernandes (Google)
2021-04-22 12:05 ` [PATCH 14/19] sched: Trivial forced-newidle balancer Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 15/19] sched: Migration changes for core scheduling Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Aubrey Li
2021-04-22 12:05 ` [PATCH 16/19] sched: Trivial core scheduling cookie management Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 17/19] sched: Inherit task cookie on fork() Peter Zijlstra
2021-05-10 16:06   ` Joel Fernandes
2021-05-10 16:22     ` Chris Hyser
2021-05-10 20:47       ` Joel Fernandes
2021-05-10 21:38         ` Chris Hyser
2021-05-12  9:05           ` Peter Zijlstra
2021-05-12 20:20             ` Josh Don
2021-05-12 21:07               ` Don Hiatt
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2021-04-22 12:05 ` [PATCH 18/19] sched: prctl() core-scheduling interface Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Chris Hyser
2021-06-14 23:36   ` [PATCH 18/19] " Josh Don
2021-06-15 11:31     ` Joel Fernandes
2021-04-22 12:05 ` [PATCH 19/19] kselftest: Add test for core sched prctl interface Peter Zijlstra
2021-05-12 10:28   ` [tip: sched/core] " tip-bot2 for Chris Hyser
2021-04-22 16:43 ` [PATCH 00/19] sched: Core Scheduling Don Hiatt
2021-04-22 17:29   ` Peter Zijlstra
2021-04-30  6:47 ` Ning, Hongyu
2021-05-06 10:29   ` Peter Zijlstra
2021-05-06 12:53     ` Ning, Hongyu
2021-05-07 18:02 ` Joel Fernandes
2021-05-10 16:16 ` Vincent Guittot
2021-05-11  7:00   ` Vincent Guittot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YJUNY0dmrJMD/BIm@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=chris.hyser@oracle.com \
    --cc=joel@joelfernandes.org \
    --cc=joshdon@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git