All of lore.kernel.org
 help / color / mirror / Atom feed
From: "tip-bot2 for Peter Zijlstra" <tip-bot2@linutronix.de>
To: linux-tip-commits@vger.kernel.org
Cc: Scott Wood <swood@redhat.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [tip: sched/core] sched: Fix balance_callback()
Date: Wed, 11 Nov 2020 08:23:23 -0000	[thread overview]
Message-ID: <160508300397.11244.13967684821070428528.tip-bot2@tip-bot2> (raw)
In-Reply-To: <20201023102346.203901269@infradead.org>

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     565790d28b1e33ee2f77bad5348b99f6dfc366fd
Gitweb:        https://git.kernel.org/tip/565790d28b1e33ee2f77bad5348b99f6dfc366fd
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Mon, 11 May 2020 14:13:00 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 10 Nov 2020 18:38:57 +01:00

sched: Fix balance_callback()

The intent of balance_callback() has always been to delay executing
balancing operations until the end of the current rq->lock section.
This is because balance operations must often drop rq->lock, and that
isn't safe in general.

However, as noted by Scott, there were a few holes in that scheme;
balance_callback() was called after rq->lock was dropped, which means
another CPU can interleave and touch the callback list.

Rework code to call the balance callbacks before dropping rq->lock
where possible, and otherwise splice the balance list onto a local
stack.

This guarantees that the balance list must be empty when we take
rq->lock. IOW, we'll only ever run our own balance callbacks.

Reported-by: Scott Wood <swood@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Link: https://lkml.kernel.org/r/20201023102346.203901269@infradead.org
---
 kernel/sched/core.c  | 119 ++++++++++++++++++++++++++----------------
 kernel/sched/sched.h |   3 +-
 2 files changed, 78 insertions(+), 44 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e24104..0196a3f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3485,6 +3485,69 @@ static inline void finish_task(struct task_struct *prev)
 #endif
 }
 
+#ifdef CONFIG_SMP
+
+static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+	void (*func)(struct rq *rq);
+	struct callback_head *next;
+
+	lockdep_assert_held(&rq->lock);
+
+	while (head) {
+		func = (void (*)(struct rq *))head->func;
+		next = head->next;
+		head->next = NULL;
+		head = next;
+
+		func(rq);
+	}
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+	struct callback_head *head = rq->balance_callback;
+
+	lockdep_assert_held(&rq->lock);
+	if (head)
+		rq->balance_callback = NULL;
+
+	return head;
+}
+
+static void __balance_callbacks(struct rq *rq)
+{
+	do_balance_callbacks(rq, splice_balance_callbacks(rq));
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+	unsigned long flags;
+
+	if (unlikely(head)) {
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		do_balance_callbacks(rq, head);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
+}
+
+#else
+
+static inline void __balance_callbacks(struct rq *rq)
+{
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+	return NULL;
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+}
+
+#endif
+
 static inline void
 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
 {
@@ -3510,6 +3573,7 @@ static inline void finish_lock_switch(struct rq *rq)
 	 * prev into current:
 	 */
 	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+	__balance_callbacks(rq);
 	raw_spin_unlock_irq(&rq->lock);
 }
 
@@ -3651,43 +3715,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	return rq;
 }
 
-#ifdef CONFIG_SMP
-
-/* rq->lock is NOT held, but preemption is disabled */
-static void __balance_callback(struct rq *rq)
-{
-	struct callback_head *head, *next;
-	void (*func)(struct rq *rq);
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	head = rq->balance_callback;
-	rq->balance_callback = NULL;
-	while (head) {
-		func = (void (*)(struct rq *))head->func;
-		next = head->next;
-		head->next = NULL;
-		head = next;
-
-		func(rq);
-	}
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-static inline void balance_callback(struct rq *rq)
-{
-	if (unlikely(rq->balance_callback))
-		__balance_callback(rq);
-}
-
-#else
-
-static inline void balance_callback(struct rq *rq)
-{
-}
-
-#endif
-
 /**
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
@@ -3707,7 +3734,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	 */
 
 	rq = finish_task_switch(prev);
-	balance_callback(rq);
 	preempt_enable();
 
 	if (current->set_child_tid)
@@ -4523,10 +4549,11 @@ static void __sched notrace __schedule(bool preempt)
 		rq = context_switch(rq, prev, next, &rf);
 	} else {
 		rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-		rq_unlock_irq(rq, &rf);
-	}
 
-	balance_callback(rq);
+		rq_unpin_lock(rq, &rf);
+		__balance_callbacks(rq);
+		raw_spin_unlock_irq(&rq->lock);
+	}
 }
 
 void __noreturn do_task_dead(void)
@@ -4937,9 +4964,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 out_unlock:
 	/* Avoid rq from going away on us: */
 	preempt_disable();
-	__task_rq_unlock(rq, &rf);
 
-	balance_callback(rq);
+	rq_unpin_lock(rq, &rf);
+	__balance_callbacks(rq);
+	raw_spin_unlock(&rq->lock);
+
 	preempt_enable();
 }
 #else
@@ -5213,6 +5242,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	int retval, oldprio, oldpolicy = -1, queued, running;
 	int new_effective_prio, policy = attr->sched_policy;
 	const struct sched_class *prev_class;
+	struct callback_head *head;
 	struct rq_flags rf;
 	int reset_on_fork;
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
@@ -5451,6 +5481,7 @@ change:
 
 	/* Avoid rq from going away on us: */
 	preempt_disable();
+	head = splice_balance_callbacks(rq);
 	task_rq_unlock(rq, p, &rf);
 
 	if (pi) {
@@ -5459,7 +5490,7 @@ change:
 	}
 
 	/* Run balance callbacks after we've adjusted the PI chain: */
-	balance_callback(rq);
+	balance_callbacks(rq, head);
 	preempt_enable();
 
 	return 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df80bfc..738a00b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1221,6 +1221,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
 	rf->clock_update_flags = 0;
 #endif
+#ifdef CONFIG_SMP
+	SCHED_WARN_ON(rq->balance_callback);
+#endif
 }
 
 static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)

  reply	other threads:[~2020-11-11  8:23 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-23 10:11 [PATCH v4 00/19] sched: Migrate disable support Peter Zijlstra
2020-10-23 10:11 ` [PATCH v4 01/19] stop_machine: Add function and caller debug info Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 02/19] sched: Fix balance_callback() Peter Zijlstra
2020-11-11  8:23   ` tip-bot2 for Peter Zijlstra [this message]
2020-11-11 20:30     ` [tip: sched/core] " Paul Bolle
2020-11-11 20:45       ` Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 03/19] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 04/19] sched/core: Wait for tasks being pushed away on hotplug Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Thomas Gleixner
2020-10-23 10:12 ` [PATCH v4 05/19] workqueue: Manually break affinity " Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 06/19] sched/hotplug: Consolidate task migration on CPU unplug Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Thomas Gleixner
2020-10-23 10:12 ` [PATCH v4 07/19] sched: Fix hotplug vs CPU bandwidth control Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 08/19] sched: Massage set_cpus_allowed() Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 09/19] sched: Add migrate_disable() Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 10/19] sched: Fix migrate_disable() vs set_cpus_allowed_ptr() Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-11-12 16:38   ` [PATCH v4 10/19] " Qian Cai
2020-11-12 17:26     ` Valentin Schneider
2020-11-12 18:01       ` Qian Cai
2020-11-12 19:31         ` Valentin Schneider
2020-11-12 19:41           ` Qian Cai
2020-11-12 20:37           ` Qian Cai
2020-11-12 21:26             ` Valentin Schneider
2020-11-13 10:27           ` Peter Zijlstra
2020-11-12 18:35       ` Qian Cai
2020-11-20 12:34     ` [tip: sched/core] sched/core: Add missing completion for affine_move_task() waiters tip-bot2 for Valentin Schneider
2020-10-23 10:12 ` [PATCH v4 11/19] sched/core: Make migrate disable and CPU hotplug cooperative Peter Zijlstra
2020-10-29 16:27   ` Valentin Schneider
2020-10-29 17:34     ` Peter Zijlstra
2020-10-29 17:55       ` Valentin Schneider
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Thomas Gleixner
2020-11-13 15:06   ` [PATCH v4 11/19] " Qian Cai
2020-11-17 19:28     ` Valentin Schneider
2020-11-18 14:44       ` Qian Cai
2020-11-23 18:13         ` Sebastian Andrzej Siewior
2020-12-02 21:59           ` Qian Cai
2020-12-03 12:31           ` Qian Cai
2020-12-04  0:23       ` Qian Cai
2020-12-04 21:19       ` Qian Cai
2020-12-05 18:37         ` Valentin Schneider
2020-12-06  1:17           ` Qian Cai
2020-12-07 19:27         ` Valentin Schneider
2020-12-08 13:46           ` Qian Cai
2020-12-09 19:16             ` Valentin Schneider
2020-10-23 10:12 ` [PATCH v4 12/19] sched,rt: Use cpumask_any*_distribute() Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 13/19] sched,rt: Use the full cpumask for balancing Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 14/19] sched, lockdep: Annotate ->pi_lock recursion Peter Zijlstra
2020-10-29 16:27   ` Valentin Schneider
2020-10-29 17:38     ` Peter Zijlstra
2020-10-29 18:09       ` Valentin Schneider
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 15/19] sched: Fix migrate_disable() vs rt/dl balancing Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-12-26 13:54   ` [PATCH v4 15/19] " Qais Yousef
2021-03-05 14:56     ` Peter Zijlstra
2021-03-05 15:41       ` Valentin Schneider
2021-03-05 17:11         ` Qais Yousef
2021-03-10 14:44         ` Qais Yousef
2021-03-05 16:48       ` Qais Yousef
2020-10-23 10:12 ` [PATCH v4 16/19] sched/proc: Print accurate cpumask vs migrate_disable() Peter Zijlstra
2020-11-11  8:23   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 17/19] sched: Add migrate_disable() tracepoints Peter Zijlstra
2020-10-29 16:27   ` Valentin Schneider
2020-10-29 17:43     ` Peter Zijlstra
2020-10-29 17:56       ` Valentin Schneider
2020-10-29 17:59         ` Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 18/19] sched: Deny self-issued __set_cpus_allowed_ptr() when migrate_disable() Peter Zijlstra
2020-10-23 10:12 ` [PATCH v4 19/19] sched: Comment affine_move_task() Peter Zijlstra
2020-10-29 16:27   ` Valentin Schneider
2020-10-29 17:44     ` Peter Zijlstra
2020-10-29 19:03 ` [PATCH v4 00/19] sched: Migrate disable support Valentin Schneider
2020-11-09 16:39 ` Daniel Bristot de Oliveira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=160508300397.11244.13967684821070428528.tip-bot2@tip-bot2 \
    --to=tip-bot2@linutronix.de \
    --cc=bristot@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=swood@redhat.com \
    --cc=valentin.schneider@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.