All of lore.kernel.org
 help / color / mirror / Atom feed
From: "tip-bot2 for Peter Zijlstra" <tip-bot2@linutronix.de>
To: linux-tip-commits@vger.kernel.org
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>, x86 <x86@kernel.org>,
	LKML <linux-kernel@vger.kernel.org>
Subject: [tip: sched/core] sched: Replace rq::wake_list
Date: Mon, 01 Jun 2020 09:52:18 -0000	[thread overview]
Message-ID: <159100513859.17951.5366888281495604529.tip-bot2@tip-bot2> (raw)
In-Reply-To: <20200526161908.129371594@infradead.org>

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     a148866489fbe243c936fe43e4525d8dbfa0318f
Gitweb:        https://git.kernel.org/tip/a148866489fbe243c936fe43e4525d8dbfa0318f
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Tue, 26 May 2020 18:11:04 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 28 May 2020 10:54:16 +02:00

sched: Replace rq::wake_list

The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
got smp_call_function_single_async() subtly wrong. Even though it will
return -EBUSY when trying to re-use a csd, that condition is not
atomic and still requires external serialization.

The change in ttwu_queue_remote() got this wrong.

While on first reading ttwu_queue_remote() has an atomic test-and-set
that appears to serialize the use, the matching 'release' is not in
the right place to actually guarantee this serialization.

The actual race is vs the sched_ttwu_pending() call in the idle loop;
that can run the wakeup-list without consuming the CSD.

Instead of trying to chain the lists, merge them.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.129371594@infradead.org
---
 include/linux/sched.h |  1 +-
 include/linux/smp.h   |  1 +-
 kernel/sched/core.c   | 25 ++++++----------------
 kernel/sched/idle.c   |  1 +-
 kernel/sched/sched.h  |  8 +-------
 kernel/smp.c          | 47 +++++++++++++++++++++++++++++++++++-------
 6 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ebc6870..e0f5f41 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -654,6 +654,7 @@ struct task_struct {
 
 #ifdef CONFIG_SMP
 	struct llist_node		wake_entry;
+	unsigned int			wake_entry_type;
 	int				on_cpu;
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/* Current CPU: */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 45ad6e3..84f90e2 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -25,6 +25,7 @@ enum {
 	CSD_TYPE_ASYNC		= 0x00,
 	CSD_TYPE_SYNC		= 0x10,
 	CSD_TYPE_IRQ_WORK	= 0x20,
+	CSD_TYPE_TTWU		= 0x30,
 	CSD_FLAG_TYPE_MASK	= 0xF0,
 };
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b71ed5e..b3c64c6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data)
 	 * __migrate_task() such that we will not miss enforcing cpus_ptr
 	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
 	 */
-	sched_ttwu_pending();
+	flush_smp_call_function_from_idle();
 
 	raw_spin_lock(&p->pi_lock);
 	rq_lock(rq, &rf);
@@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 
 #ifdef CONFIG_SMP
-void sched_ttwu_pending(void)
+void sched_ttwu_pending(void *arg)
 {
+	struct llist_node *llist = arg;
 	struct rq *rq = this_rq();
-	struct llist_node *llist;
 	struct task_struct *p, *t;
 	struct rq_flags rf;
 
-	llist = llist_del_all(&rq->wake_list);
 	if (!llist)
 		return;
 
@@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void)
 	rq_unlock_irqrestore(rq, &rf);
 }
 
-static void wake_csd_func(void *info)
-{
-	sched_ttwu_pending();
-}
-
 void send_call_function_single_ipi(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -2327,12 +2321,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
 	p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
 
 	WRITE_ONCE(rq->ttwu_pending, 1);
-	if (llist_add(&p->wake_entry, &rq->wake_list)) {
-		if (!set_nr_if_polling(rq->idle))
-			smp_call_function_single_async(cpu, &rq->wake_csd);
-		else
-			trace_sched_wake_idle_without_ipi(cpu);
-	}
+	__smp_call_single_queue(cpu, &p->wake_entry);
 }
 
 void wake_up_if_idle(int cpu)
@@ -2772,6 +2761,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->capture_control = NULL;
 #endif
 	init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SMP
+	p->wake_entry_type = CSD_TYPE_TTWU;
+#endif
 }
 
 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -6564,7 +6556,6 @@ int sched_cpu_dying(unsigned int cpu)
 	struct rq_flags rf;
 
 	/* Handle pending wakeups and then migrate everything off */
-	sched_ttwu_pending();
 	sched_tick_stop(cpu);
 
 	rq_lock_irqsave(rq, &rf);
@@ -6763,8 +6754,6 @@ void __init sched_init(void)
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
-		rq_csd_init(rq, &rq->wake_csd, wake_csd_func);
-
 		INIT_LIST_HEAD(&rq->cfs_tasks);
 
 		rq_attach_root(rq, &def_root_domain);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 387fd75..05deb81 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -294,7 +294,6 @@ static void do_idle(void)
 	 * critical section.
 	 */
 	flush_smp_call_function_from_idle();
-	sched_ttwu_pending();
 	schedule_idle();
 
 	if (unlikely(klp_patch_pending(current)))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c86fc94..1d4e94c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1023,11 +1023,6 @@ struct rq {
 	unsigned int		ttwu_local;
 #endif
 
-#ifdef CONFIG_SMP
-	call_single_data_t	wake_csd;
-	struct llist_head	wake_list;
-#endif
-
 #ifdef CONFIG_CPU_IDLE
 	/* Must be inspected within a rcu lock section */
 	struct cpuidle_state	*idle_state;
@@ -1371,8 +1366,6 @@ queue_balance_callback(struct rq *rq,
 	rq->balance_callback = head;
 }
 
-extern void sched_ttwu_pending(void);
-
 #define rcu_dereference_check_sched_domain(p) \
 	rcu_dereference_check((p), \
 			      lockdep_is_held(&sched_domains_mutex))
@@ -1512,7 +1505,6 @@ extern void flush_smp_call_function_from_idle(void);
 
 #else /* !CONFIG_SMP: */
 static inline void flush_smp_call_function_from_idle(void) { }
-static inline void sched_ttwu_pending(void) { }
 #endif
 
 #include "stats.h"
diff --git a/kernel/smp.c b/kernel/smp.c
index 856562b..0d61dc0 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -196,6 +196,7 @@ void generic_smp_call_function_single_interrupt(void)
 	flush_smp_call_function_queue(true);
 }
 
+extern void sched_ttwu_pending(void *);
 extern void irq_work_single(void *);
 
 /**
@@ -244,6 +245,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 					csd->func);
 				break;
 
+			case CSD_TYPE_TTWU:
+				pr_warn("IPI task-wakeup sent to offline CPU\n");
+				break;
+
 			default:
 				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
 					CSD_TYPE(csd));
@@ -275,22 +280,43 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 		}
 	}
 
+	if (!entry)
+		return;
+
 	/*
 	 * Second; run all !SYNC callbacks.
 	 */
+	prev = NULL;
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
 		int type = CSD_TYPE(csd);
 
-		if (type == CSD_TYPE_ASYNC) {
-			smp_call_func_t func = csd->func;
-			void *info = csd->info;
+		if (type != CSD_TYPE_TTWU) {
+			if (prev) {
+				prev->next = &csd_next->llist;
+			} else {
+				entry = &csd_next->llist;
+			}
 
-			csd_unlock(csd);
-			func(info);
-		} else if (type == CSD_TYPE_IRQ_WORK) {
-			irq_work_single(csd);
+			if (type == CSD_TYPE_ASYNC) {
+				smp_call_func_t func = csd->func;
+				void *info = csd->info;
+
+				csd_unlock(csd);
+				func(info);
+			} else if (type == CSD_TYPE_IRQ_WORK) {
+				irq_work_single(csd);
+			}
+
+		} else {
+			prev = &csd->llist;
 		}
 	}
+
+	/*
+	 * Third; only CSD_TYPE_TTWU is left, issue those.
+	 */
+	if (entry)
+		sched_ttwu_pending(entry);
 }
 
 void flush_smp_call_function_from_idle(void)
@@ -659,6 +685,13 @@ void __init smp_init(void)
 	BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
 		     offsetof(struct __call_single_data, flags));
 
+	/*
+	 * Assert the CSD_TYPE_TTWU layout is similar enough
+	 * for task_struct to be on the @call_single_queue.
+	 */
+	BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
+		     offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
+
 	idle_threads_init();
 	cpuhp_threads_init();
 

  parent reply	other threads:[~2020-06-01  9:53 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-26 16:10 [RFC][PATCH 0/7] Fix the scheduler-IPI mess Peter Zijlstra
2020-05-26 16:10 ` [RFC][PATCH 1/7] sched: Fix smp_call_function_single_async() usage for ILB Peter Zijlstra
2020-05-26 23:56   ` Frederic Weisbecker
2020-05-27 10:23   ` Vincent Guittot
2020-05-27 11:28     ` Frederic Weisbecker
2020-05-27 12:07       ` Vincent Guittot
2020-05-29 15:26   ` Valentin Schneider
2020-06-01  9:52   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-06-01 11:40     ` Frederic Weisbecker
2020-05-26 16:10 ` [RFC][PATCH 2/7] smp: Optimize flush_smp_call_function_queue() Peter Zijlstra
2020-05-28 12:28   ` Frederic Weisbecker
2020-06-01  9:52   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-05-26 16:11 ` [RFC][PATCH 3/7] smp: Move irq_work_run() out of flush_smp_call_function_queue() Peter Zijlstra
2020-05-29 13:04   ` Frederic Weisbecker
2020-06-01  9:52   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-05-26 16:11 ` [RFC][PATCH 4/7] smp: Optimize send_call_function_single_ipi() Peter Zijlstra
2020-05-27  7:01   ` kbuild test robot
2020-05-27  9:56   ` Peter Zijlstra
2020-05-27 10:15     ` Peter Zijlstra
2020-05-27 15:56       ` Paul E. McKenney
2020-05-27 16:35         ` Peter Zijlstra
2020-05-27 17:12           ` Peter Zijlstra
2020-05-27 19:39             ` Paul E. McKenney
2020-05-28  1:35               ` Joel Fernandes
2020-05-28  8:59             ` [tip: core/rcu] rcu: Allow for smp_call_function() running callbacks from idle tip-bot2 for Peter Zijlstra
2021-01-21 16:56             ` [RFC][PATCH 4/7] smp: Optimize send_call_function_single_ipi() Peter Zijlstra
2021-01-22  0:20               ` Paul E. McKenney
2021-01-22  8:31                 ` Peter Zijlstra
2021-01-22 15:35                   ` Paul E. McKenney
2020-05-29  1:19   ` kbuild test robot
2020-05-29 11:18     ` Peter Zijlstra
2020-05-30  1:07       ` Philip Li
2020-05-29 13:01   ` Frederic Weisbecker
2020-06-01  9:52   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-05-26 16:11 ` [RFC][PATCH 5/7] irq_work, smp: Allow irq_work on call_single_queue Peter Zijlstra
2020-05-28 23:40   ` Frederic Weisbecker
2020-05-29 13:36     ` Peter Zijlstra
2020-06-05  9:37       ` Peter Zijlstra
2020-06-05 15:02         ` Frederic Weisbecker
2020-06-05 16:17           ` Peter Zijlstra
2020-06-05 15:24         ` Kees Cook
2020-06-10 13:24         ` Frederic Weisbecker
2020-06-01  9:52   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-05-26 16:11 ` [RFC][PATCH 6/7] sched: Add rq::ttwu_pending Peter Zijlstra
2020-06-01  9:52   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2020-05-26 16:11 ` [RFC][PATCH 7/7] sched: Replace rq::wake_list Peter Zijlstra
2020-05-29 15:10   ` Valdis Klētnieks
2020-06-01  9:52   ` tip-bot2 for Peter Zijlstra [this message]
2020-06-02 15:16     ` [tip: sched/core] " Frederic Weisbecker
2020-06-04 14:18   ` [RFC][PATCH 7/7] " Guenter Roeck
2020-06-05  0:24     ` Eric Biggers
2020-06-05  7:41       ` Peter Zijlstra
2020-06-05 16:15         ` Eric Biggers
2020-06-06 23:13           ` Guenter Roeck
2020-06-09 20:21             ` Eric Biggers
2020-06-09 21:25               ` Guenter Roeck
2020-06-09 21:38                 ` Eric Biggers
2020-06-09 22:06                   ` Peter Zijlstra
2020-06-09 23:03                     ` Guenter Roeck
2020-06-10  9:09                       ` Peter Zijlstra
2020-06-18 17:57                 ` Steven Rostedt
2020-06-18 19:06                   ` Guenter Roeck
2020-06-09 22:07               ` Peter Zijlstra
2020-06-05  8:10     ` Peter Zijlstra
2020-06-05 13:33       ` Guenter Roeck
2020-06-05 14:09         ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=159100513859.17951.5366888281495604529.tip-bot2@tip-bot2 \
    --to=tip-bot2@linutronix.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.