linux-kernel.vger.kernel.org archive mirror
* [patch 00/10] sched: Migrate disable support for RT
@ 2020-09-17  9:42 Thomas Gleixner
  2020-09-17  9:42 ` [patch 01/10] sched: Fix balance_callback() Thomas Gleixner
                   ` (9 more replies)
  0 siblings, 10 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

On RT enabled kernels most of the code, including spin/rw lock held
sections, is preemptible, which also makes the tasks migratable. That
violates the per CPU constraints. RT therefore needs a mechanism to
control migration independently of preemption.

This series provides a mostly rewritten version of the RT code based on
the newest scheduler bits. It includes, and depends on, Peter's 'let tasks
kick themselves off the outgoing CPU' feature.

While migrate disable/enable is trivial in principle, the tricky part is
the coordination and serialization against concurrent affinity changes,
e.g. sched_setaffinity(2), and CPU hotplug.
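
As a rough usage sketch (illustrative only; 'example_pcpu' and
'example_data' are made up for this mail and are not part of the series),
this is the kind of pattern the interface enables on RT:

	#include <linux/percpu.h>
	#include <linux/preempt.h>
	#include <linux/spinlock.h>

	/* Hypothetical per-CPU data, assumed to be initialized elsewhere */
	struct example_pcpu {
		spinlock_t	lock;	/* sleeping lock on RT */
		int		count;
	};
	static DEFINE_PER_CPU(struct example_pcpu, example_data);

	static void example_update(void)
	{
		struct example_pcpu *p;

		/* Stay on this CPU, but remain preemptible */
		migrate_disable();
		p = this_cpu_ptr(&example_data);
		spin_lock(&p->lock);	/* may be preempted while held on RT */
		p->count++;
		spin_unlock(&p->lock);
		migrate_enable();
	}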

Thanks,

	tglx


* [patch 01/10] sched: Fix balance_callback()
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 02/10] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Thomas Gleixner
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

From: Peter Zijlstra <peterz@infradead.org>

The intent of balance_callback() has always been to delay executing
balancing operations until the end of the current rq->lock section.  This
is because balance operations must often drop rq->lock, and that isn't safe
in general.

However, as noted by Scott, there were a few holes in that scheme;
balance_callback() was called after rq->lock was dropped, which meant
another CPU could interleave and touch the callback list.

Rework code to call the balance callbacks before dropping rq->lock where
possible, and otherwise splice the balance list onto a local stack.

This guarantees that the balance list must be empty when rq->lock is
acquired. IOW, this will only ever run balance callbacks which are queued
while holding rq->lock.
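
For context (not part of this patch): balance work is queued by the
scheduler classes via the pre-existing queue_balance_callback() helper,
roughly along these lines (sketch modelled on kernel/sched/rt.c):

	static DEFINE_PER_CPU(struct callback_head, rt_push_head);

	static inline void rt_queue_push_tasks(struct rq *rq)
	{
		if (!has_pushable_tasks(rq))
			return;

		queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu),
				       push_rt_tasks);
	}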

Reported-by: Scott Wood <swood@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lore.kernel.org/r/20200911082536.470013100@infradead.org

---
 kernel/sched/core.c  |  119 ++++++++++++++++++++++++++++++++-------------------
 kernel/sched/sched.h |    2 
 2 files changed, 77 insertions(+), 44 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3489,6 +3489,69 @@ static inline void finish_task(struct ta
 #endif
 }
 
+#ifdef CONFIG_SMP
+
+static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+	void (*func)(struct rq *rq);
+	struct callback_head *next;
+
+	lockdep_assert_held(&rq->lock);
+
+	while (head) {
+		func = (void (*)(struct rq *))head->func;
+		next = head->next;
+		head->next = NULL;
+		head = next;
+
+		func(rq);
+	}
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+	struct callback_head *head = rq->balance_callback;
+
+	lockdep_assert_held(&rq->lock);
+	if (head)
+		rq->balance_callback = NULL;
+
+	return head;
+}
+
+static void __balance_callbacks(struct rq *rq)
+{
+	do_balance_callbacks(rq, splice_balance_callbacks(rq));
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+	unsigned long flags;
+
+	if (unlikely(head)) {
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		do_balance_callbacks(rq, head);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
+}
+
+#else
+
+static inline void __balance_callbacks(struct rq *rq)
+{
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+	return NULL;
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+}
+
+#endif
+
 static inline void
 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
 {
@@ -3514,6 +3577,7 @@ static inline void finish_lock_switch(st
 	 * prev into current:
 	 */
 	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+	__balance_callbacks(rq);
 	raw_spin_unlock_irq(&rq->lock);
 }
 
@@ -3655,43 +3719,6 @@ static struct rq *finish_task_switch(str
 	return rq;
 }
 
-#ifdef CONFIG_SMP
-
-/* rq->lock is NOT held, but preemption is disabled */
-static void __balance_callback(struct rq *rq)
-{
-	struct callback_head *head, *next;
-	void (*func)(struct rq *rq);
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	head = rq->balance_callback;
-	rq->balance_callback = NULL;
-	while (head) {
-		func = (void (*)(struct rq *))head->func;
-		next = head->next;
-		head->next = NULL;
-		head = next;
-
-		func(rq);
-	}
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-static inline void balance_callback(struct rq *rq)
-{
-	if (unlikely(rq->balance_callback))
-		__balance_callback(rq);
-}
-
-#else
-
-static inline void balance_callback(struct rq *rq)
-{
-}
-
-#endif
-
 /**
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
@@ -3711,7 +3738,6 @@ asmlinkage __visible void schedule_tail(
 	 */
 
 	rq = finish_task_switch(prev);
-	balance_callback(rq);
 	preempt_enable();
 
 	if (current->set_child_tid)
@@ -4527,10 +4553,11 @@ static void __sched notrace __schedule(b
 		rq = context_switch(rq, prev, next, &rf);
 	} else {
 		rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-		rq_unlock_irq(rq, &rf);
-	}
 
-	balance_callback(rq);
+		rq_unpin_lock(rq, &rf);
+		__balance_callbacks(rq);
+		raw_spin_unlock_irq(&rq->lock);
+	}
 }
 
 void __noreturn do_task_dead(void)
@@ -4938,9 +4965,11 @@ void rt_mutex_setprio(struct task_struct
 out_unlock:
 	/* Avoid rq from going away on us: */
 	preempt_disable();
-	__task_rq_unlock(rq, &rf);
 
-	balance_callback(rq);
+	rq_unpin_lock(rq, &rf);
+	__balance_callbacks(rq);
+	raw_spin_unlock(&rq->lock);
+
 	preempt_enable();
 }
 #else
@@ -5214,6 +5243,7 @@ static int __sched_setscheduler(struct t
 	int retval, oldprio, oldpolicy = -1, queued, running;
 	int new_effective_prio, policy = attr->sched_policy;
 	const struct sched_class *prev_class;
+	struct callback_head *head;
 	struct rq_flags rf;
 	int reset_on_fork;
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
@@ -5452,6 +5482,7 @@ static int __sched_setscheduler(struct t
 
 	/* Avoid rq from going away on us: */
 	preempt_disable();
+	head = splice_balance_callbacks(rq);
 	task_rq_unlock(rq, p, &rf);
 
 	if (pi) {
@@ -5460,7 +5491,7 @@ static int __sched_setscheduler(struct t
 	}
 
 	/* Run balance callbacks after we've adjusted the PI chain: */
-	balance_callback(rq);
+	balance_callbacks(rq, head);
 	preempt_enable();
 
 	return 0;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1220,6 +1220,8 @@ static inline void rq_pin_lock(struct rq
 #ifdef CONFIG_SCHED_DEBUG
 	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
 	rf->clock_update_flags = 0;
+
+	SCHED_WARN_ON(rq->balance_callback);
 #endif
 }
 



* [patch 02/10] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
  2020-09-17  9:42 ` [patch 01/10] sched: Fix balance_callback() Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 03/10] sched/core: Wait for tasks being pushed away on hotplug Thomas Gleixner
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Peter Zijlstra (Intel),
	Scott Wood, Valentin Schneider, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Vincent Donnefort

In preparation for migrate_disable(), make sure only per-cpu kthreads
are allowed to run on !active CPUs.

This is run (as one of the very first steps) from the cpu-hotplug
task, which is a per-cpu kthread, and completion of the hotplug
operation only requires such tasks.

This constraint enables the migrate_disable() implementation to wait
for completion of all migrate_disable regions on this CPU at hotplug
time without fear of any new ones starting.

This replaces the unlikely(rq->balance_callbacks) test at the tail of
context_switch() with an unlikely(rq->balance_work) test; the fast path
is not affected.
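
For reference, the "per-cpu kthread" classification used by balance_push()
relies on the pre-existing helper in kernel/sched/sched.h, which looks
roughly like this:

	static inline bool is_per_cpu_kthread(struct task_struct *p)
	{
		if (!(p->flags & PF_KTHREAD))
			return false;

		if (p->nr_cpus_allowed != 1)
			return false;

		return true;
	}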

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20200916121020.GP2674@hirez.programming.kicks-ass.net

---
 kernel/sched/core.c  |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h |    5 ++
 2 files changed, 115 insertions(+), 2 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3513,8 +3513,10 @@ static inline struct callback_head *spli
 	struct callback_head *head = rq->balance_callback;
 
 	lockdep_assert_held(&rq->lock);
-	if (head)
+	if (head) {
 		rq->balance_callback = NULL;
+		rq->balance_flags &= ~BALANCE_WORK;
+	}
 
 	return head;
 }
@@ -3535,6 +3537,22 @@ static inline void balance_callbacks(str
 	}
 }
 
+static bool balance_push(struct rq *rq);
+
+static inline void balance_switch(struct rq *rq)
+{
+	if (unlikely(rq->balance_flags)) {
+		/*
+		 * Run the balance_callbacks, except on hotplug
+		 * when we need to push the current task away.
+		 */
+		if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
+		    !(rq->balance_flags & BALANCE_PUSH) ||
+		    !balance_push(rq))
+			__balance_callbacks(rq);
+	}
+}
+
 #else
 
 static inline void __balance_callbacks(struct rq *rq)
@@ -3550,6 +3568,8 @@ static inline void balance_callbacks(str
 {
 }
 
+static inline void balance_switch(struct rq *rq) { }
+
 #endif
 
 static inline void
@@ -3577,7 +3597,7 @@ static inline void finish_lock_switch(st
 	 * prev into current:
 	 */
 	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-	__balance_callbacks(rq);
+	balance_switch(rq);
 	raw_spin_unlock_irq(&rq->lock);
 }
 
@@ -6833,6 +6853,89 @@ static void migrate_tasks(struct rq *dea
 
 	rq->stop = stop;
 }
+
+static int __balance_push_cpu_stop(void *arg)
+{
+	struct task_struct *p = arg;
+	struct rq *rq = this_rq();
+	struct rq_flags rf;
+	int cpu;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	rq_lock(rq, &rf);
+
+	update_rq_clock(rq);
+
+	if (task_rq(p) == rq && task_on_rq_queued(p)) {
+		cpu = select_fallback_rq(rq->cpu, p);
+		rq = __migrate_task(rq, &rf, p, cpu);
+	}
+
+	rq_unlock(rq, &rf);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	put_task_struct(p);
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
+
+/*
+ * Ensure we only run per-cpu kthreads once the CPU goes !active.
+ */
+static bool balance_push(struct rq *rq)
+{
+	struct task_struct *push_task = rq->curr;
+
+	lockdep_assert_held(&rq->lock);
+	SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+	/*
+	 * Both the cpu-hotplug and stop task are in this case and are
+	 * required to complete the hotplug process.
+	 */
+	if (is_per_cpu_kthread(push_task))
+		return false;
+
+	get_task_struct(push_task);
+	/*
+	 * Temporarily drop rq->lock such that we can wake-up the stop task.
+	 * Both preemption and IRQs are still disabled.
+	 */
+	raw_spin_unlock(&rq->lock);
+	stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
+			    this_cpu_ptr(&push_work));
+	/*
+	 * At this point need_resched() is true and we'll take the loop in
+	 * schedule(). The next pick is obviously going to be the stop task
+	 * which is_per_cpu_kthread() and will push this task away.
+	 */
+	raw_spin_lock(&rq->lock);
+
+	return true;
+}
+
+static void balance_push_set(int cpu, bool on)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct rq_flags rf;
+
+	rq_lock_irqsave(rq, &rf);
+	if (on)
+		rq->balance_flags |= BALANCE_PUSH;
+	else
+		rq->balance_flags &= ~BALANCE_PUSH;
+	rq_unlock_irqrestore(rq, &rf);
+}
+
+#else
+
+static inline bool balance_push(struct rq *rq)
+{
+	return false;
+}
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)
@@ -6918,6 +7021,8 @@ int sched_cpu_activate(unsigned int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	struct rq_flags rf;
 
+	balance_push_set(cpu, false);
+
 #ifdef CONFIG_SCHED_SMT
 	/*
 	 * When going up, increment the number of cores with SMT present.
@@ -6965,6 +7070,8 @@ int sched_cpu_deactivate(unsigned int cp
 	 */
 	synchronize_rcu();
 
+	balance_push_set(cpu, true);
+
 #ifdef CONFIG_SCHED_SMT
 	/*
 	 * When going down, decrement the number of cores with SMT present.
@@ -6978,6 +7085,7 @@ int sched_cpu_deactivate(unsigned int cp
 
 	ret = cpuset_cpu_inactive(cpu);
 	if (ret) {
+		balance_push_set(cpu, false);
 		set_cpu_active(cpu, true);
 		return ret;
 	}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -973,6 +973,7 @@ struct rq {
 	unsigned long		cpu_capacity_orig;
 
 	struct callback_head	*balance_callback;
+	unsigned char		balance_flags;
 
 	unsigned char		nohz_idle_balance;
 	unsigned char		idle_balance;
@@ -1384,6 +1385,9 @@ init_numa_balancing(unsigned long clone_
 
 #ifdef CONFIG_SMP
 
+#define BALANCE_WORK	0x01
+#define BALANCE_PUSH	0x02
+
 static inline void
 queue_balance_callback(struct rq *rq,
 		       struct callback_head *head,
@@ -1397,6 +1401,7 @@ queue_balance_callback(struct rq *rq,
 	head->func = (void (*)(struct callback_head *))func;
 	head->next = rq->balance_callback;
 	rq->balance_callback = head;
+	rq->balance_flags |= BALANCE_WORK;
 }
 
 #define rcu_dereference_check_sched_domain(p) \



* [patch 03/10] sched/core: Wait for tasks being pushed away on hotplug
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
  2020-09-17  9:42 ` [patch 01/10] sched: Fix balance_callback() Thomas Gleixner
  2020-09-17  9:42 ` [patch 02/10] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 04/10] sched/hotplug: Consolidate task migration on CPU unplug Thomas Gleixner
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Valentin Schneider, Scott Wood, Peter Zijlstra (Intel)

RT kernels need to ensure that all tasks which are not per CPU kthreads
have left the outgoing CPU to guarantee that no tasks are force migrated
within a migrate disabled section.

There is also some desire to (ab)use fine grained CPU hotplug control to
clear a CPU from the active state in order to force-migrate tasks which
are not per CPU kthreads away, for power control purposes.

Add a mechanism which waits until all tasks which should leave the CPU
after the CPU active flag is cleared have moved to a different online CPU.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Valentin Schneider <valentin.schneider@arm.com>
---
 kernel/sched/core.c  |   44 +++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h |    4 ++++
 2 files changed, 45 insertions(+), 3 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6892,11 +6892,24 @@ static bool balance_push(struct rq *rq)
 	SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
 	/*
-	 * Both the cpu-hotplug and stop task are in this case and are
+	 * Both the cpu-hotplug and stop task are in this class and are
 	 * required to complete the hotplug process.
 	 */
-	if (is_per_cpu_kthread(push_task))
+	if (is_per_cpu_kthread(push_task)) {
+		/*
+		 * If this is the idle task on the outgoing CPU try to wake
+		 * up the hotplug control thread which might wait for the
+		 * last task to vanish. The rcuwait_active() check is
+		 * accurate here because the waiter is pinned on this CPU
+		 * and can't obviously be running in parallel.
+		 */
+		if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
+			raw_spin_unlock(&rq->lock);
+			rcuwait_wake_up(&rq->hotplug_wait);
+			raw_spin_lock(&rq->lock);
+		}
 		return false;
+	}
 
 	get_task_struct(push_task);
 	/*
@@ -6929,13 +6942,31 @@ static void balance_push_set(int cpu, bo
 	rq_unlock_irqrestore(rq, &rf);
 }
 
-#else
+/*
+ * Invoked from a CPUs hotplug control thread after the CPU has been marked
+ * inactive. All tasks which are not per CPU kernel threads are either
+ * pushed off this CPU now via balance_push() or placed on a different CPU
+ * during wakeup. Wait until the CPU is quiescent.
+ */
+static void balance_hotplug_wait(void)
+{
+	struct rq *rq = this_rq();
+
+	rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
+			   TASK_UNINTERRUPTIBLE);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static inline void balance_push_set(int cpu, bool on) { }
 
 static inline bool balance_push(struct rq *rq)
 {
 	return false;
 }
 
+static inline void balance_hotplug_wait(void) { }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)
@@ -7090,6 +7121,10 @@ int sched_cpu_deactivate(unsigned int cp
 		return ret;
 	}
 	sched_domains_numa_masks_clear(cpu);
+
+	/* Wait for all non per CPU kernel threads to vanish. */
+	balance_hotplug_wait();
+
 	return 0;
 }
 
@@ -7330,6 +7365,9 @@ void __init sched_init(void)
 
 		rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+		rcuwait_init(&rq->hotplug_wait);
+#endif
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1004,6 +1004,10 @@ struct rq {
 
 	/* This is used to determine avg_idle's max value */
 	u64			max_idle_balance_cost;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	struct rcuwait		hotplug_wait;
+#endif
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING



* [patch 04/10] sched/hotplug: Consolidate task migration on CPU unplug
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (2 preceding siblings ...)
  2020-09-17  9:42 ` [patch 03/10] sched/core: Wait for tasks being pushed away on hotplug Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 05/10] sched/core: Split __set_cpus_allowed_ptr() Thomas Gleixner
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

With the new mechanism which kicks tasks off the outgoing CPU at the end of
schedule(), the situation on an outgoing CPU right before the stopper thread
brings it down completely is:

 - All user tasks and all unbound kernel threads have either been migrated
   away or are not running and the next wakeup will move them to an online
   CPU.

 - All per CPU kernel threads, except the cpu hotplug thread and the stopper
   thread, have either been unbound or parked by the responsible CPU hotplug
   callback.

That means that at the last step before the stopper thread is invoked, the
cpu hotplug thread is the last legitimate running task on the outgoing
CPU.

Add a final wait step right before the stopper thread is kicked, which
ensures that any still running tasks on the way to park or on the way to
kick themselves off the CPU are either sleeping or gone.

This allows removing the migrate_tasks() crutch in sched_cpu_dying(). If
sched_cpu_dying() detects that there is still another running task aside
from the stopper thread, it will explode with the appropriate fireworks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h    |    1 
 include/linux/sched/hotplug.h |    2 
 kernel/cpu.c                  |    9 ++
 kernel/sched/core.c           |  150 +++++++++---------------------------------
 4 files changed, 46 insertions(+), 116 deletions(-)

--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -152,6 +152,7 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_AP_ONLINE_IDLE,
+	CPUHP_AP_SCHED_WAIT_EMPTY,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
--- a/include/linux/sched/hotplug.h
+++ b/include/linux/sched/hotplug.h
@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned i
 extern int sched_cpu_deactivate(unsigned int cpu);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_wait_empty(unsigned int cpu);
 extern int sched_cpu_dying(unsigned int cpu);
 #else
+# define sched_cpu_wait_empty	NULL
 # define sched_cpu_dying	NULL
 #endif
 
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1602,7 +1602,7 @@ static struct cpuhp_step cpuhp_hp_states
 		.name			= "ap:online",
 	},
 	/*
-	 * Handled on controll processor until the plugged processor manages
+	 * Handled on control processor until the plugged processor manages
 	 * this itself.
 	 */
 	[CPUHP_TEARDOWN_CPU] = {
@@ -1611,6 +1611,13 @@ static struct cpuhp_step cpuhp_hp_states
 		.teardown.single	= takedown_cpu,
 		.cant_stop		= true,
 	},
+
+	[CPUHP_AP_SCHED_WAIT_EMPTY] = {
+		.name			= "sched:waitempty",
+		.startup.single		= NULL,
+		.teardown.single	= sched_cpu_wait_empty,
+	},
+
 	/* Handle smpboot threads park/unpark */
 	[CPUHP_AP_SMPBOOT_THREADS] = {
 		.name			= "smpboot/threads:online",
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6740,120 +6740,6 @@ void idle_task_exit(void)
 	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
-/*
- * Since this CPU is going 'away' for a while, fold any nr_active delta
- * we might have. Assumes we're called after migrate_tasks() so that the
- * nr_active count is stable. We need to take the teardown thread which
- * is calling this into account, so we hand in adjust = 1 to the load
- * calculation.
- *
- * Also see the comment "Global load-average calculations".
- */
-static void calc_load_migrate(struct rq *rq)
-{
-	long delta = calc_load_fold_active(rq, 1);
-	if (delta)
-		atomic_long_add(delta, &calc_load_tasks);
-}
-
-static struct task_struct *__pick_migrate_task(struct rq *rq)
-{
-	const struct sched_class *class;
-	struct task_struct *next;
-
-	for_each_class(class) {
-		next = class->pick_next_task(rq);
-		if (next) {
-			next->sched_class->put_prev_task(rq, next);
-			return next;
-		}
-	}
-
-	/* The idle class should always have a runnable task */
-	BUG();
-}
-
-/*
- * Migrate all tasks from the rq, sleeping tasks will be migrated by
- * try_to_wake_up()->select_task_rq().
- *
- * Called with rq->lock held even though we'er in stop_machine() and
- * there's no concurrency possible, we hold the required locks anyway
- * because of lock validation efforts.
- */
-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
-{
-	struct rq *rq = dead_rq;
-	struct task_struct *next, *stop = rq->stop;
-	struct rq_flags orf = *rf;
-	int dest_cpu;
-
-	/*
-	 * Fudge the rq selection such that the below task selection loop
-	 * doesn't get stuck on the currently eligible stop task.
-	 *
-	 * We're currently inside stop_machine() and the rq is either stuck
-	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
-	 * either way we should never end up calling schedule() until we're
-	 * done here.
-	 */
-	rq->stop = NULL;
-
-	/*
-	 * put_prev_task() and pick_next_task() sched
-	 * class method both need to have an up-to-date
-	 * value of rq->clock[_task]
-	 */
-	update_rq_clock(rq);
-
-	for (;;) {
-		/*
-		 * There's this thread running, bail when that's the only
-		 * remaining thread:
-		 */
-		if (rq->nr_running == 1)
-			break;
-
-		next = __pick_migrate_task(rq);
-
-		/*
-		 * Rules for changing task_struct::cpus_mask are holding
-		 * both pi_lock and rq->lock, such that holding either
-		 * stabilizes the mask.
-		 *
-		 * Drop rq->lock is not quite as disastrous as it usually is
-		 * because !cpu_active at this point, which means load-balance
-		 * will not interfere. Also, stop-machine.
-		 */
-		rq_unlock(rq, rf);
-		raw_spin_lock(&next->pi_lock);
-		rq_relock(rq, rf);
-
-		/*
-		 * Since we're inside stop-machine, _nothing_ should have
-		 * changed the task, WARN if weird stuff happened, because in
-		 * that case the above rq->lock drop is a fail too.
-		 */
-		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
-			raw_spin_unlock(&next->pi_lock);
-			continue;
-		}
-
-		/* Find suitable destination for @next, with force if needed. */
-		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
-		rq = __migrate_task(rq, rf, next, dest_cpu);
-		if (rq != dead_rq) {
-			rq_unlock(rq, rf);
-			rq = dead_rq;
-			*rf = orf;
-			rq_relock(rq, rf);
-		}
-		raw_spin_unlock(&next->pi_lock);
-	}
-
-	rq->stop = stop;
-}
-
 static int __balance_push_cpu_stop(void *arg)
 {
 	struct task_struct *p = arg;
@@ -7144,6 +7030,41 @@ int sched_cpu_starting(unsigned int cpu)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Invoked immediately before the stopper thread is invoked to bring the
+ * CPU down completely. At this point all per CPU kthreads except the
+ * hotplug thread (current) and the stopper thread (inactive) have been
+ * either parked or have been unbound from the outgoing CPU. Ensure that
+ * any of those which might be on the way out are gone.
+ *
+ * If after this point a bound task is being woken on this CPU then the
+ * responsible hotplug callback has failed to do it's job.
+ * sched_cpu_dying() will catch it with the appropriate fireworks.
+ */
+int sched_cpu_wait_empty(unsigned int cpu)
+{
+	balance_hotplug_wait();
+	return 0;
+}
+
+/*
+ * Since this CPU is going 'away' for a while, fold any nr_active delta we
+ * might have. Called from the CPU stopper task after ensuring that the
+ * stopper is the last running task on the CPU, so nr_active count is
+ * stable. We need to take the teardown thread which is calling this into
+ * account, so we hand in adjust = 1 to the load calculation.
+ *
+ * Also see the comment "Global load-average calculations".
+ */
+static void calc_load_migrate(struct rq *rq)
+{
+	long delta = calc_load_fold_active(rq, 1);
+
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+}
+
 int sched_cpu_dying(unsigned int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -7157,7 +7078,6 @@ int sched_cpu_dying(unsigned int cpu)
 		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 		set_rq_offline(rq);
 	}
-	migrate_tasks(rq, &rf);
 	BUG_ON(rq->nr_running != 1);
 	rq_unlock_irqrestore(rq, &rf);
 



* [patch 05/10] sched/core: Split __set_cpus_allowed_ptr()
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (3 preceding siblings ...)
  2020-09-17  9:42 ` [patch 04/10] sched/hotplug: Consolidate task migration on CPU unplug Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 06/10] sched: Add task components for migration control Thomas Gleixner
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

Split the function so the actual work part can be reused and called from
places which hold rq::lock already.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/core.c |   64 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 26 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1863,34 +1863,17 @@ void do_set_cpus_allowed(struct task_str
 		set_next_task(rq, p);
 }
 
-/*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
- */
-static int __set_cpus_allowed_ptr(struct task_struct *p,
-				  const struct cpumask *new_mask, bool check)
+static int set_cpus_allowed_ptr_locked(struct task_struct *p,
+				       const struct cpumask *new_mask,
+				       bool check,
+				       struct rq *rq, struct rq_flags *rf)
 {
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
-	struct rq_flags rf;
-	struct rq *rq;
 	int ret = 0;
 
-	rq = task_rq_lock(p, &rf);
 	update_rq_clock(rq);
 
-	if (p->flags & PF_KTHREAD) {
-		/*
-		 * Kernel threads are allowed on online && !active CPUs
-		 */
-		cpu_valid_mask = cpu_online_mask;
-	}
-
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1900,8 +1883,12 @@ static int __set_cpus_allowed_ptr(struct
 		goto out;
 	}
 
-	if (cpumask_equal(&p->cpus_mask, new_mask))
-		goto out;
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
 
 	/*
 	 * Picking a ~random cpu helps in cases where we are changing affinity
@@ -1933,7 +1920,7 @@ static int __set_cpus_allowed_ptr(struct
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &rf);
+		task_rq_unlock(rq, p, rf);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		return 0;
 	} else if (task_on_rq_queued(p)) {
@@ -1941,10 +1928,35 @@ static int __set_cpus_allowed_ptr(struct
 		 * OK, since we're going to drop the lock immediately
 		 * afterwards anyway.
 		 */
-		rq = move_queued_task(rq, &rf, p, dest_cpu);
+		rq = move_queued_task(rq, rf, p, dest_cpu);
 	}
 out:
-	task_rq_unlock(rq, p, &rf);
+	task_rq_unlock(rq, p, rf);
+	return ret;
+}
+
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, bool check)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	int ret = 0;
+
+	rq = task_rq_lock(p, &rf);
+
+	if (cpumask_equal(&p->cpus_mask, new_mask))
+		task_rq_unlock(rq, p, &rf);
+	else
+		ret = set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
 
 	return ret;
 }



* [patch 06/10] sched: Add task components for migration control
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (4 preceding siblings ...)
  2020-09-17  9:42 ` [patch 05/10] sched/core: Split __set_cpus_allowed_ptr() Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 07/10] sched/core: Add mechanism to wait for affinity setting to complete Thomas Gleixner
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

The upcoming RT migrate_enable/disable() support will track migrate
disabled state in task_struct.

Add a new migration_ctrl struct to hold all necessary information and add
the required initializers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h |   30 +++++++++++++++++++++++++++---
 init/init_task.c      |    3 +++
 kernel/fork.c         |    1 +
 kernel/sched/debug.c  |    4 ++++
 4 files changed, 35 insertions(+), 3 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -628,6 +628,20 @@ struct wake_q_node {
 	struct wake_q_node *next;
 };
 
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+struct task_migration_ctrl {
+	struct mutex			mutex;
+	int				disable_cnt;
+};
+
+#define INIT_TASK_MIGRATION_CTRL_INITIALIZER				\
+{									\
+	.mutex = __MUTEX_INITIALIZER(init_task.migration_ctrl.mutex),	\
+}
+#else /* CONFIG_PREEMPT_RT && CONFIG_SMP */
+struct task_migration_ctrl { };
+#endif /* !(CONFIG_PREEMPT_RT && CONFIG_SMP) */
+
 struct task_struct {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/*
@@ -713,6 +727,7 @@ struct task_struct {
 	int				nr_cpus_allowed;
 	const cpumask_t			*cpus_ptr;
 	cpumask_t			cpus_mask;
+	struct task_migration_ctrl	migration_ctrl;
 
 #ifdef CONFIG_PREEMPT_RCU
 	int				rcu_read_lock_nesting;
@@ -1865,7 +1880,7 @@ static __always_inline bool need_resched
 }
 
 /*
- * Wrappers for p->thread_info->cpu access. No-op on UP.
+ * Various SMP helper functions. No-ops on UP.
  */
 #ifdef CONFIG_SMP
 
@@ -1880,7 +1895,14 @@ static inline unsigned int task_cpu(cons
 
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
 
-#else
+static inline void task_migration_ctrl_init(struct task_struct *p)
+{
+#ifdef CONFIG_PREEMPT_RT
+	mutex_init(&p->migration_ctrl.mutex);
+#endif
+}
+
+#else /* CONFIG_SMP */
 
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
@@ -1891,7 +1913,9 @@ static inline void set_task_cpu(struct t
 {
 }
 
-#endif /* CONFIG_SMP */
+static inline void task_migration_ctrl_init(struct task_struct *p) { }
+
+#endif /* !CONFIG_SMP */
 
 /*
  * In order to reduce various lock holder preemption latencies provide an
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -209,6 +209,9 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP
 	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
 #endif
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+	.migration_ctrl = INIT_TASK_MIGRATION_CTRL_INITIALIZER,
+#endif
 };
 EXPORT_SYMBOL(init_task);
 
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2119,6 +2119,7 @@ static __latent_entropy struct task_stru
 #ifdef CONFIG_BLOCK
 	p->plug = NULL;
 #endif
+	task_migration_ctrl_init(p);
 	futex_init_task(p);
 
 	/*
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -958,6 +958,10 @@ void proc_sched_show_task(struct task_st
 		P(dl.runtime);
 		P(dl.deadline);
 	}
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+	P(migration_ctrl.disable_cnt);
+	P(nr_cpus_allowed);
+#endif
 #undef PN_SCHEDSTAT
 #undef P_SCHEDSTAT
 



* [patch 07/10] sched/core: Add mechanism to wait for affinity setting to complete
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (5 preceding siblings ...)
  2020-09-17  9:42 ` [patch 06/10] sched: Add task components for migration control Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 08/10] sched: Add update_migratory() callback to scheduler classes Thomas Gleixner
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

RT kernels allow disabling migration while staying preemptible. Tasks
which have migration disabled cannot be moved to a different CPU when the
affinity mask is changed until they leave the migrate disabled section.

Add a mechanism to queue the migration request in the task and wait for it
to complete. The task will handle it when it leaves the migrate disabled
section.

This ensures that __set_cpus_allowed_ptr() is guaranteed to return only after
the new affinity mask has taken effect.
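
Stripped down to the bare pattern, the handshake uses the regular
completion API; a condensed sketch (task reference counting and error
handling elided, the consumer side is added by a later patch in this
series):

	/* Affinity setter, while holding task_rq_lock(): */
	DECLARE_COMPLETION_ONSTACK(done);

	data->mask = new_mask;
	data->check = check;
	data->done = &done;
	p->migration_ctrl.pending = data;	/* publish the request */

	task_rq_unlock(rq, p, rf);
	wait_for_completion(&done);		/* wait for @p to act on it */

	/* Target task, when it leaves the migrate disabled section: */
	pending->res = set_cpus_allowed_ptr_locked(p, pending->mask,
						   pending->check, rq, &rf);
	complete(pending->done);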

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h |   19 ++++++++++++
 kernel/sched/core.c   |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -629,8 +629,16 @@ struct wake_q_node {
 };
 
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+struct task_migrate_data {
+	const cpumask_t		*mask;
+	struct completion	*done;
+	bool			check;
+	int			res;
+};
+
 struct task_migration_ctrl {
 	struct mutex			mutex;
+	struct task_migrate_data	*pending;
 	int				disable_cnt;
 };
 
@@ -638,8 +646,19 @@ struct task_migration_ctrl {
 {									\
 	.mutex = __MUTEX_INITIALIZER(init_task.migration_ctrl.mutex),	\
 }
+
+static inline int task_self_migrate_result(struct task_migrate_data *data)
+{
+	return data->res;
+}
+
 #else /* CONFIG_PREEMPT_RT && CONFIG_SMP */
+struct task_migrate_data { };
 struct task_migration_ctrl { };
+static inline int task_self_migrate_result(struct task_migrate_data *data)
+{
+	return -ENOSYS;
+}
 #endif /* !(CONFIG_PREEMPT_RT && CONFIG_SMP) */
 
 struct task_struct {
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -442,6 +442,70 @@ static inline void hrtick_rq_init(struct
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+static inline void task_lock_migration_ctrl(struct task_struct *p)
+{
+	mutex_lock(&p->migration_ctrl.mutex);
+}
+
+static inline void task_unlock_migration_ctrl(struct task_struct *p)
+{
+	mutex_unlock(&p->migration_ctrl.mutex);
+}
+
+/*
+ * If the affinity of a task should be set and the task is in a migrate
+ * disabled region then the operation has to wait until the task leaves the
+ * migrate disabled region and takes care of setting it's affinity on its
+ * own.
+ */
+static bool task_self_migration(struct task_struct *p,
+				const struct cpumask *new_mask, bool check,
+				struct rq *rq, struct rq_flags *rf,
+				struct task_migrate_data *data)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+
+	lockdep_assert_held(&p->migration_ctrl.mutex);
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_held(&p->pi_lock);
+
+	if (!p->migration_ctrl.disable_cnt)
+		return false;
+
+	BUG_ON(p == current);
+
+	/*
+	 * Store a pointer to migration data in the migration control
+	 * struct, which will be used by the task to set its own affinity
+	 * when it leaves the migrate disabled section. The result is
+	 * returned in @data::res.
+	 */
+	data->mask = new_mask;
+	data->check = check;
+	data->done = &done;
+	p->migration_ctrl.pending = data;
+
+	/* Get a reference on @p, drop the locks and wait for it to complete */
+	get_task_struct(p);
+	task_rq_unlock(rq, p, rf);
+	wait_for_completion(&done);
+	put_task_struct(p);
+	return true;
+}
+
+#else /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) */
+static inline void task_lock_migration_ctrl(struct task_struct *p) { }
+static inline void task_unlock_migration_ctrl(struct task_struct *p) { }
+static bool task_self_migration(struct task_struct *p,
+				const struct cpumask *new_mask, bool check,
+				struct rq *rq, struct rq_flags *rf,
+				struct task_migrate_data *data)
+{
+	return false;
+}
+#endif /* !(defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)) */
+
 /*
  * cmpxchg based fetch_or, macro so it works for different integer types
  */
@@ -1947,17 +2011,29 @@ static int set_cpus_allowed_ptr_locked(s
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	struct task_migrate_data sync_data;
 	struct rq_flags rf;
 	struct rq *rq;
 	int ret = 0;
 
+	/*
+	 * On RT kernels the affinity setting might be delayed if the task
+	 * is in a migrate disabled region. The request for changing the
+	 * affinity is queued in the target task which acts upon it when
+	 * leaving the migrate disabled sections. This requires
+	 * serialization to protect the relevant data structures.
+	 */
+	task_lock_migration_ctrl(p);
 	rq = task_rq_lock(p, &rf);
 
 	if (cpumask_equal(&p->cpus_mask, new_mask))
 		task_rq_unlock(rq, p, &rf);
+	else if (task_self_migration(p, new_mask, check, rq, &rf, &sync_data))
+		ret = task_self_migrate_result(&sync_data);
 	else
 		ret = set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
 
+	task_unlock_migration_ctrl(p);
 	return ret;
 }
 



* [patch 08/10] sched: Add update_migratory() callback to scheduler classes
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (6 preceding siblings ...)
  2020-09-17  9:42 ` [patch 07/10] sched/core: Add mechanism to wait for affinity setting to complete Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17  9:42 ` [patch 09/10] sched/core: Add migrate_disable/enable() Thomas Gleixner
  2020-09-17  9:42 ` [patch 10/10] sched/core: Make migrate disable and CPU hotplug cooperative Thomas Gleixner
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

Provide a mechanism to update the number of migratory tasks in the RT and
deadline scheduler classes.

This will be used by the upcoming migrate_disable/enable() functionality on
RT kernels.

Originally-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/deadline.c |   10 ++++++++++
 kernel/sched/rt.c       |   10 ++++++++++
 kernel/sched/sched.h    |    4 ++++
 3 files changed, 24 insertions(+)

--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -479,6 +479,13 @@ static void dec_dl_migration(struct sche
 	update_dl_migration(dl_rq);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static void update_migratory_dl(struct task_struct *p, long delta)
+{
+	task_rq(p)->dl.dl_nr_migratory += delta;
+}
+#endif
+
 /*
  * The list of pushable -deadline task is not a plist, like in
  * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
@@ -2499,6 +2506,9 @@ const struct sched_class dl_sched_class
 	.rq_online              = rq_online_dl,
 	.rq_offline             = rq_offline_dl,
 	.task_woken		= task_woken_dl,
+#ifdef CONFIG_PREEMPT_RT
+	.update_migratory	= update_migratory_dl,
+#endif
 #endif
 
 	.task_tick		= task_tick_dl,
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2272,6 +2272,13 @@ static void switched_from_rt(struct rq *
 	rt_queue_pull_task(rq);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static void update_migratory_rt(struct task_struct *p, long delta)
+{
+	task_rq(p)->rt.rt_nr_migratory += delta;
+}
+#endif
+
 void __init init_sched_rt_class(void)
 {
 	unsigned int i;
@@ -2449,6 +2456,9 @@ const struct sched_class rt_sched_class
 	.rq_offline             = rq_offline_rt,
 	.task_woken		= task_woken_rt,
 	.switched_from		= switched_from_rt,
+#ifdef CONFIG_PREEMPT_RT
+	.update_migratory	= update_migratory_rt,
+#endif
 #endif
 
 	.task_tick		= task_tick_rt,
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1808,6 +1808,10 @@ struct sched_class {
 	void (*set_cpus_allowed)(struct task_struct *p,
 				 const struct cpumask *newmask);
 
+#ifdef CONFIG_PREEMPT_RT
+	void (*update_migratory)(struct task_struct *p, long delta);
+#endif
+
 	void (*rq_online)(struct rq *rq);
 	void (*rq_offline)(struct rq *rq);
 #endif



* [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (7 preceding siblings ...)
  2020-09-17  9:42 ` [patch 08/10] sched: Add update_migratory() callback to scheduler classes Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  2020-09-17 14:24   ` peterz
  2020-09-17  9:42 ` [patch 10/10] sched/core: Make migrate disable and CPU hotplug cooperative Thomas Gleixner
  9 siblings, 1 reply; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

On RT enabled kernels most of the code, including spin/rw lock held
sections, is preemptible, which also makes the tasks migratable. That
violates the per CPU constraints. RT therefore needs a mechanism to
control migration independently of preemption.

Add a migrate_disable/enable() mechanism which is optimized for the hotpath
operation. migrate_disable() increments the per task disable
counter. migrate_enable() decrements it.

If a task is preempted in a migrate disabled region, the cpumask pointer of
the task is updated to point to the cpumask of the current CPU and the
task::nr_cpus_allowed is set to 1. This prevents the task from being
migrated through balancing. When the task leaves the migrate disabled
region it restores the cpumask pointer and task::nr_cpus_allowed.

If an attempt is made to change the task's affinity while the task is in a
migrate disabled region, the affinity setter queues a request to the task
to migrate itself once it leaves the migrate disabled region.

Update relevant places, like balance_push(), is_cpu_allowed() and
smp_processor_id() debug code to take the migration disabled state into
account.

This code has a long history back to the 3.0-rt series and has been
optimized and rewritten several times by Peter Zijlstra, Steven Rostedt and
Scott Wood. This final distillation is heavily based on ideas and concepts
from these previous variants.
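
One practical consequence, covered by the lib/smp_processor_id.c hunk
below, is that a migrate disabled task may use smp_processor_id() from
preemptible context on RT. A minimal (made up) usage sketch:

	int cpu;

	migrate_disable();
	cpu = smp_processor_id();	/* stable: the task cannot migrate */
	/* ... preemptible work which must keep targeting this CPU ... */
	migrate_enable();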

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/preempt.h |   14 +++++
 include/linux/sched.h   |   33 +++++++++++++
 kernel/sched/core.c     |  117 +++++++++++++++++++++++++++++++++++++++++++++++-
 lib/smp_processor_id.c  |    7 ++
 4 files changed, 169 insertions(+), 2 deletions(-)

--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -322,6 +322,18 @@ static inline void preempt_notifier_init
 
 #endif
 
+#ifdef CONFIG_PREEMPT_RT
+
+# ifdef CONFIG_SMP
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+# else /* CONFIG_SMP */
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+#endif /* !CONFIG_SMP */
+
+#else /* CONFIG_PREEMPT_RT */
+
 /**
  * migrate_disable - Prevent migration of the current task
  *
@@ -352,4 +364,6 @@ static __always_inline void migrate_enab
 	preempt_enable();
 }
 
+#endif /* !CONFIG_PREEMPT_RT */
+
 #endif /* __LINUX_PREEMPT_H */
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1921,6 +1921,34 @@ static inline void task_migration_ctrl_i
 #endif
 }
 
+/**
+ * task_migrate_disabled - Check whether a task has migration disabled
+ * @task:	Task pointer to check
+ *
+ * On !PREEMPT_RT SMP kernels this returns always 0 as migrate disable is
+ * mapped to preempt disable and not individually controllable.
+ *
+ * On PREEMPT_RT SMP kernels migration control is distinct and this function
+ * returns task::migration_ctrl::disable_cnt. A non zero value indicates
+ * that migration is disabled.
+ *
+ * On UP kernels this returns always 0 because migration control is
+ * pointless there.
+ *
+ * This function is only to be used in scheduler and debug code and not
+ * meant for general consumption as the return value has only a meaning
+ * under specific configurations. The always return 0 stubs are also used
+ * to optimize code out if PREEMPT_RT is disabled.
+ */
+static inline int task_migrate_disabled(struct task_struct *p)
+{
+#ifdef CONFIG_PREEMPT_RT
+	return p->migration_ctrl.disable_cnt;
+#else
+	return 0;
+#endif
+}
+
 #else /* CONFIG_SMP */
 
 static inline unsigned int task_cpu(const struct task_struct *p)
@@ -1934,6 +1962,11 @@ static inline void set_task_cpu(struct t
 
 static inline void task_migration_ctrl_init(struct task_struct *p) { }
 
+static inline int task_migrate_disabled(struct task_struct *p)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_SMP */
 
 /*
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1767,13 +1767,17 @@ void check_preempt_curr(struct rq *rq, s
 /*
  * Per-CPU kthreads are allowed to run on !active && online CPUs, see
  * __set_cpus_allowed_ptr() and select_fallback_rq().
+ *
+ * On PREEMPT_RT tasks can run on !active && online CPUs if they are in a
+ * migrate disabled section. Once they leave the section they are migrated
+ * away.
  */
 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 {
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 		return false;
 
-	if (is_per_cpu_kthread(p))
+	if (is_per_cpu_kthread(p) || task_migrate_disabled(p))
 		return cpu_online(cpu);
 
 	return cpu_active(cpu);
@@ -3546,6 +3550,109 @@ fire_sched_out_preempt_notifiers(struct
 
 #endif /* CONFIG_PREEMPT_NOTIFIERS */
 
+/*
+ * Migrate disable control for PREEMPT_RT
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+
+static inline void update_nr_migratory(struct task_struct *p, long delta)
+{
+	if (p->nr_cpus_allowed > 1 && p->sched_class->update_migratory)
+		p->sched_class->update_migratory(p, delta);
+}
+
+/*
+ * The migrate_disable/enable() fastpath updates only the tasks migrate
+ * disable count which is sufficient as long as the task stays on the CPU.
+ *
+ * When a migrate disabled task is scheduled out it can become subject to
+ * load balancing. To prevent this, update task::cpus_ptr to point to the
+ * current CPUs cpumask and set task::nr_cpus_allowed to 1.
+ *
+ * If task::cpus_ptr does not point to task::cpus_mask then the update has
+ * been done already. This check is also used in in migrate_enable() as an
+ * indicator to restore task::cpus_ptr to point to task::cpus_mask
+ */
+static inline void sched_migration_ctrl(struct task_struct *prev, int cpu)
+{
+	if (!prev->migration_ctrl.disable_cnt ||
+	    prev->cpus_ptr != &prev->cpus_mask)
+		return;
+
+	prev->cpus_ptr = cpumask_of(cpu);
+	update_nr_migratory(prev, -1);
+	prev->nr_cpus_allowed = 1;
+}
+
+void migrate_disable(void)
+{
+	unsigned long flags;
+
+	if (!current->migration_ctrl.disable_cnt) {
+		raw_spin_lock_irqsave(&current->pi_lock, flags);
+		current->migration_ctrl.disable_cnt++;
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+	} else {
+		current->migration_ctrl.disable_cnt++;
+	}
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+	struct task_migrate_data *pending;
+	struct task_struct *p = current;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (WARN_ON_ONCE(p->migration_ctrl.disable_cnt <= 0))
+		return;
+
+	if (p->migration_ctrl.disable_cnt > 1) {
+		p->migration_ctrl.disable_cnt--;
+		return;
+	}
+
+	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+	p->migration_ctrl.disable_cnt = 0;
+	pending = p->migration_ctrl.pending;
+	p->migration_ctrl.pending = NULL;
+
+	/*
+	 * If the task was never scheduled out while in the migrate
+	 * disabled region and there is no migration request pending,
+	 * return.
+	 */
+	if (!pending && p->cpus_ptr == &p->cpus_mask) {
+		raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
+		return;
+	}
+
+	rq = __task_rq_lock(p, &rf);
+	/* Was it scheduled out while in a migrate disabled region? */
+	if (p->cpus_ptr != &p->cpus_mask) {
+		/* Restore the tasks CPU mask and update the weight */
+		p->cpus_ptr = &p->cpus_mask;
+		p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+		update_nr_migratory(p, 1);
+	}
+
+	/* If no migration request is pending, no further action required. */
+	if (!pending) {
+		task_rq_unlock(rq, p, &rf);
+		return;
+	}
+
+	/* Migrate self to the requested target */
+	pending->res = set_cpus_allowed_ptr_locked(p, pending->mask,
+						   pending->check, rq, &rf);
+	complete(pending->done);
+}
+EXPORT_SYMBOL(migrate_enable);
+#else /* CONFIG_SMP && CONFIG_PREEMPT_RT */
+static inline void sched_migration_ctrl(struct task_struct *prev, unsigned int cpu) { }
+#endif /* !(CONFIG_SMP && CONFIG_PREEMPT_RT) */
+
 static inline void prepare_task(struct task_struct *next)
 {
 #ifdef CONFIG_SMP
@@ -4579,6 +4686,9 @@ static void __sched notrace __schedule(b
 	rq_lock(rq, &rf);
 	smp_mb__after_spinlock();
 
+	/* Handle migrate disabled tasks being scheduled out */
+	sched_migration_ctrl(prev, cpu);
+
 	/* Promote REQ to ACT */
 	rq->clock_update_flags <<= 1;
 	update_rq_clock(rq);
@@ -6868,8 +6978,11 @@ static bool balance_push(struct rq *rq)
 	/*
 	 * Both the cpu-hotplug and stop task are in this class and are
 	 * required to complete the hotplug process.
+	 *
+	 * On RT kernels also regular tasks which are in a migrate disabled
+	 * section must stay on the CPU until they left the section.
 	 */
-	if (is_per_cpu_kthread(push_task)) {
+	if (is_per_cpu_kthread(push_task) || task_migrate_disabled(push_task)) {
 		/*
 		 * If this is the idle task on the outgoing CPU try to wake
 		 * up the hotplug control thread which might wait for the
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -27,6 +27,13 @@ unsigned int check_preemption_disabled(c
 		goto out;
 
 	/*
+	 * Tasks which have migration disabled on a RT kernel can
+	 * safely use smp_processor_id() even in preemptible code.
+	 */
+	if (task_migrate_disabled(current))
+		goto out;
+
+	/*
 	 * It is valid to assume CPU-locality during early bootup:
 	 */
 	if (system_state < SYSTEM_SCHEDULING)



* [patch 10/10] sched/core: Make migrate disable and CPU hotplug cooperative
  2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
                   ` (8 preceding siblings ...)
  2020-09-17  9:42 ` [patch 09/10] sched/core: Add migrate_disable/enable() Thomas Gleixner
@ 2020-09-17  9:42 ` Thomas Gleixner
  9 siblings, 0 replies; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-17  9:42 UTC (permalink / raw)
  To: LKML
  Cc: Sebastian Siewior, Qais Yousef, Scott Wood,
	Peter Zijlstra (Intel),
	Valentin Schneider, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira, Vincent Donnefort

On CPU unplug, tasks which are in a migrate disabled region cannot be
pushed to a different CPU until they have returned to a migratable state.

Account the number of tasks on a runqueue which are in a migrate disabled
section and make the hotplug wait mechanism respect that.

Originally-by: Scott Wood <swood@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched/core.c  |   38 ++++++++++++++++++++++++++++++++++----
 kernel/sched/sched.h |    4 ++++
 2 files changed, 38 insertions(+), 4 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -494,6 +494,11 @@ static bool task_self_migration(struct t
 	return true;
 }
 
+static inline bool rq_has_pinned_tasks(struct rq *rq)
+{
+	return rq->nr_pinned > 0;
+}
+
 #else /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) */
 static inline void task_lock_migration_ctrl(struct task_struct *p) { }
 static inline void task_unlock_migration_ctrl(struct task_struct *p) { }
@@ -504,6 +509,10 @@ static bool task_self_migration(struct t
 {
 	return false;
 }
+static inline bool rq_has_pinned_tasks(struct rq *rq)
+{
+	return false;
+}
 #endif /* !(defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)) */
 
 /*
@@ -3591,6 +3600,12 @@ void migrate_disable(void)
 	if (!current->migration_ctrl.disable_cnt) {
 		raw_spin_lock_irqsave(&current->pi_lock, flags);
 		current->migration_ctrl.disable_cnt++;
+		/*
+		 * Account the pinned task in the runqueue so that a
+		 * concurrent CPU hot unplug operation will wait until
+		 * this task has left the migrate disabled section.
+		 */
+		this_rq()->nr_pinned++;
 		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 	} else {
 		current->migration_ctrl.disable_cnt++;
@@ -3619,6 +3634,13 @@ void migrate_enable(void)
 	p->migration_ctrl.pending = NULL;
 
 	/*
+	 * Adjust the number of pinned tasks in the runqueue. No further
+	 * action required here. A possibly waiting CPU hot unplug
+	 * operation will be woken up once the CPU goes through idle.
+	 */
+	this_rq()->nr_pinned--;
+
+	/*
 	 * If the task was never scheduled out while in the migrate
 	 * disabled region and there is no migration request pending,
 	 * return.
@@ -6989,8 +7011,13 @@ static bool balance_push(struct rq *rq)
 		 * last task to vanish. The rcuwait_active() check is
 		 * accurate here because the waiter is pinned on this CPU
 		 * and can't obviously be running in parallel.
+		 *
+		 * On RT kernels this also has to check whether there are
+		 * pinned and scheduled out tasks on the runqueue. They
+		 * need to leave the migrate disabled section first.
 		 */
-		if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
+		if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
+		    rcuwait_active(&rq->hotplug_wait)) {
 			raw_spin_unlock(&rq->lock);
 			rcuwait_wake_up(&rq->hotplug_wait);
 			raw_spin_lock(&rq->lock);
@@ -7033,13 +7060,16 @@ static void balance_push_set(int cpu, bo
  * Invoked from a CPUs hotplug control thread after the CPU has been marked
  * inactive. All tasks which are not per CPU kernel threads are either
  * pushed off this CPU now via balance_push() or placed on a different CPU
- * during wakeup. Wait until the CPU is quiescent.
+ * during wakeup. Wait until the CPU is quiescent.  On RT kernels this also
+ * waits for pinned non-runnable tasks to leave the migrate disabled
+ * section.
  */
 static void balance_hotplug_wait(void)
 {
 	struct rq *rq = this_rq();
 
-	rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
+	rcuwait_wait_event(&rq->hotplug_wait,
+			   rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
 			   TASK_UNINTERRUPTIBLE);
 }
 
@@ -7279,7 +7309,7 @@ int sched_cpu_dying(unsigned int cpu)
 		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 		set_rq_offline(rq);
 	}
-	BUG_ON(rq->nr_running != 1);
+	BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
 	rq_unlock_irqrestore(rq, &rf);
 
 	calc_load_migrate(rq);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1053,6 +1053,10 @@ struct rq {
 	/* Must be inspected within a rcu lock section */
 	struct cpuidle_state	*idle_state;
 #endif
+
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+	unsigned int		nr_pinned;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
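
For illustration only (not part of the patch; the per CPU variable and
the function name are made up), the usage pattern the accounting has to
cover looks like this:

	static DEFINE_PER_CPU(unsigned long, my_counter);

	static void my_update(void)
	{
		/*
		 * rq->nr_pinned stays elevated for the whole section, so
		 * a concurrent unplug of this CPU waits in
		 * balance_hotplug_wait() until migrate_enable() drops the
		 * count again.
		 */
		migrate_disable();
		this_cpu_inc(my_counter);
		migrate_enable();
	}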


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17  9:42 ` [patch 09/10] sched/core: Add migrate_disable/enable() Thomas Gleixner
@ 2020-09-17 14:24   ` peterz
  2020-09-17 14:38     ` Sebastian Siewior
  2020-09-18  7:00     ` Thomas Gleixner
  0 siblings, 2 replies; 21+ messages in thread
From: peterz @ 2020-09-17 14:24 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Sebastian Siewior, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On Thu, Sep 17, 2020 at 11:42:11AM +0200, Thomas Gleixner wrote:

> +static inline void update_nr_migratory(struct task_struct *p, long delta)
> +{
> +	if (p->nr_cpus_allowed > 1 && p->sched_class->update_migratory)
> +		p->sched_class->update_migratory(p, delta);
> +}

Right, so as you know, I totally hate this thing :-) It adds a second
(and radically different) version of changing affinity. I'm working on a
version that uses the normal *set_cpus_allowed*() interface.

> +/*
> + * The migrate_disable/enable() fastpath updates only the tasks migrate
> + * disable count which is sufficient as long as the task stays on the CPU.
> + *
> + * When a migrate disabled task is scheduled out it can become subject to
> + * load balancing. To prevent this, update task::cpus_ptr to point to the
> + * current CPUs cpumask and set task::nr_cpus_allowed to 1.
> + *
> + * If task::cpus_ptr does not point to task::cpus_mask then the update has
> + * been done already. This check is also used in migrate_enable() as an
> + * indicator to restore task::cpus_ptr to point to task::cpus_mask
> + */
> +static inline void sched_migration_ctrl(struct task_struct *prev, int cpu)
> +{
> +	if (!prev->migration_ctrl.disable_cnt ||
> +	    prev->cpus_ptr != &prev->cpus_mask)
> +		return;
> +
> +	prev->cpus_ptr = cpumask_of(cpu);
> +	update_nr_migratory(prev, -1);
> +	prev->nr_cpus_allowed = 1;
> +}

So this thing is called from schedule(), with only rq->lock held, and
that violates the locking rules for changing the affinity.

I have a comment that explains how it's broken and why it's sort-of
working.

> +void migrate_disable(void)
> +{
> +	unsigned long flags;
> +
> +	if (!current->migration_ctrl.disable_cnt) {
> +		raw_spin_lock_irqsave(&current->pi_lock, flags);
> +		current->migration_ctrl.disable_cnt++;
> +		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
> +	} else {
> +		current->migration_ctrl.disable_cnt++;
> +	}
> +}

That pi_lock seems unfortunate, and it isn't obvious what the point of
it is.

> +void migrate_enable(void)
> +{
> +	struct task_migrate_data *pending;
> +	struct task_struct *p = current;
> +	struct rq_flags rf;
> +	struct rq *rq;
> +
> +	if (WARN_ON_ONCE(p->migration_ctrl.disable_cnt <= 0))
> +		return;
> +
> +	if (p->migration_ctrl.disable_cnt > 1) {
> +		p->migration_ctrl.disable_cnt--;
> +		return;
> +	}
> +
> +	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
> +	p->migration_ctrl.disable_cnt = 0;
> +	pending = p->migration_ctrl.pending;
> +	p->migration_ctrl.pending = NULL;
> +
> +	/*
> +	 * If the task was never scheduled out while in the migrate
> +	 * disabled region and there is no migration request pending,
> +	 * return.
> +	 */
> +	if (!pending && p->cpus_ptr == &p->cpus_mask) {
> +		raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
> +		return;
> +	}
> +
> +	rq = __task_rq_lock(p, &rf);
> +	/* Was it scheduled out while in a migrate disabled region? */
> +	if (p->cpus_ptr != &p->cpus_mask) {
> +		/* Restore the tasks CPU mask and update the weight */
> +		p->cpus_ptr = &p->cpus_mask;
> +		p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
> +		update_nr_migratory(p, 1);
> +	}
> +
> +	/* If no migration request is pending, no further action required. */
> +	if (!pending) {
> +		task_rq_unlock(rq, p, &rf);
> +		return;
> +	}
> +
> +	/* Migrate self to the requested target */
> +	pending->res = set_cpus_allowed_ptr_locked(p, pending->mask,
> +						   pending->check, rq, &rf);
> +	complete(pending->done);
> +}

So, what I'm missing with all this are the design constraints for this
trainwreck. Because the 'sane' solution was having migrate_disable()
imply cpus_read_lock(). But that didn't fly because we can't have
migrate_disable() / migrate_enable() schedule for raisins.

And if I'm not mistaken, the above migrate_enable() *does* require being
able to schedule, and our favourite piece of futex:

	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);

is broken. Consider that spin_unlock() doing migrate_enable() with a
pending sched_setaffinity().
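
Spelled out (simplified, the call chain is not exact):

	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); /* atomic, IRQs off */
	spin_unlock(q.lock_ptr);
	/*
	 * which on RT ends up as roughly:
	 *	rt_spin_unlock(q.lock_ptr);
	 *	migrate_enable();
	 * and with a pending affinity request migrate_enable() has to
	 * migrate, i.e. block, while still sitting under the raw
	 * wait_lock with IRQs disabled.
	 */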

Let me ponder this more..

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 14:24   ` peterz
@ 2020-09-17 14:38     ` Sebastian Siewior
  2020-09-17 14:49       ` peterz
  2020-09-18  7:00     ` Thomas Gleixner
  1 sibling, 1 reply; 21+ messages in thread
From: Sebastian Siewior @ 2020-09-17 14:38 UTC (permalink / raw)
  To: peterz
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On 2020-09-17 16:24:38 [+0200], peterz@infradead.org wrote:
> And if I'm not mistaken, the above migrate_enable() *does* require being
> able to schedule, and our favourite piece of futex:
> 
> 	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
> 	spin_unlock(q.lock_ptr);
> 
> is broken. Consider that spin_unlock() doing migrate_enable() with a
> pending sched_setaffinity().

There are two instances of the above, and only in the futex code. We
have a sort of duct-tape fix for that: manually balancing the migrate
counter so that it does not come to this.
But yes, not having to do the manual balance is a plus.

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 14:38     ` Sebastian Siewior
@ 2020-09-17 14:49       ` peterz
  2020-09-17 15:13         ` Sebastian Siewior
  0 siblings, 1 reply; 21+ messages in thread
From: peterz @ 2020-09-17 14:49 UTC (permalink / raw)
  To: Sebastian Siewior
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On Thu, Sep 17, 2020 at 04:38:50PM +0200, Sebastian Siewior wrote:
> On 2020-09-17 16:24:38 [+0200], peterz@infradead.org wrote:
> > And if I'm not mistaken, the above migrate_enable() *does* require being
> > able to schedule, and our favourite piece of futex:
> > 
> > 	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
> > 	spin_unlock(q.lock_ptr);
> > 
> > is broken. Consider that spin_unlock() doing migrate_enable() with a
> > pending sched_setaffinity().
> 
> There are two instances of the above, and only in the futex code. We
> have a sort of duct-tape fix for that: manually balancing the migrate
> counter so that it does not come to this.
> But yes, not having to do the manual balance is a plus.

I'm aware of the duct-tape :-) But I was under the impression that we
didn't want the duct-tape, and that there were lots of issues with the
FPU code, or was that another issue?


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 14:49       ` peterz
@ 2020-09-17 15:13         ` Sebastian Siewior
  2020-09-17 15:54           ` peterz
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Siewior @ 2020-09-17 15:13 UTC (permalink / raw)
  To: peterz
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On 2020-09-17 16:49:37 [+0200], peterz@infradead.org wrote:
> I'm aware of the duct-tape :-) But I was under the impression that we
> didn't want the duct-tape, and that there were lots of issues with the
> FPU code, or was that another issue?

Of course it would be better not to need the duct tape. 
Also symmetrical locking is what you want but clearly futex is one of
a kind.

I'm currently not aware of any issues in the FPU code in regard to this.
A few weeks ago, I was looking for this kind of usage and only futex
popped up.

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 15:13         ` Sebastian Siewior
@ 2020-09-17 15:54           ` peterz
  2020-09-17 16:30             ` Sebastian Siewior
  0 siblings, 1 reply; 21+ messages in thread
From: peterz @ 2020-09-17 15:54 UTC (permalink / raw)
  To: Sebastian Siewior
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On Thu, Sep 17, 2020 at 05:13:41PM +0200, Sebastian Siewior wrote:
> On 2020-09-17 16:49:37 [+0200], peterz@infradead.org wrote:
> > I'm aware of the duct-tape :-) But I was under the impression that we
> > didn't want the duct-tape, and that there were lots of issues with the
> > FPU code, or was that another issue?
> 
> Of course it would be better not to need the duct tape. 
> Also symmetrical locking is what you want but clearly futex is one of
> a kind.
> 
> I'm currently not aware of any issues in the FPU code in regard to this.
> A few weeks ago, I was looking for this kind of usage and only futex
> popped up.

I'm not sure what the problem with FPU was, I was throwing alternatives
at tglx to see what would stick, in part to (re)discover the design
constraints of this thing.

One reason for not allowing migrate_disable() to sleep was: FPU code.

Could it be it does something like:

	preempt_disable();
	spin_lock();

	spin_unlock();
	preempt_enable();

Where we'll never get preempted while migrate_disable()'d and thus never
trigger any of the sleep paths?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 15:54           ` peterz
@ 2020-09-17 16:30             ` Sebastian Siewior
  2020-09-18  8:22               ` peterz
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Siewior @ 2020-09-17 16:30 UTC (permalink / raw)
  To: peterz
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On 2020-09-17 17:54:10 [+0200], peterz@infradead.org wrote:
> I'm not sure what the problem with FPU was, I was throwing alternatives
> at tglx to see what would stick, in part to (re)discover the design
> constraints of this thing.

Was this recent or distant in the timeline?

> One reason for not allowing migrate_disable() to sleep was: FPU code.
> 
> Could it be it does something like:
> 
> 	preempt_disable();
> 	spin_lock();
> 
> 	spin_unlock();
> 	preempt_enable();
> 
> Where we'll never get preempted while migrate_disable()'d and thus never
> trigger any of the sleep paths?

I try to get rid of something like that. This doesn't work either way
because the spin_lock() may block which it can't with disabled
preemption.

Looking at my queue, the FPU related users are in crypto. And there we
break the loops mostly due to the construct:
	kernel_fpu_begin();
	while (bytes) {
		crypto_thingy();
		skcipher_walk_done();
	}

and skcipher_walk_done() could allocate/free/map memory. This is
independent.

Ah. We used to have migrate_disable() in pagefault_disable(). The x86
FPU code does
	preempt_disable();
	…
	pagefault_disable();

but that migrate_disable() was moved from pagefault_disable() to
kmap_atomic(). We shouldn't have
	preempt_disable(); || local_irq_disable();
	kmap_atomic();

on RT. I've been running around removing those. See
   a10dcebacdb0c ("fs/ntfs/aops.c: don't disable interrupts during kmap_atomic()")
   ce1e518190ea7 ("ide: don't disable interrupts during kmap_atomic()")
   f3a1075e5fc34 ("block: don't disable interrupts during kmap_atomic()")
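
The pattern those conversions move to is roughly (illustrative only,
the variables are placeholders):

	void *addr;

	/* no local_irq_save() / preempt_disable() wrapped around it */
	addr = kmap_atomic(page);
	memcpy(addr, src, len);
	kunmap_atomic(addr);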

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 14:24   ` peterz
  2020-09-17 14:38     ` Sebastian Siewior
@ 2020-09-18  7:00     ` Thomas Gleixner
  2020-09-18  8:28       ` peterz
  1 sibling, 1 reply; 21+ messages in thread
From: Thomas Gleixner @ 2020-09-18  7:00 UTC (permalink / raw)
  To: peterz
  Cc: LKML, Sebastian Siewior, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On Thu, Sep 17 2020 at 16:24, peterz wrote:
> On Thu, Sep 17, 2020 at 11:42:11AM +0200, Thomas Gleixner wrote:
>
>> +static inline void update_nr_migratory(struct task_struct *p, long delta)
>> +{
>> +	if (p->nr_cpus_allowed > 1 && p->sched_class->update_migratory)
>> +		p->sched_class->update_migratory(p, delta);
>> +}
>
> Right, so as you know, I totally hate this thing :-) It adds a second
> (and radically different) version of changing affinity. I'm working on a
> version that uses the normal *set_cpus_allowed*() interface.

Tried that back and forth and ended up either in locking hell or with
race conditions of sorts, but my scheduler foo is rusty.

>> +static inline void sched_migration_ctrl(struct task_struct *prev, int cpu)
>> +{
>> +	if (!prev->migration_ctrl.disable_cnt ||
>> +	    prev->cpus_ptr != &prev->cpus_mask)
>> +		return;
>> +
>> +	prev->cpus_ptr = cpumask_of(cpu);
>> +	update_nr_migratory(prev, -1);
>> +	prev->nr_cpus_allowed = 1;
>> +}
>
> So this thing is called from schedule(), with only rq->lock held, and
> that violates the locking rules for changing the affinity.
>
> I have a comment that explains how it's broken and why it's sort-of
> working.

Yeah :(

>> +void migrate_disable(void)
>> +{
>> +	unsigned long flags;
>> +
>> +	if (!current->migration_ctrl.disable_cnt) {
>> +		raw_spin_lock_irqsave(&current->pi_lock, flags);
>> +		current->migration_ctrl.disable_cnt++;
>> +		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
>> +	} else {
>> +		current->migration_ctrl.disable_cnt++;
>> +	}
>> +}
>
> That pi_lock seems unfortunate, and it isn't obvious what the point of
> it is.

Indeed. That obviously lacks a big fat comment.

current->migration_ctrl.disable_cnt++ is obviously an RMW operation. So
you end up with the following:

CPU0                                            CPU1
migrate_disable()
   R = current->migration_ctrl.disable_cnt;
                                                set_cpus_allowed_ptr()
                                                  task_rq_lock();
                                                  if (!p->migration_ctrl.disable_cnt) {
   current->migration_ctrl.disable_cnt = R + 1;
                                                    stop_one_cpu();
---> stopper_thread()
        BUG_ON(task->migration_ctrl.disable_cnt);

I tried to back out from that instead of BUG(), but that ended up being
even more of a hacky trainwreck than just biting the bullet and taking
pi_lock.
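
For reference, the other side serializes on the same lock (simplified,
the retry loop of task_rq_lock() is omitted):

	/* set_cpus_allowed_ptr() -> task_rq_lock() */
	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);	/* same pi_lock as above */
	rq = task_rq(p);
	raw_spin_lock(&rq->lock);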

>
> So, what I'm missing with all this are the design constraints for this
> trainwreck. Because the 'sane' solution was having migrate_disable()
> imply cpus_read_lock(). But that didn't fly because we can't have
> migrate_disable() / migrate_enable() schedule for raisins.

Yeah. The original code had some magic

      if (preemptible())
            cpus_read_lock();
      else
            p->atomic_migrate_disable++;

but that caused another set of horrors with asymmetric code like the
below and stuff like try_lock().

> And if I'm not mistaken, the above migrate_enable() *does* require being
> able to schedule, and our favourite piece of futex:
>
> 	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
> 	spin_unlock(q.lock_ptr);
>
> is broken. Consider that spin_unlock() doing migrate_enable() with a
> pending sched_setaffinity().

Yes, we have the extra migrate_disable()/enable() pair around that.

The other way I solved that was to have a spin_[un]lock() variant which
does not have a migrate disable/enable. That works in that code because
there is no per-CPU-ness requirement. Not pretty either...
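
Roughly this shape (purely illustrative, the name is made up):

	/*
	 * Unlock without the implicit migrate_enable(); only usable when
	 * the section has no per CPU requirement, as in that futex path.
	 */
	static inline void spin_unlock_no_mg(spinlock_t *lock)
	{
		rt_spin_unlock(lock);
	}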

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-17 16:30             ` Sebastian Siewior
@ 2020-09-18  8:22               ` peterz
  2020-09-18  8:48                 ` Sebastian Siewior
  0 siblings, 1 reply; 21+ messages in thread
From: peterz @ 2020-09-18  8:22 UTC (permalink / raw)
  To: Sebastian Siewior
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On Thu, Sep 17, 2020 at 06:30:01PM +0200, Sebastian Siewior wrote:
> On 2020-09-17 17:54:10 [+0200], peterz@infradead.org wrote:
> > I'm not sure what the problem with FPU was, I was throwing alternatives
> > at tglx to see what would stick, in part to (re)discover the design
> > constraints of this thing.
> 
> Was this recent or distant in the timeline?

The past few weeks :-) Thomas and I have been bickering about this
stuff on IRC on and off.

> > One reason for not allowing migrate_disable() to sleep was: FPU code.
> > 
> > Could it be it does something like:
> > 
> > 	preempt_disable();
> > 	spin_lock();
> > 
> > 	spin_unlock();
> > 	preempt_enable();
> > 
> > Where we'll never get preempted while migrate_disable()'d and thus never
> > trigger any of the sleep paths?
> 
> I try to get rid of something like that. This doesn't work either way
> because the spin_lock() may block which it can't with disabled
> preemption.

Yeah, that obviously should have been migrate_disable/enable instead of
spin_lock/unlock :/

> Ah. We used to have migrate_disable() in pagefault_disable(). The x86
> FPU code does
> 	preempt_disable();
> 	…
> 	pagefault_disable();
> 
> but that migrate_disable() was moved from pagefault_disable() to
> kmap_atomic(). We shouldn't have
> 	preempt_disable(); || local_irq_disable();
> 	kmap_atomic();
> 
> on RT. I've been running around removing those. See
>    a10dcebacdb0c ("fs/ntfs/aops.c: don't disable interrupts during kmap_atomic()")
>    ce1e518190ea7 ("ide: don't disable interrupts during kmap_atomic()")
>    f3a1075e5fc34 ("block: don't disable interrupts during kmap_atomic()")

Hmm, okay.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-18  7:00     ` Thomas Gleixner
@ 2020-09-18  8:28       ` peterz
  0 siblings, 0 replies; 21+ messages in thread
From: peterz @ 2020-09-18  8:28 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Sebastian Siewior, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On Fri, Sep 18, 2020 at 09:00:03AM +0200, Thomas Gleixner wrote:

> >> +void migrate_disable(void)
> >> +{
> >> +	unsigned long flags;
> >> +
> >> +	if (!current->migration_ctrl.disable_cnt) {
> >> +		raw_spin_lock_irqsave(&current->pi_lock, flags);
> >> +		current->migration_ctrl.disable_cnt++;
> >> +		raw_spin_unlock_irqrestore(&current->pi_lock, flags);
> >> +	} else {
> >> +		current->migration_ctrl.disable_cnt++;
> >> +	}
> >> +}
> >
> > That pi_lock seems unfortunate, and it isn't obvious what the point of
> > it is.
> 
> Indeed. That obviously lacks a big fat comment.
> 
> current->migration_ctrl.disable_cnt++ is obviously an RMW operation. So
> you end up with the following:
> 
> CPU0                                            CPU1
> migrate_disable()
>    R = current->migration_ctrl.disable_cnt;
>                                                 set_cpus_allowed_ptr()
>                                                   task_rq_lock();
>                                                   if (!p->migration_ctrl.disable_cnt) {
>    current->migration_ctrl.disable_cnt = R + 1;
>                                                     stop_one_cpu();
> ---> stopper_thread()
>         BUG_ON(task->migration_ctrl.disable_cnt);
> 
> I tried to back out from that instead of BUG(), but that ended up being
> even more of a hacky trainwreck than just biting the bullet and taking
> pi_lock.

You don't need the load-store for that, I think; pure timing will do.
Blergh, lemme prepare more wake-up juice and think about that.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [patch 09/10] sched/core: Add migrate_disable/enable()
  2020-09-18  8:22               ` peterz
@ 2020-09-18  8:48                 ` Sebastian Siewior
  0 siblings, 0 replies; 21+ messages in thread
From: Sebastian Siewior @ 2020-09-18  8:48 UTC (permalink / raw)
  To: peterz
  Cc: Thomas Gleixner, LKML, Qais Yousef, Scott Wood,
	Valentin Schneider, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Vincent Donnefort

On 2020-09-18 10:22:32 [+0200], peterz@infradead.org wrote:
> > > One reason for not allowing migrate_disable() to sleep was: FPU code.
> > > 
> > > Could it be it does something like:
> > > 
> > > 	preempt_disable();
> > > 	spin_lock();
> > > 
> > > 	spin_unlock();
> > > 	preempt_enable();
> > > 
> > > Where we'll never get preempted while migrate_disable()'d and thus never
> > > trigger any of the sleep paths?
> > 
> > I try to get rid of something like that. This doesn't work either way
> > because the spin_lock() may block which it can't with disabled
> > preemption.
> 
> Yeah, that obviously should have been migrate_disable/enable instead of
> spin_lock/unlock :/

Ah. Me stupid. fpregs_lock() does

	preempt_disable();
	local_bh_disable();

which is more or less the "official" pattern. As of today
local_bh_disable() does migrate_disable() / spin_lock(). Not sure what
we end up with for local_bh_disable() in the end.
We used to not have a BKL here on RT but ended up in all kinds of locking
problems because vanilla treats local_bh_disable() as a BKL and uses it
for locking.
Today we have a per-CPU spinlock_t in local_bh_disable() to emulate the
BKL. But the pattern above doesn't work due to the atomic part…

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2020-09-18  8:48 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-17  9:42 [patch 00/10] sched: Migrate disable support for RT Thomas Gleixner
2020-09-17  9:42 ` [patch 01/10] sched: Fix balance_callback() Thomas Gleixner
2020-09-17  9:42 ` [patch 02/10] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Thomas Gleixner
2020-09-17  9:42 ` [patch 03/10] sched/core: Wait for tasks being pushed away on hotplug Thomas Gleixner
2020-09-17  9:42 ` [patch 04/10] sched/hotplug: Consolidate task migration on CPU unplug Thomas Gleixner
2020-09-17  9:42 ` [patch 05/10] sched/core: Split __set_cpus_allowed_ptr() Thomas Gleixner
2020-09-17  9:42 ` [patch 06/10] sched: Add task components for migration control Thomas Gleixner
2020-09-17  9:42 ` [patch 07/10] sched/core: Add mechanism to wait for affinity setting to complete Thomas Gleixner
2020-09-17  9:42 ` [patch 08/10] sched: Add update_migratory() callback to scheduler classes Thomas Gleixner
2020-09-17  9:42 ` [patch 09/10] sched/core: Add migrate_disable/enable() Thomas Gleixner
2020-09-17 14:24   ` peterz
2020-09-17 14:38     ` Sebastian Siewior
2020-09-17 14:49       ` peterz
2020-09-17 15:13         ` Sebastian Siewior
2020-09-17 15:54           ` peterz
2020-09-17 16:30             ` Sebastian Siewior
2020-09-18  8:22               ` peterz
2020-09-18  8:48                 ` Sebastian Siewior
2020-09-18  7:00     ` Thomas Gleixner
2020-09-18  8:28       ` peterz
2020-09-17  9:42 ` [patch 10/10] sched/core: Make migrate disable and CPU hotplug cooperative Thomas Gleixner
