* [PATCH 0/2] sched: migrate_disable() preparations
@ 2020-09-11 8:17 Peter Zijlstra
2020-09-11 8:17 ` [PATCH 1/2] sched: Fix balance_callback() Peter Zijlstra
2020-09-11 8:17 ` [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Peter Zijlstra
0 siblings, 2 replies; 13+ messages in thread
From: Peter Zijlstra @ 2020-09-11 8:17 UTC (permalink / raw)
To: mingo, vincent.guittot, tglx
Cc: linux-kernel, dietmar.eggemann, rostedt, bristot, swood,
valentin.schneider, peterz
Hi!
These two patches are the result of Thomas pestering me with migrate_disable()
patches for upstream. The first one is a cleanup/fix for the existing
balance_callback machinery. The second (ab)uses the context_switch() tail
invocation of balance_callbacks() to push away 'undesirables' during CPU
hotplug after we've marked the CPU as !active.
With this in place, Thomas can do his horrible migrate_disable() thing ;-)
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/2] sched: Fix balance_callback()
2020-09-11 8:17 [PATCH 0/2] sched: migrate_disable() preparations Peter Zijlstra
@ 2020-09-11 8:17 ` Peter Zijlstra
2020-09-11 12:17 ` Valentin Schneider
2020-09-11 8:17 ` [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Peter Zijlstra
1 sibling, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2020-09-11 8:17 UTC (permalink / raw)
To: mingo, vincent.guittot, tglx
Cc: linux-kernel, dietmar.eggemann, rostedt, bristot, swood,
valentin.schneider, peterz
The intent of balance_callback() has always been to delay executing
balancing operations until the end of the current rq->lock section.
This is because balance operations must often drop rq->lock, and that
isn't safe in general.
However, as noted by Scott, there were a few holes in that scheme;
balance_callback() was called after rq->lock was dropped, which means
another CPU can interleave and touch the callback list.
Rework code to call the balance callbacks before dropping rq->lock
where possible, and otherwise splice the balance list onto a local
stack.
This guarantees that the balance list must be empty when we take
rq->lock. IOW, we'll only ever run our own balance callbacks.
Reported-by: Scott Wood <swood@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/core.c | 119 ++++++++++++++++++++++++++++++++-------------------
kernel/sched/sched.h | 2
2 files changed, 77 insertions(+), 44 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3489,6 +3489,69 @@ static inline void finish_task(struct ta
#endif
}
+#ifdef CONFIG_SMP
+
+static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+ void (*func)(struct rq *rq);
+ struct callback_head *next;
+
+ lockdep_assert_held(&rq->lock);
+
+ while (head) {
+ func = (void (*)(struct rq *))head->func;
+ next = head->next;
+ head->next = NULL;
+ head = next;
+
+ func(rq);
+ }
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+ struct callback_head *head = rq->balance_callback;
+
+ lockdep_assert_held(&rq->lock);
+ if (head)
+ rq->balance_callback = NULL;
+
+ return head;
+}
+
+static void __balance_callbacks(struct rq *rq)
+{
+ do_balance_callbacks(rq, splice_balance_callbacks(rq));
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+ unsigned long flags;
+
+ if (unlikely(head)) {
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ do_balance_callbacks(rq, head);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ }
+}
+
+#else
+
+static inline void __balance_callbacks(struct rq *rq)
+{
+}
+
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+ return NULL;
+}
+
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+{
+}
+
+#endif
+
static inline void
prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
{
@@ -3514,6 +3577,7 @@ static inline void finish_lock_switch(st
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+ __balance_callbacks(rq);
raw_spin_unlock_irq(&rq->lock);
}
@@ -3655,43 +3719,6 @@ static struct rq *finish_task_switch(str
return rq;
}
-#ifdef CONFIG_SMP
-
-/* rq->lock is NOT held, but preemption is disabled */
-static void __balance_callback(struct rq *rq)
-{
- struct callback_head *head, *next;
- void (*func)(struct rq *rq);
- unsigned long flags;
-
- raw_spin_lock_irqsave(&rq->lock, flags);
- head = rq->balance_callback;
- rq->balance_callback = NULL;
- while (head) {
- func = (void (*)(struct rq *))head->func;
- next = head->next;
- head->next = NULL;
- head = next;
-
- func(rq);
- }
- raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-static inline void balance_callback(struct rq *rq)
-{
- if (unlikely(rq->balance_callback))
- __balance_callback(rq);
-}
-
-#else
-
-static inline void balance_callback(struct rq *rq)
-{
-}
-
-#endif
-
/**
* schedule_tail - first thing a freshly forked thread must call.
* @prev: the thread we just switched away from.
@@ -3711,7 +3738,6 @@ asmlinkage __visible void schedule_tail(
*/
rq = finish_task_switch(prev);
- balance_callback(rq);
preempt_enable();
if (current->set_child_tid)
@@ -4527,10 +4553,11 @@ static void __sched notrace __schedule(b
rq = context_switch(rq, prev, next, &rf);
} else {
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
- rq_unlock_irq(rq, &rf);
- }
- balance_callback(rq);
+ rq_unpin_lock(rq, &rf);
+ __balance_callbacks(rq);
+ raw_spin_unlock_irq(&rq->lock);
+ }
}
void __noreturn do_task_dead(void)
@@ -4941,9 +4968,11 @@ void rt_mutex_setprio(struct task_struct
out_unlock:
/* Avoid rq from going away on us: */
preempt_disable();
- __task_rq_unlock(rq, &rf);
- balance_callback(rq);
+ rq_unpin_lock(rq, &rf);
+ __balance_callbacks(rq);
+ raw_spin_unlock(&rq->lock);
+
preempt_enable();
}
#else
@@ -5217,6 +5246,7 @@ static int __sched_setscheduler(struct t
int retval, oldprio, oldpolicy = -1, queued, running;
int new_effective_prio, policy = attr->sched_policy;
const struct sched_class *prev_class;
+ struct callback_head *head;
struct rq_flags rf;
int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
@@ -5455,6 +5485,7 @@ static int __sched_setscheduler(struct t
/* Avoid rq from going away on us: */
preempt_disable();
+ head = splice_balance_callbacks(rq);
task_rq_unlock(rq, p, &rf);
if (pi) {
@@ -5463,7 +5494,7 @@ static int __sched_setscheduler(struct t
}
/* Run balance callbacks after we've adjusted the PI chain: */
- balance_callback(rq);
+ balance_callbacks(rq, head);
preempt_enable();
return 0;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1220,6 +1220,8 @@ static inline void rq_pin_lock(struct rq
#ifdef CONFIG_SCHED_DEBUG
rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
rf->clock_update_flags = 0;
+
+ SCHED_WARN_ON(rq->balance_callback);
#endif
}
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-11 8:17 [PATCH 0/2] sched: migrate_disable() preparations Peter Zijlstra
2020-09-11 8:17 ` [PATCH 1/2] sched: Fix balance_callback() Peter Zijlstra
@ 2020-09-11 8:17 ` Peter Zijlstra
2020-09-11 12:17 ` Valentin Schneider
2020-09-16 10:18 ` Sebastian Andrzej Siewior
1 sibling, 2 replies; 13+ messages in thread
From: Peter Zijlstra @ 2020-09-11 8:17 UTC (permalink / raw)
To: mingo, vincent.guittot, tglx
Cc: linux-kernel, dietmar.eggemann, rostedt, bristot, swood,
valentin.schneider, peterz
In preparation for migrate_disable(), make sure only per-cpu kthreads
are allowed to run on !active CPUs.
This is run (as one of the very first steps) from the cpu-hotplug
task which is a per-cpu kthread and completion of the hotplug
operation only requires such tasks.
This constraint enables the migrate_disable() implementation to wait
for completion of all migrate_disable regions on this CPU at hotplug
time without fear of any new ones starting.
This replaces the unlikely(rq->balance_callbacks) test at the tail of
context_switch with an unlikely(rq->balance_work), the fast path is
not affected.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/core.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sched/sched.h | 5 ++
2 files changed, 106 insertions(+), 2 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3513,8 +3513,10 @@ static inline struct callback_head *spli
struct callback_head *head = rq->balance_callback;
lockdep_assert_held(&rq->lock);
- if (head)
+ if (head) {
rq->balance_callback = NULL;
+ rq->balance_flags &= ~BALANCE_WORK;
+ }
return head;
}
@@ -3569,6 +3571,8 @@ prepare_lock_switch(struct rq *rq, struc
#endif
}
+static bool balance_push(struct rq *rq);
+
static inline void finish_lock_switch(struct rq *rq)
{
/*
@@ -3577,7 +3581,16 @@ static inline void finish_lock_switch(st
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
- __balance_callbacks(rq);
+ if (unlikely(rq->balance_flags)) {
+ /*
+ * Run the balance_callbacks, except on hotplug
+ * when we need to push the current task away.
+ */
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
+ !(rq->balance_flags & BALANCE_PUSH) ||
+ !balance_push(rq))
+ __balance_callbacks(rq);
+ }
raw_spin_unlock_irq(&rq->lock);
}
@@ -6836,6 +6849,87 @@ static void migrate_tasks(struct rq *dea
rq->stop = stop;
}
+
+static int __balance_push_stop(void *arg)
+{
+ struct task_struct *p = arg;
+ struct rq *rq = this_rq();
+ struct rq_flags rf;
+ int cpu;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ rq_lock(rq, &rf);
+
+ if (task_rq(p) == rq && task_on_rq_queued(p)) {
+ cpu = select_fallback_rq(rq->cpu, p);
+ rq = __migrate_task(rq, &rf, p, cpu);
+ }
+
+ rq_unlock(rq, &rf);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
+
+/*
+ * Ensure we only run per-cpu kthreads once the CPU goes !active.
+ */
+static bool balance_push(struct rq *rq)
+{
+ struct task_struct *push_task = rq->curr;
+
+ lockdep_assert_held(&rq->lock);
+ SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+ /*
+ * Both the cpu-hotplug and stop task are in this class and are
+ * required to complete the hotplug process.
+ */
+ if (is_per_cpu_kthread(push_task))
+ return false;
+
+ get_task_struct(push_task);
+ /*
+ * Temporarily drop rq->lock such that we can wake-up the stop task.
+ * Both preemption and IRQs are still disabled.
+ */
+ raw_spin_unlock(&rq->lock);
+ stop_one_cpu_nowait(rq->cpu, __balance_push_stop, push_task,
+ this_cpu_ptr(&push_work));
+ /*
+ * At this point need_resched() is true and we'll take the loop in
+ * schedule(). The next pick is obviously going to be the stop task
+ * which is_per_cpu_kthread() and will push this task away.
+ */
+ raw_spin_lock(&rq->lock);
+
+ return true;
+}
+
+static void balance_push_set(int cpu, bool on)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct rq_flags rf;
+
+ rq_lock_irqsave(rq, &rf);
+ if (on)
+ rq->balance_flags |= BALANCE_PUSH;
+ else
+ rq->balance_flags &= ~BALANCE_PUSH;
+ rq_unlock_irqrestore(rq, &rf);
+}
+
+#else
+
+static inline bool balance_push(struct rq *rq)
+{
+ return false;
+}
+
#endif /* CONFIG_HOTPLUG_CPU */
void set_rq_online(struct rq *rq)
@@ -6921,6 +7015,8 @@ int sched_cpu_activate(unsigned int cpu)
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
+ balance_push_set(cpu, false);
+
#ifdef CONFIG_SCHED_SMT
/*
* When going up, increment the number of cores with SMT present.
@@ -6968,6 +7064,8 @@ int sched_cpu_deactivate(unsigned int cp
*/
synchronize_rcu();
+ balance_push_set(cpu, true);
+
#ifdef CONFIG_SCHED_SMT
/*
* When going down, decrement the number of cores with SMT present.
@@ -6981,6 +7079,7 @@ int sched_cpu_deactivate(unsigned int cp
ret = cpuset_cpu_inactive(cpu);
if (ret) {
+ balance_push_set(cpu, false);
set_cpu_active(cpu, true);
return ret;
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -973,6 +973,7 @@ struct rq {
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback;
+ unsigned char balance_flags;
unsigned char nohz_idle_balance;
unsigned char idle_balance;
@@ -1384,6 +1385,9 @@ init_numa_balancing(unsigned long clone_
#ifdef CONFIG_SMP
+#define BALANCE_WORK 0x01
+#define BALANCE_PUSH 0x02
+
static inline void
queue_balance_callback(struct rq *rq,
struct callback_head *head,
@@ -1397,6 +1401,7 @@ queue_balance_callback(struct rq *rq,
head->func = (void (*)(struct callback_head *))func;
head->next = rq->balance_callback;
rq->balance_callback = head;
+ rq->balance_flags |= BALANCE_WORK;
}
#define rcu_dereference_check_sched_domain(p) \
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/2] sched: Fix balance_callback()
2020-09-11 8:17 ` [PATCH 1/2] sched: Fix balance_callback() Peter Zijlstra
@ 2020-09-11 12:17 ` Valentin Schneider
2020-09-11 12:25 ` peterz
0 siblings, 1 reply; 13+ messages in thread
From: Valentin Schneider @ 2020-09-11 12:17 UTC (permalink / raw)
To: Peter Zijlstra
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood
On 11/09/20 09:17, Peter Zijlstra wrote:
> The intent of balance_callback() has always been to delay executing
> balancing operations until the end of the current rq->lock section.
> This is because balance operations must often drop rq->lock, and that
> isn't safe in general.
>
> However, as noted by Scott, there were a few holes in that scheme;
> balance_callback() was called after rq->lock was dropped, which means
> another CPU can interleave and touch the callback list.
>
So that can be say __schedule() tail racing with some setprio; what's the
worst that can (currently) happen here? Something like say two consecutive
enqueuing of push_rt_tasks() to the callback list?
> Rework code to call the balance callbacks before dropping rq->lock
> where possible, and otherwise splice the balance list onto a local
> stack.
>
> This guarantees that the balance list must be empty when we take
> rq->lock. IOW, we'll only ever run our own balance callbacks.
>
Makes sense to me.
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
> Reported-by: Scott Wood <swood@redhat.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[...]
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1220,6 +1220,8 @@ static inline void rq_pin_lock(struct rq
> #ifdef CONFIG_SCHED_DEBUG
> rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> rf->clock_update_flags = 0;
> +
> + SCHED_WARN_ON(rq->balance_callback);
Clever!
> #endif
> }
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-11 8:17 ` [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Peter Zijlstra
@ 2020-09-11 12:17 ` Valentin Schneider
2020-09-11 12:29 ` peterz
2020-09-16 10:18 ` Sebastian Andrzej Siewior
1 sibling, 1 reply; 13+ messages in thread
From: Valentin Schneider @ 2020-09-11 12:17 UTC (permalink / raw)
To: Peter Zijlstra
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, Vincent Donnefort
(+Cc VincentD, who's been looking at some of this)
On 11/09/20 09:17, Peter Zijlstra wrote:
> In preparation for migrate_disable(), make sure only per-cpu kthreads
> are allowed to run on !active CPUs.
>
> This is ran (as one of the very first steps) from the cpu-hotplug
> task which is a per-cpu kthread and completion of the hotplug
> operation only requires such tasks.
>
> This contraint enables the migrate_disable() implementation to wait
s/contraint/constraint/
> for completion of all migrate_disable regions on this CPU at hotplug
> time without fear of any new ones starting.
>
> This replaces the unlikely(rq->balance_callbacks) test at the tail of
> context_switch with an unlikely(rq->balance_work), the fast path is
> not affected.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> kernel/sched/core.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> kernel/sched/sched.h | 5 ++
> 2 files changed, 106 insertions(+), 2 deletions(-)
>
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6836,6 +6849,87 @@ static void migrate_tasks(struct rq *dea
>
> rq->stop = stop;
> }
> +
> +static int __balance_push_stop(void *arg)
__balance_push_cpu_stop()? _cpu_stop() seems to be the usual suffix.
> +{
> + struct task_struct *p = arg;
> + struct rq *rq = this_rq();
> + struct rq_flags rf;
> + int cpu;
> +
> + raw_spin_lock_irq(&p->pi_lock);
> + rq_lock(rq, &rf);
> +
> + if (task_rq(p) == rq && task_on_rq_queued(p)) {
> + cpu = select_fallback_rq(rq->cpu, p);
> + rq = __migrate_task(rq, &rf, p, cpu);
> + }
> +
> + rq_unlock(rq, &rf);
> + raw_spin_unlock_irq(&p->pi_lock);
> +
> + put_task_struct(p);
> +
> + return 0;
> +}
> +
> +static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
> +
> +/*
> + * Ensure we only run per-cpu kthreads once the CPU goes !active.
> + */
> +static bool balance_push(struct rq *rq)
> +{
> + struct task_struct *push_task = rq->curr;
> +
> + lockdep_assert_held(&rq->lock);
> + SCHED_WARN_ON(rq->cpu != smp_processor_id());
> +
> + /*
> + * Both the cpu-hotplug and stop task are in this class and are
> + * required to complete the hotplug process.
Nit: s/class/case/? I can't not read "class" as "sched_class", and
those two are *not* in the same sched_class, and confusion ensues.
> + */
> + if (is_per_cpu_kthread(push_task))
> + return false;
> +
> + get_task_struct(push_task);
> + /*
> + * Temporarily drop rq->lock such that we can wake-up the stop task.
> + * Both preemption and IRQs are still disabled.
> + */
> + raw_spin_unlock(&rq->lock);
> + stop_one_cpu_nowait(rq->cpu, __balance_push_stop, push_task,
> + this_cpu_ptr(&push_work));
> + /*
> + * At this point need_resched() is true and we'll take the loop in
> + * schedule(). The next pick is obviously going to be the stop task
> + * which is_per_cpu_kthread() and will push this task away.
> + */
> + raw_spin_lock(&rq->lock);
> +
> + return true;
> +}
> +
> @@ -6968,6 +7064,8 @@ int sched_cpu_deactivate(unsigned int cp
> */
> synchronize_rcu();
>
> + balance_push_set(cpu, true);
> +
IIUC this is going to make every subsequent finish_lock_switch()
migrate the switched-to task if it isn't a pcpu kthread. So this is going
to lead to a dance of
switch_to(<task0>) -> switch_to(<stopper>) -> switch_to(<task1>) -> switch_to(<stopper>) ...
Wouldn't it be better to batch all those in a migrate_tasks() sibling that
skips pcpu kthreads?
> #ifdef CONFIG_SCHED_SMT
> /*
> * When going down, decrement the number of cores with SMT present.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/2] sched: Fix balance_callback()
2020-09-11 12:17 ` Valentin Schneider
@ 2020-09-11 12:25 ` peterz
2020-09-11 13:27 ` Valentin Schneider
0 siblings, 1 reply; 13+ messages in thread
From: peterz @ 2020-09-11 12:25 UTC (permalink / raw)
To: Valentin Schneider
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood
On Fri, Sep 11, 2020 at 01:17:02PM +0100, Valentin Schneider wrote:
> On 11/09/20 09:17, Peter Zijlstra wrote:
> > The intent of balance_callback() has always been to delay executing
> > balancing operations until the end of the current rq->lock section.
> > This is because balance operations must often drop rq->lock, and that
> > isn't safe in general.
> >
> > However, as noted by Scott, there were a few holes in that scheme;
> > balance_callback() was called after rq->lock was dropped, which means
> > another CPU can interleave and touch the callback list.
> >
>
> So that can be say __schedule() tail racing with some setprio; what's the
> worst that can (currently) happen here? Something like say two consecutive
> enqueuing of push_rt_tasks() to the callback list?
Yeah, but that isn't in fact the case I worry most about.
What can happen (and what I've spotted once before) is that someone
attempts to enqueue a balance_callback from a rq->lock region that
doesn't handle the calls.
Currently that 'works', that is, it will get ran _eventually_. But
ideally we'd want that to not work and issue a WARN. We want the
callbacks to be timely.
So basically all of these machinations are in order to add the WARN :-)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-11 12:17 ` Valentin Schneider
@ 2020-09-11 12:29 ` peterz
2020-09-11 13:48 ` Valentin Schneider
0 siblings, 1 reply; 13+ messages in thread
From: peterz @ 2020-09-11 12:29 UTC (permalink / raw)
To: Valentin Schneider
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, Vincent Donnefort
On Fri, Sep 11, 2020 at 01:17:45PM +0100, Valentin Schneider wrote:
> > @@ -6968,6 +7064,8 @@ int sched_cpu_deactivate(unsigned int cp
> > */
> > synchronize_rcu();
> >
> > + balance_push_set(cpu, true);
> > +
>
> IIUC this is going to make every subsequent finish_lock_switch()
> migrate the switched-to task if it isn't a pcpu kthread. So this is going
> to lead to a dance of
>
> switch_to(<task0>) -> switch_to(<stopper>) -> switch_to(<task1>) -> switch_to(<stopper>) ...
>
> Wouldn't it be better to batch all those in a migrate_tasks() sibling that
> skips pcpu kthreads?
That's 'difficult', this is hotplug, performance is not a consideration.
Basically we don't have an iterator for the runqueues, so finding these
tasks is hard.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/2] sched: Fix balance_callback()
2020-09-11 12:25 ` peterz
@ 2020-09-11 13:27 ` Valentin Schneider
0 siblings, 0 replies; 13+ messages in thread
From: Valentin Schneider @ 2020-09-11 13:27 UTC (permalink / raw)
To: peterz
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood
On 11/09/20 13:25, peterz@infradead.org wrote:
> On Fri, Sep 11, 2020 at 01:17:02PM +0100, Valentin Schneider wrote:
>> So that can be say __schedule() tail racing with some setprio; what's the
>> worst that can (currently) happen here? Something like say two consecutive
>> enqueuing of push_rt_tasks() to the callback list?
>
> Yeah, but that isn't in fact the case I worry most about.
>
> What can happen (and what I've spotted once before) is that someone
> attempts to enqueue a balance_callback from a rq->lock region that
> doesn't handle the calls.
>
> Currently that 'works', that is, it will get ran _eventually_. But
> ideally we'd want that to not work and issue a WARN. We want the
> callbacks to be timely.
>
> So basically all of these machinations we in order to add the WARN :-)
Makes sense, thanks!
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-11 12:29 ` peterz
@ 2020-09-11 13:48 ` Valentin Schneider
0 siblings, 0 replies; 13+ messages in thread
From: Valentin Schneider @ 2020-09-11 13:48 UTC (permalink / raw)
To: peterz
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, Vincent Donnefort
On 11/09/20 13:29, peterz@infradead.org wrote:
> On Fri, Sep 11, 2020 at 01:17:45PM +0100, Valentin Schneider wrote:
>
>> > @@ -6968,6 +7064,8 @@ int sched_cpu_deactivate(unsigned int cp
>> > */
>> > synchronize_rcu();
>> >
>> > + balance_push_set(cpu, true);
>> > +
>>
>> IIUC this is going to make every subsequent finish_lock_switch()
>> migrate the switched-to task if it isn't a pcpu kthread. So this is going
>> to lead to a dance of
>>
>> switch_to(<task0>) -> switch_to(<stopper>) -> switch_to(<task1>) -> switch_to(<stopper>) ...
>>
>> Wouldn't it be better to batch all those in a migrate_tasks() sibling that
>> skips pcpu kthreads?
>
> That's 'difficult', this is hotplug, performance is not a consideration.
>
Aye aye.
The reason I'm trying to care about this is (don't wince too hard) for that
CPU pause thing [1]. Vincent's been the one doing all the actual work so I
should let him pitch in, but TL;DR if we could "relatively quickly" migrate
all !pcpu tasks after clearing the active bit, we could stop hotplug there
and have our "CPU pause" thing.
[1]: https://lwn.net/Articles/820825/
> Basically we don't have an iterator for the runqueues, so finding these
> tasks is hard.
Mmph right, we'd pretty much have to "enjoy" iterating & skipping pcpu
threads every __pick_migrate_task() call...
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-11 8:17 ` [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Peter Zijlstra
2020-09-11 12:17 ` Valentin Schneider
@ 2020-09-16 10:18 ` Sebastian Andrzej Siewior
2020-09-16 12:10 ` peterz
1 sibling, 1 reply; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2020-09-16 10:18 UTC (permalink / raw)
To: Peter Zijlstra
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, valentin.schneider
On 2020-09-11 10:17:47 [+0200], Peter Zijlstra wrote:
> In preparation for migrate_disable(), make sure only per-cpu kthreads
> are allowed to run on !active CPUs.
>
> This is ran (as one of the very first steps) from the cpu-hotplug
> task which is a per-cpu kthread and completion of the hotplug
> operation only requires such tasks.
>
> This contraint enables the migrate_disable() implementation to wait
> for completion of all migrate_disable regions on this CPU at hotplug
> time without fear of any new ones starting.
>
> This replaces the unlikely(rq->balance_callbacks) test at the tail of
> context_switch with an unlikely(rq->balance_work), the fast path is
> not affected.
With this on top of -rc5 I get:
[ 42.678670] process 1816 (hackbench) no longer affine to cpu2
[ 42.678684] process 1817 (hackbench) no longer affine to cpu2
[ 42.710502] ------------[ cut here ]------------
[ 42.711505] rq->clock_update_flags < RQCF_ACT_SKIP
[ 42.711514] WARNING: CPU: 2 PID: 19 at kernel/sched/sched.h:1141 update_curr+0xe3/0x3f0
[ 42.714005] Modules linked in:
[ 42.714582] CPU: 2 PID: 19 Comm: migration/2 Not tainted 5.9.0-rc5+ #107
[ 42.715836] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-1 04/01/2014
[ 42.717367] RIP: 0010:update_curr+0xe3/0x3f0
[ 42.718212] Code: 09 00 00 01 0f 87 6c ff ff ff 80 3d 52 bd 59 01 00 0f 85 5f ff ff ff 48 c7 c7 f8 7e 2b 82 c6 05 3e bd 59 01 01 e8 bc 9a fb ff <0f> 0b e9 45 ff ff ff 8b 05 d8 d4 59 01 48 8d 6b 80 8b 15 ba 5a 5b
[ 42.721839] RSP: 0018:ffffc900000e3d60 EFLAGS: 00010086
[ 42.722827] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[ 42.724166] RDX: ffff888275fa4540 RSI: ffffffff810f37e6 RDI: ffffffff82463940
[ 42.725547] RBP: ffff888276ca9600 R08: 0000000000000000 R09: 0000000000000026
[ 42.726925] R10: 0000000000000046 R11: 0000000000000046 R12: ffff888276ca9580
[ 42.728268] R13: ffff888276ca9580 R14: ffff888276ca9600 R15: ffff88826aa5c5c0
[ 42.729659] FS: 0000000000000000(0000) GS:ffff888276c80000(0000) knlGS:0000000000000000
[ 42.731186] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 42.732272] CR2: 00007f7952603f90 CR3: 000000026aa56000 CR4: 00000000003506e0
[ 42.733657] Call Trace:
[ 42.734136] dequeue_task_fair+0xfa/0x6f0
[ 42.734899] do_set_cpus_allowed+0xbb/0x1c0
[ 42.735692] cpuset_cpus_allowed_fallback+0x73/0x1a0
[ 42.736635] select_fallback_rq+0x129/0x160
[ 42.737450] __balance_push_stop+0x132/0x230
[ 42.738291] ? migration_cpu_stop+0x1f0/0x1f0
[ 42.739118] cpu_stopper_thread+0x76/0x100
[ 42.739897] ? smpboot_thread_fn+0x21/0x1f0
[ 42.740691] smpboot_thread_fn+0x106/0x1f0
[ 42.741487] ? __smpboot_create_thread.part.0+0x100/0x100
[ 42.742539] kthread+0x135/0x150
[ 42.743153] ? kthread_create_worker_on_cpu+0x60/0x60
[ 42.744106] ret_from_fork+0x22/0x30
[ 42.744792] irq event stamp: 100
[ 42.745433] hardirqs last enabled at (99): [<ffffffff81a8d46f>] _raw_spin_unlock_irq+0x1f/0x30
[ 42.747116] hardirqs last disabled at (100): [<ffffffff81a8d27e>] _raw_spin_lock_irq+0x3e/0x40
[ 42.748748] softirqs last enabled at (0): [<ffffffff81077b75>] copy_process+0x665/0x1b00
[ 42.750353] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 42.751552] ---[ end trace d98bb30ad2616d58 ]---
That comes due to DEQUEUE_NOCLOCK in do_set_cpus_allowed(). And then
there is this:
[ 42.752454] ======================================================
[ 42.752455] WARNING: possible circular locking dependency detected
[ 42.752455] 5.9.0-rc5+ #107 Not tainted
[ 42.752455] ------------------------------------------------------
[ 42.752456] migration/2/19 is trying to acquire lock:
[ 42.752456] ffffffff824639f8 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0xa/0x30
[ 42.752458] but task is already holding lock:
[ 42.752458] ffff888276ca9598 (&rq->lock){-.-.}-{2:2}, at: __balance_push_stop+0x50/0x230
[ 42.752460] which lock already depends on the new lock.
[ 42.752460] the existing dependency chain (in reverse order) is:
[ 42.752461] -> #2 (&rq->lock){-.-.}-{2:2}:
[ 42.752462] _raw_spin_lock+0x27/0x40
[ 42.752462] task_fork_fair+0x30/0x1b0
[ 42.752462] sched_fork+0x10b/0x280
[ 42.752463] copy_process+0x702/0x1b00
[ 42.752464] _do_fork+0x5a/0x450
[ 42.752464] kernel_thread+0x50/0x70
[ 42.752465] rest_init+0x19/0x240
[ 42.752465] start_kernel+0x548/0x56a
[ 42.752466] secondary_startup_64+0xa4/0xb0
[ 42.752467] -> #1 (&p->pi_lock){-.-.}-{2:2}:
[ 42.752468] _raw_spin_lock_irqsave+0x36/0x50
[ 42.752469] try_to_wake_up+0x4e/0x720
[ 42.752469] up+0x3b/0x50
[ 42.752470] __up_console_sem+0x29/0x60
[ 42.752470] console_unlock+0x390/0x690
[ 42.752471] con_font_op+0x2ec/0x480
[ 42.752471] vt_ioctl+0x4d1/0x16e0
[ 42.752471] tty_ioctl+0x3e2/0x9a0
[ 42.752472] __x64_sys_ioctl+0x81/0x9a
[ 42.752472] do_syscall_64+0x33/0x40
[ 42.752472] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 42.752473] -> #0 ((console_sem).lock){-.-.}-{2:2}:
[ 42.752474] __lock_acquire+0x11af/0x2010
[ 42.752474] lock_acquire+0xb7/0x400
[ 42.752474] _raw_spin_lock_irqsave+0x36/0x50
[ 42.752474] down_trylock+0xa/0x30
[ 42.752475] __down_trylock_console_sem+0x23/0x90
[ 42.752475] vprintk_emit+0xf6/0x380
[ 42.752476] printk+0x53/0x6a
[ 42.752476] __warn_printk+0x42/0x84
[ 42.752476] update_curr+0xe3/0x3f0
[ 42.752477] dequeue_task_fair+0xfa/0x6f0
[ 42.752477] do_set_cpus_allowed+0xbb/0x1c0
[ 42.752478] cpuset_cpus_allowed_fallback+0x73/0x1a0
[ 42.752479] select_fallback_rq+0x129/0x160
[ 42.752479] __balance_push_stop+0x132/0x230
[ 42.752480] cpu_stopper_thread+0x76/0x100
[ 42.752480] smpboot_thread_fn+0x106/0x1f0
[ 42.752480] kthread+0x135/0x150
[ 42.752480] ret_from_fork+0x22/0x30
[ 42.752481] other info that might help us debug this:
[ 42.752482] Chain exists of:
[ 42.752482] (console_sem).lock --> &p->pi_lock --> &rq->lock
[ 42.752483] Possible unsafe locking scenario:
[ 42.752484] CPU0 CPU1
[ 42.752484] ---- ----
[ 42.752484] lock(&rq->lock);
[ 42.752485] lock(&p->pi_lock);
[ 42.752486] lock(&rq->lock);
[ 42.752486] lock((console_sem).lock);
[ 42.752487] *** DEADLOCK ***
[ 42.752488] 3 locks held by migration/2/19:
[ 42.752488] #0: ffff88826aa5d0f8 (&p->pi_lock){-.-.}-{2:2}, at: __balance_push_stop+0x48/0x230
[ 42.752489] #1: ffff888276ca9598 (&rq->lock){-.-.}-{2:2}, at: __balance_push_stop+0x50/0x230
[ 42.752490] #2: ffffffff82478e00 (rcu_read_lock){....}-{1:2}, at: cpuset_cpus_allowed_fallback+0x0/0x1a0
[ 42.752491] stack backtrace:
[ 42.752492] CPU: 2 PID: 19 Comm: migration/2 Not tainted 5.9.0-rc5+ #107
[ 42.752492] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-1 04/01/2014
[ 42.752492] Call Trace:
[ 42.752493] dump_stack+0x77/0xa0
[ 42.752493] check_noncircular+0x156/0x170
[ 42.752493] __lock_acquire+0x11af/0x2010
[ 42.752493] ? __lock_acquire+0x1692/0x2010
[ 42.752493] lock_acquire+0xb7/0x400
[ 42.752494] ? down_trylock+0xa/0x30
[ 42.752494] ? log_store.constprop.0+0x1a4/0x250
[ 42.752494] ? printk+0x53/0x6a
[ 42.752494] _raw_spin_lock_irqsave+0x36/0x50
[ 42.752495] ? down_trylock+0xa/0x30
[ 42.752495] down_trylock+0xa/0x30
[ 42.752495] ? printk+0x53/0x6a
[ 42.752495] __down_trylock_console_sem+0x23/0x90
[ 42.752495] vprintk_emit+0xf6/0x380
[ 42.752496] printk+0x53/0x6a
[ 42.752496] __warn_printk+0x42/0x84
[ 42.752496] update_curr+0xe3/0x3f0
[ 42.752496] dequeue_task_fair+0xfa/0x6f0
[ 42.752497] do_set_cpus_allowed+0xbb/0x1c0
[ 42.752497] cpuset_cpus_allowed_fallback+0x73/0x1a0
[ 42.752497] select_fallback_rq+0x129/0x160
[ 42.752497] __balance_push_stop+0x132/0x230
[ 42.752497] ? migration_cpu_stop+0x1f0/0x1f0
[ 42.752498] cpu_stopper_thread+0x76/0x100
[ 42.752498] ? smpboot_thread_fn+0x21/0x1f0
[ 42.752498] smpboot_thread_fn+0x106/0x1f0
[ 42.752498] ? __smpboot_create_thread.part.0+0x100/0x100
[ 42.752499] kthread+0x135/0x150
[ 42.752499] ? kthread_create_worker_on_cpu+0x60/0x60
[ 42.752499] ret_from_fork+0x22/0x30
[ 42.878095] numa_remove_cpu cpu 2 node 0: mask now 0-1,3-7
[ 42.879977] smpboot: CPU 2 is now offline
Sebastian
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-16 10:18 ` Sebastian Andrzej Siewior
@ 2020-09-16 12:10 ` peterz
2020-09-16 13:58 ` Sebastian Andrzej Siewior
0 siblings, 1 reply; 13+ messages in thread
From: peterz @ 2020-09-16 12:10 UTC (permalink / raw)
To: Sebastian Andrzej Siewior
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, valentin.schneider
On Wed, Sep 16, 2020 at 12:18:45PM +0200, Sebastian Andrzej Siewior wrote:
> With this on top of -rc5 I get:
>
> [ 42.678670] process 1816 (hackbench) no longer affine to cpu2
> [ 42.678684] process 1817 (hackbench) no longer affine to cpu2
> [ 42.710502] ------------[ cut here ]------------
> [ 42.711505] rq->clock_update_flags < RQCF_ACT_SKIP
> [ 42.711514] WARNING: CPU: 2 PID: 19 at kernel/sched/sched.h:1141 update_curr+0xe3/0x3f0
> [ 42.736635] select_fallback_rq+0x129/0x160
> [ 42.737450] __balance_push_stop+0x132/0x230
Duh.. I wonder why I didn't see that.. anyway, I think the below version
ought to cure that.
> That comes due to DEQUEUE_NOCLOCK in do_set_cpus_allowed(). And then
> there is this:
>
> [ 42.752454] ======================================================
> [ 42.752455] WARNING: possible circular locking dependency detected
> [ 42.752455] 5.9.0-rc5+ #107 Not tainted
> [ 42.752455] ------------------------------------------------------
> [ 42.752456] migration/2/19 is trying to acquire lock:
> [ 42.752456] ffffffff824639f8 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0xa/0x30
> [ 42.752458] but task is already holding lock:
> [ 42.752458] ffff888276ca9598 (&rq->lock){-.-.}-{2:2}, at: __balance_push_stop+0x50/0x230
> [ 42.752460] which lock already depends on the new lock.
Yeah, that's the known issue with printk()...
---
Subject: sched/hotplug: Ensure only per-cpu kthreads run during hotplug
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Sep 11 09:54:27 CEST 2020
In preparation for migrate_disable(), make sure only per-cpu kthreads
are allowed to run on !active CPUs.
This is run (as one of the very first steps) from the cpu-hotplug
task which is a per-cpu kthread, and completion of the hotplug
operation only requires such tasks.
This constraint enables the migrate_disable() implementation to wait
for completion of all migrate_disable regions on this CPU at hotplug
time without fear of any new ones starting.
This replaces the unlikely(rq->balance_callback) test at the tail of
context_switch with an unlikely(rq->balance_flags); the fast path is
not affected.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/core.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sched/sched.h | 5 ++
2 files changed, 117 insertions(+), 2 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3513,8 +3513,10 @@ static inline struct callback_head *spli
struct callback_head *head = rq->balance_callback;
lockdep_assert_held(&rq->lock);
- if (head)
+ if (head) {
rq->balance_callback = NULL;
+ rq->balance_flags &= ~BALANCE_WORK;
+ }
return head;
}
@@ -3535,6 +3537,22 @@ static inline void balance_callbacks(str
}
}
+static bool balance_push(struct rq *rq);
+
+static inline void balance_switch(struct rq *rq)
+{
+ if (unlikely(rq->balance_flags)) {
+ /*
+ * Run the balance_callbacks, except on hotplug
+ * when we need to push the current task away.
+ */
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
+ !(rq->balance_flags & BALANCE_PUSH) ||
+ !balance_push(rq))
+ __balance_callbacks(rq);
+ }
+}
+
#else
static inline void __balance_callbacks(struct rq *rq)
@@ -3550,6 +3568,10 @@ static inline void balance_callbacks(str
{
}
+static inline void balance_switch(struct rq *rq)
+{
+}
+
#endif
static inline void
@@ -3577,7 +3599,7 @@ static inline void finish_lock_switch(st
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
- __balance_callbacks(rq);
+ balance_switch(rq);
raw_spin_unlock_irq(&rq->lock);
}
@@ -6836,6 +6858,89 @@ static void migrate_tasks(struct rq *dea
rq->stop = stop;
}
+
+static int __balance_push_cpu_stop(void *arg)
+{
+ struct task_struct *p = arg;
+ struct rq *rq = this_rq();
+ struct rq_flags rf;
+ int cpu;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ rq_lock(rq, &rf);
+
+ update_rq_clock();
+
+ if (task_rq(p) == rq && task_on_rq_queued(p)) {
+ cpu = select_fallback_rq(rq->cpu, p);
+ rq = __migrate_task(rq, &rf, p, cpu);
+ }
+
+ rq_unlock(rq, &rf);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
+
+/*
+ * Ensure we only run per-cpu kthreads once the CPU goes !active.
+ */
+static bool balance_push(struct rq *rq)
+{
+ struct task_struct *push_task = rq->curr;
+
+ lockdep_assert_held(&rq->lock);
+ SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+ /*
+ * Both the cpu-hotplug and stop task are in this case and are
+ * required to complete the hotplug process.
+ */
+ if (is_per_cpu_kthread(push_task))
+ return false;
+
+ get_task_struct(push_task);
+ /*
+ * Temporarily drop rq->lock such that we can wake-up the stop task.
+ * Both preemption and IRQs are still disabled.
+ */
+ raw_spin_unlock(&rq->lock);
+ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
+ this_cpu_ptr(&push_work));
+ /*
+ * At this point need_resched() is true and we'll take the loop in
+ * schedule(). The next pick is obviously going to be the stop task
+ * which is_per_cpu_kthread() and will push this task away.
+ */
+ raw_spin_lock(&rq->lock);
+
+ return true;
+}
+
+static void balance_push_set(int cpu, bool on)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct rq_flags rf;
+
+ rq_lock_irqsave(rq, &rf);
+ if (on)
+ rq->balance_flags |= BALANCE_PUSH;
+ else
+ rq->balance_flags &= ~BALANCE_PUSH;
+ rq_unlock_irqrestore(rq, &rf);
+}
+
+#else
+
+static inline bool balance_push(struct rq *rq)
+{
+ return false;
+}
+
#endif /* CONFIG_HOTPLUG_CPU */
void set_rq_online(struct rq *rq)
@@ -6921,6 +7026,8 @@ int sched_cpu_activate(unsigned int cpu)
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
+ balance_push_set(cpu, false);
+
#ifdef CONFIG_SCHED_SMT
/*
* When going up, increment the number of cores with SMT present.
@@ -6968,6 +7075,8 @@ int sched_cpu_deactivate(unsigned int cp
*/
synchronize_rcu();
+ balance_push_set(cpu, true);
+
#ifdef CONFIG_SCHED_SMT
/*
* When going down, decrement the number of cores with SMT present.
@@ -6981,6 +7090,7 @@ int sched_cpu_deactivate(unsigned int cp
ret = cpuset_cpu_inactive(cpu);
if (ret) {
+ balance_push_set(cpu, false);
set_cpu_active(cpu, true);
return ret;
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -973,6 +973,7 @@ struct rq {
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback;
+ unsigned char balance_flags;
unsigned char nohz_idle_balance;
unsigned char idle_balance;
@@ -1384,6 +1385,9 @@ init_numa_balancing(unsigned long clone_
#ifdef CONFIG_SMP
+#define BALANCE_WORK 0x01
+#define BALANCE_PUSH 0x02
+
static inline void
queue_balance_callback(struct rq *rq,
struct callback_head *head,
@@ -1397,6 +1401,7 @@ queue_balance_callback(struct rq *rq,
head->func = (void (*)(struct callback_head *))func;
head->next = rq->balance_callback;
rq->balance_callback = head;
+ rq->balance_flags |= BALANCE_WORK;
}
#define rcu_dereference_check_sched_domain(p) \
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-16 12:10 ` peterz
@ 2020-09-16 13:58 ` Sebastian Andrzej Siewior
2020-09-16 14:07 ` peterz
0 siblings, 1 reply; 13+ messages in thread
From: Sebastian Andrzej Siewior @ 2020-09-16 13:58 UTC (permalink / raw)
To: peterz
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, valentin.schneider
On 2020-09-16 14:10:20 [+0200], peterz@infradead.org wrote:
squeeze that in please:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a4fe22b8b8418..bed3cd28af578 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6866,7 +6866,7 @@ static int __balance_push_cpu_stop(void *arg)
raw_spin_lock_irq(&p->pi_lock);
rq_lock(rq, &rf);
- update_rq_clock();
+ update_rq_clock(rq);
if (task_rq(p) == rq && task_on_rq_queued(p)) {
cpu = select_fallback_rq(rq->cpu, p);
and count me in :)
Sebastian
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug
2020-09-16 13:58 ` Sebastian Andrzej Siewior
@ 2020-09-16 14:07 ` peterz
0 siblings, 0 replies; 13+ messages in thread
From: peterz @ 2020-09-16 14:07 UTC (permalink / raw)
To: Sebastian Andrzej Siewior
Cc: mingo, vincent.guittot, tglx, linux-kernel, dietmar.eggemann,
rostedt, bristot, swood, valentin.schneider
On Wed, Sep 16, 2020 at 03:58:17PM +0200, Sebastian Andrzej Siewior wrote:
> On 2020-09-16 14:10:20 [+0200], peterz@infradead.org wrote:
>
> squeeze that in please:
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a4fe22b8b8418..bed3cd28af578 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6866,7 +6866,7 @@ static int __balance_push_cpu_stop(void *arg)
> raw_spin_lock_irq(&p->pi_lock);
> rq_lock(rq, &rf);
>
> - update_rq_clock();
> + update_rq_clock(rq);
>
> if (task_rq(p) == rq && task_on_rq_queued(p)) {
> cpu = select_fallback_rq(rq->cpu, p);
>
>
> and count me in :)
Duh... /me goes in search of the brown paper bag -- again!
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2020-09-16 20:47 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-11 8:17 [PATCH 0/2] sched: migrate_disable() preparations Peter Zijlstra
2020-09-11 8:17 ` [PATCH 1/2] sched: Fix balance_callback() Peter Zijlstra
2020-09-11 12:17 ` Valentin Schneider
2020-09-11 12:25 ` peterz
2020-09-11 13:27 ` Valentin Schneider
2020-09-11 8:17 ` [PATCH 2/2] sched/hotplug: Ensure only per-cpu kthreads run during hotplug Peter Zijlstra
2020-09-11 12:17 ` Valentin Schneider
2020-09-11 12:29 ` peterz
2020-09-11 13:48 ` Valentin Schneider
2020-09-16 10:18 ` Sebastian Andrzej Siewior
2020-09-16 12:10 ` peterz
2020-09-16 13:58 ` Sebastian Andrzej Siewior
2020-09-16 14:07 ` peterz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).