linux-kernel.vger.kernel.org archive mirror
* [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock
@ 2015-07-21 19:22 Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 1/6] stop_machine: move cpu_stopper_task and stop_cpus_work into struct cpu_stopper Oleg Nesterov
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

Hello,

Let me resend this. The only change in v2 is that I rediffed this
series against v4.2-rc3.

Patch 5/6 fixes a bug, I think. Say, stop_one_cpu(X) can race with
_cpu_down(X)->stop_machine() so that the kernel will crash if this
CPU X becomes online again. The window after cpu_stopper_thread()
returns and before smpboot_thread() calls ->park() is tiny, but still
this is possible afaics. But see the changelog in 6/6, I think we
should turn this cpu_stop_signal_done() into BUG() later.

6/6 removes lglock from kernel/stop_machine.c.

Peter, Rik, what do you think ?

Oleg.

 include/linux/lglock.h       |    5 --
 include/linux/stop_machine.h |   28 ++------
 kernel/cpu.c                 |    2 +-
 kernel/locking/lglock.c      |   22 ------
 kernel/stop_machine.c        |  162 ++++++++++++++++++++++++------------------
 5 files changed, 98 insertions(+), 121 deletions(-)


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/6] stop_machine: move cpu_stopper_task and stop_cpus_work into struct cpu_stopper
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
@ 2015-07-21 19:22 ` Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 2/6] stop_machine: don't do for_each_cpu() twice in queue_stop_cpus_work() Oleg Nesterov
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

Multiple DEFINE_PER_CPU's do not make sense; move all the per-cpu
variables into struct cpu_stopper.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   17 +++++++++--------
 1 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fd643d8..6e677b0 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -35,13 +35,16 @@ struct cpu_stop_done {
 
 /* the actual stopper, one per every possible cpu, enabled on online cpus */
 struct cpu_stopper {
+	struct task_struct	*thread;
+
 	spinlock_t		lock;
 	bool			enabled;	/* is this stopper enabled? */
 	struct list_head	works;		/* list of pending works */
+
+	struct cpu_stop_work	stop_work;	/* for stop_cpus */
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
-static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
 static bool stop_machine_initialized = false;
 
 /*
@@ -74,7 +77,6 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
 static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);
 
 	unsigned long flags;
 
@@ -82,7 +84,7 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 
 	if (stopper->enabled) {
 		list_add_tail(&work->list, &stopper->works);
-		wake_up_process(p);
+		wake_up_process(stopper->thread);
 	} else
 		cpu_stop_signal_done(work->done, false);
 
@@ -293,7 +295,6 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 
 /* static data for stop_cpus */
 static DEFINE_MUTEX(stop_cpus_mutex);
-static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
 static void queue_stop_cpus_work(const struct cpumask *cpumask,
 				 cpu_stop_fn_t fn, void *arg,
@@ -304,7 +305,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 
 	/* initialize works and done */
 	for_each_cpu(cpu, cpumask) {
-		work = &per_cpu(stop_cpus_work, cpu);
+		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
 		work->arg = arg;
 		work->done = done;
@@ -317,7 +318,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	 */
 	lg_global_lock(&stop_cpus_lock);
 	for_each_cpu(cpu, cpumask)
-		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
+		cpu_stop_queue_work(cpu, &per_cpu(cpu_stopper.stop_work, cpu));
 	lg_global_unlock(&stop_cpus_lock);
 }
 
@@ -458,7 +459,7 @@ extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
 static void cpu_stop_create(unsigned int cpu)
 {
-	sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
+	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
 }
 
 static void cpu_stop_park(unsigned int cpu)
@@ -485,7 +486,7 @@ static void cpu_stop_unpark(unsigned int cpu)
 }
 
 static struct smp_hotplug_thread cpu_stop_threads = {
-	.store			= &cpu_stopper_task,
+	.store			= &cpu_stopper.thread,
 	.thread_should_run	= cpu_stop_should_run,
 	.thread_fn		= cpu_stopper_thread,
 	.thread_comm		= "migration/%u",
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 2/6] stop_machine: don't do for_each_cpu() twice in queue_stop_cpus_work()
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 1/6] stop_machine: move cpu_stopper_task and stop_cpus_work into struct cpu_stopper Oleg Nesterov
@ 2015-07-21 19:22 ` Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 3/6] stop_machine: unexport __stop_machine() Oleg Nesterov
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

queue_stop_cpus_work() can do everything in one for_each_cpu() loop.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   17 +++++++----------
 1 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6e677b0..6212208 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -303,22 +303,19 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	struct cpu_stop_work *work;
 	unsigned int cpu;
 
-	/* initialize works and done */
-	for_each_cpu(cpu, cpumask) {
-		work = &per_cpu(cpu_stopper.stop_work, cpu);
-		work->fn = fn;
-		work->arg = arg;
-		work->done = done;
-	}
-
 	/*
 	 * Disable preemption while queueing to avoid getting
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
 	lg_global_lock(&stop_cpus_lock);
-	for_each_cpu(cpu, cpumask)
-		cpu_stop_queue_work(cpu, &per_cpu(cpu_stopper.stop_work, cpu));
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(cpu_stopper.stop_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = done;
+		cpu_stop_queue_work(cpu, work);
+	}
 	lg_global_unlock(&stop_cpus_lock);
 }
 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 3/6] stop_machine: unexport __stop_machine()
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 1/6] stop_machine: move cpu_stopper_task and stop_cpus_work into struct cpu_stopper Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 2/6] stop_machine: don't do for_each_cpu() twice in queue_stop_cpus_work() Oleg Nesterov
@ 2015-07-21 19:22 ` Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 4/6] stop_machine: use cpu_stop_fn_t where possible Oleg Nesterov
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

The only caller outside of stop_machine.c is _cpu_down(); it can use
stop_machine(). get_online_cpus() is fine under cpu_hotplug_begin().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/stop_machine.h |   22 ++--------------------
 kernel/cpu.c                 |    2 +-
 kernel/stop_machine.c        |    2 +-
 3 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index d2abbdb..0fca276 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -114,23 +114,11 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
  * grabbing every spinlock in the kernel. */
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-/**
- * __stop_machine: freeze the machine on all CPUs and run this function
- * @fn: the function to run
- * @data: the data ptr for the @fn
- * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
- *
- * Description: This is a special version of the above, which assumes cpus
- * won't come or go while it's being called.  Used by hotplug cpu.
- */
-int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
-
 int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 				   const struct cpumask *cpus);
-
 #else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
 
-static inline int __stop_machine(int (*fn)(void *), void *data,
+static inline int stop_machine(int (*fn)(void *), void *data,
 				 const struct cpumask *cpus)
 {
 	unsigned long flags;
@@ -141,16 +129,10 @@ static inline int __stop_machine(int (*fn)(void *), void *data,
 	return ret;
 }
 
-static inline int stop_machine(int (*fn)(void *), void *data,
-			       const struct cpumask *cpus)
-{
-	return __stop_machine(fn, data, cpus);
-}
-
 static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 						 const struct cpumask *cpus)
 {
-	return __stop_machine(fn, data, cpus);
+	return stop_machine(fn, data, cpus);
 }
 
 #endif	/* CONFIG_STOP_MACHINE && CONFIG_SMP */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5644ec5..81c0a59 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -401,7 +401,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	/*
 	 * So now all preempt/rcu users must observe !cpu_active().
 	 */
-	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+	err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6212208..b50910d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -513,7 +513,7 @@ early_initcall(cpu_stop_init);
 
 #ifdef CONFIG_STOP_MACHINE
 
-int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+static int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	struct multi_stop_data msdata = {
 		.fn = fn,
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 4/6] stop_machine: use cpu_stop_fn_t where possible
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
                   ` (2 preceding siblings ...)
  2015-07-21 19:22 ` [PATCH v2 3/6] stop_machine: unexport __stop_machine() Oleg Nesterov
@ 2015-07-21 19:22 ` Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 5/6] stop_machine: cpu_stop_park() should remove cpu_stop_work's from list Oleg Nesterov
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

Cosmetic, but cpu_stop_fn_t actually makes the code more readable and
it doesn't break cscope. And most of the declarations already use it.
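
For reference, the typedef this switches to already lives in
include/linux/stop_machine.h; quoting it from memory (so treat the
exact spelling as illustrative):

	/* include/linux/stop_machine.h */
	typedef int (*cpu_stop_fn_t)(void *arg);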

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/stop_machine.h |    8 ++++----
 kernel/stop_machine.c        |    8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0fca276..414d924 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -112,13 +112,13 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
-int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
+int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
-int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
 #else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
 
-static inline int stop_machine(int (*fn)(void *), void *data,
+static inline int stop_machine(cpu_stop_fn_t fn, void *data,
 				 const struct cpumask *cpus)
 {
 	unsigned long flags;
@@ -129,7 +129,7 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	return ret;
 }
 
-static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 						 const struct cpumask *cpus)
 {
 	return stop_machine(fn, data, cpus);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b50910d..9a70def 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -141,7 +141,7 @@ enum multi_stop_state {
 };
 
 struct multi_stop_data {
-	int			(*fn)(void *);
+	cpu_stop_fn_t		fn;
 	void			*data;
 	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
 	unsigned int		num_threads;
@@ -513,7 +513,7 @@ early_initcall(cpu_stop_init);
 
 #ifdef CONFIG_STOP_MACHINE
 
-static int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
 	struct multi_stop_data msdata = {
 		.fn = fn,
@@ -546,7 +546,7 @@ static int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *c
 	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
 }
 
-int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
 	int ret;
 
@@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(stop_machine);
  * 0 if all executions of @fn returned 0, any non zero return value if any
  * returned non zero.
  */
-int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				  const struct cpumask *cpus)
 {
 	struct multi_stop_data msdata = { .fn = fn, .data = data,
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 5/6] stop_machine: cpu_stop_park() should remove cpu_stop_work's from list
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
                   ` (3 preceding siblings ...)
  2015-07-21 19:22 ` [PATCH v2 4/6] stop_machine: use cpu_stop_fn_t where possible Oleg Nesterov
@ 2015-07-21 19:22 ` Oleg Nesterov
  2015-07-21 19:22 ` [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock() Oleg Nesterov
  2015-07-30 17:34 ` [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Peter Zijlstra
  6 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

cpu_stop_park() does cpu_stop_signal_done() but leaves the work on
stopper->works. The owner of this work can free/reuse this memory
right after that and corrupt the list, so if this CPU becomes online
again cpu_stopper_thread() will crash.
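
For completeness, the fix follows the usual rule for draining a list:
each work must be unlinked before it is signalled done, because the
owner may free or reuse the memory immediately afterwards, and
deleting entries while iterating requires the _safe list walker. A
minimal userspace sketch of the same rule, with toy types rather than
the kernel list API:

	#include <stdio.h>
	#include <stdlib.h>

	struct work {
		struct work *next;
		int id;
	};

	/* completing a work hands it back to its owner, who may free or
	 * reuse it at once -- so it must already be off our list */
	static void signal_done(struct work *w)
	{
		printf("completed work %d\n", w->id);
		free(w);
	}

	static void drain(struct work **head)
	{
		struct work *w = *head, *next;

		while (w) {
			next = w->next;	/* save the link before completing */
			signal_done(w);	/* w must not be touched after this */
			w = next;
		}
		*head = NULL;		/* nothing left pending */
	}

	int main(void)
	{
		struct work *head = NULL;
		int i;

		for (i = 0; i < 3; i++) {
			struct work *w = malloc(sizeof(*w));

			w->id = i;
			w->next = head;
			head = w;
		}
		drain(&head);
		return 0;
	}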

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9a70def..12484e5 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -462,13 +462,15 @@ static void cpu_stop_create(unsigned int cpu)
 static void cpu_stop_park(unsigned int cpu)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-	struct cpu_stop_work *work;
+	struct cpu_stop_work *work, *tmp;
 	unsigned long flags;
 
 	/* drain remaining works */
 	spin_lock_irqsave(&stopper->lock, flags);
-	list_for_each_entry(work, &stopper->works, list)
+	list_for_each_entry_safe(work, tmp, &stopper->works, list) {
+		list_del_init(&work->list);
 		cpu_stop_signal_done(work->done, false);
+	}
 	stopper->enabled = false;
 	spin_unlock_irqrestore(&stopper->lock, flags);
 }
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
                   ` (4 preceding siblings ...)
  2015-07-21 19:22 ` [PATCH v2 5/6] stop_machine: cpu_stop_park() should remove cpu_stop_work's from list Oleg Nesterov
@ 2015-07-21 19:22 ` Oleg Nesterov
  2015-07-30 21:55   ` Peter Zijlstra
  2015-07-30 17:34 ` [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Peter Zijlstra
  6 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2015-07-21 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, Rik van Riel, Tejun Heo, linux-kernel

stop_two_cpus() and stop_cpus() use stop_cpus_lock to avoid deadlock:
we need to ensure that the stopper functions can't be queued
"backwards" from one another.

Instead, we can change stop_two_cpus() to take 2 stopper->lock's
and queue both works "atomically"; we just need to check that both
->stop_work's on these CPUs are either free or already queued.

Note: this patch preserves the cpu_active() checks, but I think
we need to shift them into migrate_swap_stop(). However, we can't
do this without another cleanup: currently stopper->enabled does
not guarantee that work->fn() will be actually executed if we race
with cpu_down().
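
As an aside, the two stopper->lock's are always taken in cpu-number
order (stop_two_cpus() swaps cpu1/cpu2 first), which is what keeps two
concurrent stop_two_cpus() callers from deadlocking on each other. A
minimal userspace sketch of that ordering rule, with pthread mutexes
standing in for the per-cpu locks:

	#include <pthread.h>
	#include <stdio.h>

	#define NR_CPUS 4

	static pthread_mutex_t stopper_lock[NR_CPUS] = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	};

	/* always lock the lower-numbered cpu first, so callers working on
	 * overlapping cpus can never hold the locks in opposite order */
	static void queue_two(int cpu1, int cpu2)
	{
		if (cpu2 < cpu1) {
			int tmp = cpu1;

			cpu1 = cpu2;
			cpu2 = tmp;
		}

		pthread_mutex_lock(&stopper_lock[cpu1]);
		pthread_mutex_lock(&stopper_lock[cpu2]);

		/* both works would be queued here, "atomically" */
		printf("queued on cpu%d and cpu%d\n", cpu1, cpu2);

		pthread_mutex_unlock(&stopper_lock[cpu2]);
		pthread_mutex_unlock(&stopper_lock[cpu1]);
	}

	int main(void)
	{
		queue_two(3, 1);	/* locks taken as 1, then 3 */
		queue_two(1, 3);	/* same order, so no ABBA */
		return 0;
	}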

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/lglock.h  |    5 --
 kernel/locking/lglock.c |   22 ---------
 kernel/stop_machine.c   |  120 ++++++++++++++++++++++++++++-------------------
 3 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index c92ebd1..0081f00 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -52,15 +52,10 @@ struct lglock {
 	static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
-
 void lg_local_lock(struct lglock *lg);
 void lg_local_unlock(struct lglock *lg);
 void lg_local_lock_cpu(struct lglock *lg, int cpu);
 void lg_local_unlock_cpu(struct lglock *lg, int cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
-
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index 951cfcd..86ae2ae 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -60,28 +60,6 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
 }
 EXPORT_SYMBOL(lg_local_unlock_cpu);
 
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
-{
-	BUG_ON(cpu1 == cpu2);
-
-	/* lock in cpu order, just like lg_global_lock */
-	if (cpu2 < cpu1)
-		swap(cpu1, cpu2);
-
-	preempt_disable();
-	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
-}
-
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
-{
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
-	preempt_enable();
-}
-
 void lg_global_lock(struct lglock *lg)
 {
 	int i;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 12484e5..20fb291 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
-#include <linux/lglock.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -47,14 +46,6 @@ struct cpu_stopper {
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
-
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
 	memset(done, 0, sizeof(*done));
@@ -73,21 +64,29 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
 	}
 }
 
+static inline bool stop_work_pending(struct cpu_stopper *stopper)
+{
+	return !list_empty(&stopper->stop_work.list);
+}
+
+static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
+					struct cpu_stop_work *work)
+{
+	list_add_tail(&work->list, &stopper->works);
+	wake_up_process(stopper->thread);
+}
+
 /* queue @work to @stopper.  if offline, @work is completed immediately */
 static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-
 	unsigned long flags;
 
 	spin_lock_irqsave(&stopper->lock, flags);
-
-	if (stopper->enabled) {
-		list_add_tail(&work->list, &stopper->works);
-		wake_up_process(stopper->thread);
-	} else
+	if (stopper->enabled)
+		__cpu_stop_queue_work(stopper, work);
+	else
 		cpu_stop_signal_done(work->done, false);
-
 	spin_unlock_irqrestore(&stopper->lock, flags);
 }
 
@@ -213,6 +212,48 @@ static int multi_cpu_stop(void *data)
 	return err;
 }
 
+static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
+				    int cpu2, struct cpu_stop_work *work2)
+{
+	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
+	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+	int err;
+retry:
+	spin_lock_irq(&stopper1->lock);
+	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+	/*
+	 * If we observe both CPUs active we know _cpu_down() cannot yet have
+	 * queued its stop_machine works and therefore ours will get executed
+	 * first. Or its not either one of our CPUs that's getting unplugged,
+	 * in which case we don't care.
+	 */
+	err = -ENOENT;
+	if (!cpu_active(cpu1) || !cpu_active(cpu2))
+		goto unlock;
+
+	WARN_ON(!stopper1->enabled || !stopper2->enabled);
+	/*
+	 * Ensure that if we race with stop_cpus() the stoppers won't
+	 * get queued up in reverse order, leading to system deadlock.
+	 */
+	err = -EDEADLK;
+	if (stop_work_pending(stopper1) != stop_work_pending(stopper2))
+		goto unlock;
+
+	err = 0;
+	__cpu_stop_queue_work(stopper1, work1);
+	__cpu_stop_queue_work(stopper2, work2);
+unlock:
+	spin_unlock(&stopper2->lock);
+	spin_unlock_irq(&stopper1->lock);
+
+	if (unlikely(err == -EDEADLK)) {
+		cond_resched();
+		goto retry;
+	}
+	return err;
+}
+
 /**
  * stop_two_cpus - stops two cpus
  * @cpu1: the cpu to stop
@@ -228,48 +269,28 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 {
 	struct cpu_stop_done done;
 	struct cpu_stop_work work1, work2;
-	struct multi_stop_data msdata;
-
-	preempt_disable();
-	msdata = (struct multi_stop_data){
+	struct multi_stop_data msdata = {
 		.fn = fn,
 		.data = arg,
 		.num_threads = 2,
 		.active_cpus = cpumask_of(cpu1),
 	};
 
-	work1 = work2 = (struct cpu_stop_work){
-		.fn = multi_cpu_stop,
-		.arg = &msdata,
-		.done = &done
-	};
-
-	cpu_stop_init_done(&done, 2);
 	set_state(&msdata, MULTI_STOP_PREPARE);
+	cpu_stop_init_done(&done, 2);
 
-	/*
-	 * If we observe both CPUs active we know _cpu_down() cannot yet have
-	 * queued its stop_machine works and therefore ours will get executed
-	 * first. Or its not either one of our CPUs that's getting unplugged,
-	 * in which case we don't care.
-	 *
-	 * This relies on the stopper workqueues to be FIFO.
-	 */
-	if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
-		preempt_enable();
-		return -ENOENT;
-	}
-
-	lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
-	cpu_stop_queue_work(cpu1, &work1);
-	cpu_stop_queue_work(cpu2, &work2);
-	lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
+	work1.fn   = work2.fn   = multi_cpu_stop;
+	work1.arg  = work2.arg  = &msdata;
+	work1.done = work2.done = &done;
 
-	preempt_enable();
+	if (cpu1 > cpu2)
+		swap(cpu1, cpu2);
+	if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
+		return -ENOENT;
 
 	wait_for_completion(&done.completion);
-
-	return done.executed ? done.ret : -ENOENT;
+	WARN_ON(!done.executed);
+	return done.ret;
 }
 
 /**
@@ -308,7 +329,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
-	lg_global_lock(&stop_cpus_lock);
+	preempt_disable();
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
@@ -316,7 +337,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 		work->done = done;
 		cpu_stop_queue_work(cpu, work);
 	}
-	lg_global_unlock(&stop_cpus_lock);
+	preempt_enable();
 }
 
 static int __stop_cpus(const struct cpumask *cpumask,
@@ -505,6 +526,7 @@ static int __init cpu_stop_init(void)
 
 		spin_lock_init(&stopper->lock);
 		INIT_LIST_HEAD(&stopper->works);
+		INIT_LIST_HEAD(&stopper->stop_work.list);
 	}
 
 	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock
  2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
                   ` (5 preceding siblings ...)
  2015-07-21 19:22 ` [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock() Oleg Nesterov
@ 2015-07-30 17:34 ` Peter Zijlstra
  6 siblings, 0 replies; 15+ messages in thread
From: Peter Zijlstra @ 2015-07-30 17:34 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On Tue, Jul 21, 2015 at 09:22:19PM +0200, Oleg Nesterov wrote:
> Hello,
> 
> Let me resend this. The only change in v2 is that I rediffed this
> series against v4.2-rc3.
> 
> Patch 5/6 fixes a bug, I think. Say, stop_one_cpu(X) can race with
> _cpu_down(X)->stop_machine() so that the kernel will crash if this
> CPU X becomes online again. The window after cpu_stopper_thread()
> returns and before smpboot_thread() calls ->park() is tiny, but still
> this is possible afaics. But see the changelog in 6/6, I think we
> should turn this cpu_stop_signal_done() into BUG() later.

These I've queued (from v1 I think, nothing really changed there).

> 6/6 removes lglock from kernel/stop_machine.c.
> 
> Peter, Rik, what do you think ?

This one I've got some 'problems' with, but let me reply there.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-21 19:22 ` [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock() Oleg Nesterov
@ 2015-07-30 21:55   ` Peter Zijlstra
  2015-07-31 11:12     ` Peter Zijlstra
                       ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Peter Zijlstra @ 2015-07-30 21:55 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On Tue, Jul 21, 2015 at 09:22:47PM +0200, Oleg Nesterov wrote:

> +static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
> +				    int cpu2, struct cpu_stop_work *work2)
> +{
> +	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
> +	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
> +	int err;
> +retry:
> +	spin_lock_irq(&stopper1->lock);
> +	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
> +	/*
> +	 * If we observe both CPUs active we know _cpu_down() cannot yet have
> +	 * queued its stop_machine works and therefore ours will get executed
> +	 * first. Or its not either one of our CPUs that's getting unplugged,
> +	 * in which case we don't care.
> +	 */
> +	err = -ENOENT;
> +	if (!cpu_active(cpu1) || !cpu_active(cpu2))
> +		goto unlock;
> +
> +	WARN_ON(!stopper1->enabled || !stopper2->enabled);
> +	/*
> +	 * Ensure that if we race with stop_cpus() the stoppers won't
> +	 * get queued up in reverse order, leading to system deadlock.
> +	 */
> +	err = -EDEADLK;
> +	if (stop_work_pending(stopper1) != stop_work_pending(stopper2))
> +		goto unlock;

You could DoS/false positive this by running stop_one_cpu() in a loop,
and thereby 'always' having work pending on one but not the other.

(doing so is obviously daft for other reasons)

> +
> +	err = 0;
> +	__cpu_stop_queue_work(stopper1, work1);
> +	__cpu_stop_queue_work(stopper2, work2);
> +unlock:
> +	spin_unlock(&stopper2->lock);
> +	spin_unlock_irq(&stopper1->lock);
> +
> +	if (unlikely(err == -EDEADLK)) {
> +		cond_resched();
> +		goto retry;

And this just gives me -rt nightmares.

> +	}
> +	return err;
> +}

As it is, -rt does horrible things to stop_machine, and I would very
much like to make it such that we don't need to do that.

Now, obviously, stop_cpus() is _BAD_ for -rt, and we try real hard to
make sure that doesn't happen, but stop_one_cpu() and stop_two_cpus()
should not be a problem.

Exclusion between stop_{one,two}_cpu{,s}() and stop_cpus() makes this
trivially go away.

Paul's RCU branch already kills try_stop_cpus() dead, so that wart is
also gone. But we're still stuck with stop_machine_from_inactive_cpu()
which does a spin-wait for exclusive state. So I suppose we'll have to
keep stop_cpus_mutex :/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-30 21:55   ` Peter Zijlstra
@ 2015-07-31 11:12     ` Peter Zijlstra
  2015-07-31 14:17       ` Peter Zijlstra
  2015-08-01 10:57     ` Oleg Nesterov
  2015-08-03 14:58     ` Oleg Nesterov
  2 siblings, 1 reply; 15+ messages in thread
From: Peter Zijlstra @ 2015-07-31 11:12 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On Thu, Jul 30, 2015 at 11:55:27PM +0200, Peter Zijlstra wrote:
> 
> Exclusion between stop_{one,two}_cpu{,s}() and stop_cpus() makes this
> trivially go away.
> 
> Paul's RCU branch already kills try_stop_cpus() dead, so that wart is
> also gone. But we're still stuck with stop_machine_from_inactive_cpu()
> which does a spin-wait for exclusive state. So I suppose we'll have to
> keep stop_cpus_mutex :/

*groan* we really need to kill that from_inactive_cpu() shite too. Lemme
go have a look at how this MTRR crap works.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-31 11:12     ` Peter Zijlstra
@ 2015-07-31 14:17       ` Peter Zijlstra
  2015-08-03 14:58         ` Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: Peter Zijlstra @ 2015-07-31 14:17 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On Fri, Jul 31, 2015 at 01:12:46PM +0200, Peter Zijlstra wrote:
> On Thu, Jul 30, 2015 at 11:55:27PM +0200, Peter Zijlstra wrote:
> > 
> > Exclusion between stop_{one,two}_cpu{,s}() and stop_cpus() makes this
> > trivially go away.
> > 
> > Paul's RCU branch already kills try_stop_cpus() dead, so that wart is
> > also gone. But we're still stuck with stop_machine_from_inactive_cpu()
> > which does a spin-wait for exclusive state. So I suppose we'll have to
> > keep stop_cpus_mutex :/
> 
> *groan* we really need to kill that from_inactive_cpu() shite too. Lemme
> go have a look at how this MTRR crap works.

*sigh* we can't get rid of it :/ The hardware is 'broken', see:
d0af9eed5aa9 ("x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT
init")

And that means we must not ever block and the global primitive must be a
spinner.

The below is the best we can do.. At least it localizes the ugly.

--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
-#include <linux/lglock.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -47,14 +46,6 @@ struct cpu_stopper {
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
-
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
 	memset(done, 0, sizeof(*done));
@@ -74,20 +65,25 @@ static void cpu_stop_signal_done(struct
 }
 
 /* queue @work to @stopper.  if offline, @work is completed immediately */
-static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
+static void __cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
 
-	unsigned long flags;
-
-	spin_lock_irqsave(&stopper->lock, flags);
-
 	if (stopper->enabled) {
 		list_add_tail(&work->list, &stopper->works);
 		wake_up_process(stopper->thread);
-	} else
+	} else {
 		cpu_stop_signal_done(work->done, false);
+	}
+}
 
+static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
+{
+	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&stopper->lock, flags);
+	__cpu_stop_queue_work(cpu, work);
 	spin_unlock_irqrestore(&stopper->lock, flags);
 }
 
@@ -226,9 +222,14 @@ static int multi_cpu_stop(void *data)
  */
 int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
 {
-	struct cpu_stop_done done;
+	struct cpu_stopper *stopper1, *stopper2;
 	struct cpu_stop_work work1, work2;
 	struct multi_stop_data msdata;
+	struct cpu_stop_done done;
+	unsigned long flags;
+
+	if (cpu2 < cpu1)
+		swap(cpu1, cpu2);
 
 	preempt_disable();
 	msdata = (struct multi_stop_data){
@@ -260,10 +261,17 @@ int stop_two_cpus(unsigned int cpu1, uns
 		return -ENOENT;
 	}
 
-	lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
-	cpu_stop_queue_work(cpu1, &work1);
-	cpu_stop_queue_work(cpu2, &work2);
-	lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
+	stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
+	stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+
+	spin_lock_irqsave(&stopper1->lock, flags);
+	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+
+	__cpu_stop_queue_work(cpu1, &work1);
+	__cpu_stop_queue_work(cpu2, &work2);
+
+	spin_unlock(&stopper2->lock);
+	spin_unlock_irqrestore(&stopper1->lock, flags);
 
 	preempt_enable();
 
@@ -304,19 +312,24 @@ static void queue_stop_cpus_work(const s
 	unsigned int cpu;
 
 	/*
-	 * Disable preemption while queueing to avoid getting
-	 * preempted by a stopper which might wait for other stoppers
-	 * to enter @fn which can lead to deadlock.
+	 * Disgusting, but take all relevant per-cpu spinlocks to serialize
+	 * against stop_{one,two}_cpu{,s}().
 	 */
-	lg_global_lock(&stop_cpus_lock);
+	preempt_disable();
+	for_each_cpu(cpu, cpumask)
+		arch_spin_lock((arch_spinlock_t *)&per_cpu(cpu_stopper.lock, cpu));
+
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
 		work->arg = arg;
 		work->done = done;
-		cpu_stop_queue_work(cpu, work);
+		__cpu_stop_queue_work(cpu, work);
 	}
-	lg_global_unlock(&stop_cpus_lock);
+
+	for_each_cpu(cpu, cpumask)
+		arch_spin_unlock((arch_spinlock_t *)&per_cpu(cpu_stopper.lock, cpu));
+	preempt_enable();
 }
 
 static int __stop_cpus(const struct cpumask *cpumask,

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-30 21:55   ` Peter Zijlstra
  2015-07-31 11:12     ` Peter Zijlstra
@ 2015-08-01 10:57     ` Oleg Nesterov
  2015-08-01 22:36       ` Peter Zijlstra
  2015-08-03 14:58     ` Oleg Nesterov
  2 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2015-08-01 10:57 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

Hi Peter,

Thanks for looking. I'll try to reply on Monday, just one note...

On 07/30, Peter Zijlstra wrote:
>
> On Tue, Jul 21, 2015 at 09:22:47PM +0200, Oleg Nesterov wrote:
>
> > +static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
> > +				    int cpu2, struct cpu_stop_work *work2)
> > +{
> > +	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
> > +	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
> > +	int err;
> > +retry:
> > +	spin_lock_irq(&stopper1->lock);
> > +	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
> > +	/*
> > +	 * If we observe both CPUs active we know _cpu_down() cannot yet have
> > +	 * queued its stop_machine works and therefore ours will get executed
> > +	 * first. Or its not either one of our CPUs that's getting unplugged,
> > +	 * in which case we don't care.
> > +	 */
> > +	err = -ENOENT;
> > +	if (!cpu_active(cpu1) || !cpu_active(cpu2))
> > +		goto unlock;
> > +
> > +	WARN_ON(!stopper1->enabled || !stopper2->enabled);
> > +	/*
> > +	 * Ensure that if we race with stop_cpus() the stoppers won't
> > +	 * get queued up in reverse order, leading to system deadlock.
> > +	 */
> > +	err = -EDEADLK;
> > +	if (stop_work_pending(stopper1) != stop_work_pending(stopper2))
> > +		goto unlock;
>
> You could DoS/false positive this by running stop_one_cpu() in a loop,
> and thereby 'always' having work pending on one but not the other.

IIRC no. I am pretty sure stop_one_cpu() doesn't use stopper->stop_work,
only stop_machine() does.

Oleg.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-08-01 10:57     ` Oleg Nesterov
@ 2015-08-01 22:36       ` Peter Zijlstra
  0 siblings, 0 replies; 15+ messages in thread
From: Peter Zijlstra @ 2015-08-01 22:36 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On Sat, Aug 01, 2015 at 12:57:18PM +0200, Oleg Nesterov wrote:

> > > +	if (stop_work_pending(stopper1) != stop_work_pending(stopper2))
> > > +		goto unlock;
> >
> > You could DoS/false positive this by running stop_one_cpu() in a loop,
> > and thereby 'always' having work pending on one but not the other.
> 
> IIRC no. I am pretty sure stop_one_cpu() doesn't use stopper->stop_work,
> only stop_machine() does.

Urgh, I missed you were testing the cpu_stopper::stop_work not the
cpu_stopper::works list.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-31 14:17       ` Peter Zijlstra
@ 2015-08-03 14:58         ` Oleg Nesterov
  0 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-08-03 14:58 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On 07/31, Peter Zijlstra wrote:
>
> +	for_each_cpu(cpu, cpumask)
> +		arch_spin_lock((arch_spinlock_t *)&per_cpu(cpu_stopper.lock, cpu));
> +
>  	for_each_cpu(cpu, cpumask) {
>  		work = &per_cpu(cpu_stopper.stop_work, cpu);
>  		work->fn = fn;
>  		work->arg = arg;
>  		work->done = done;
> -		cpu_stop_queue_work(cpu, work);
> +		__cpu_stop_queue_work(cpu, work);
>  	}
> -	lg_global_unlock(&stop_cpus_lock);
> +
> +	for_each_cpu(cpu, cpumask)
> +		arch_spin_unlock((arch_spinlock_t *)&per_cpu(cpu_stopper.lock, cpu));

Of course, we discussed this before and I think this should work too.
However to me this looks more ugly (although better than the current
code), and this is what I tried to avoid.

But! of course "more ugly" is very much subjective, so I won't really
argue if you prefer this change. That said, let me write another email
in reply to your initial review.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock()
  2015-07-30 21:55   ` Peter Zijlstra
  2015-07-31 11:12     ` Peter Zijlstra
  2015-08-01 10:57     ` Oleg Nesterov
@ 2015-08-03 14:58     ` Oleg Nesterov
  2 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2015-08-03 14:58 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Rik van Riel, Tejun Heo, linux-kernel

On 07/30, Peter Zijlstra wrote:
>
> On Tue, Jul 21, 2015 at 09:22:47PM +0200, Oleg Nesterov wrote:
>
> > +	err = -EDEADLK;
> > +	if (stop_work_pending(stopper1) != stop_work_pending(stopper2))
> > +		goto unlock;
>
> You could DoS/false positive this by running stop_one_cpu() in a loop,
> and thereby 'always' having work pending on one but not the other.

as we already discussed this is not a problem.

> > +	if (unlikely(err == -EDEADLK)) {
> > +		cond_resched();
> > +		goto retry;
>
> And this just gives me -rt nightmares.

Why?

> As it is, -rt does horrible things to stop_machine, and I would very
> much like to make it such that we don't need to do that.
>
> Now, obviously, stop_cpus() is _BAD_ for -rt, and we try real hard to
> make sure that doesn't happen,

Yes. stop_cpus() is already bad, so I am not sure I understand why this
change makes things really worse.

stop_two_cpus() needs to spin/retry if it races with the main loop in
queue_stop_cpus_work(),

        preempt_disable();
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(cpu_stopper.stop_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
                cpu_stop_queue_work(cpu, work);
        }
        preempt_enable();

and iirc preempt_disable() means "disable preemption" even in -rt, but
I am not sure. So "goto retry" should be really, really unlikely.

Besides, whatever we do, stop_two_cpus(X, Y) will wait anyway if ->stop_work
was queued on X or Y. And with your patch in the next email it will
spin too (yes, yes, -rt differs).


Another case in which stop_two_cpus(X, Y) needs to retry is when ->stop_work
was already dequeued on CPU X but not on CPU Y (and this is why it needs
cond_resched() for CONFIG_PREEMPT=n: it can run on CPU Y). This does not
look really bad either; the migration/Y thread is already activated and it
has the highest priority.


So I still think that, at least correctness-wise, this patch is fine. Have I
missed something else?


> Paul's RCU branch already kills try_stop_cpus() dead, so that wart is
> also gone. But we're still stuck with stop_machine_from_inactive_cpu()
> which does a spin-wait for exclusive state. So I suppose we'll have to
> keep stop_cpus_mutex :/

Yes, we still need stop_cpus_mutex. Even if we remove try_stop_cpus() and
stop_machine_from_inactive_cpu(). But this is another issue.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

Thread overview: 15+ messages
2015-07-21 19:22 [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Oleg Nesterov
2015-07-21 19:22 ` [PATCH v2 1/6] stop_machine: move cpu_stopper_task and stop_cpus_work into struct cpu_stopper Oleg Nesterov
2015-07-21 19:22 ` [PATCH v2 2/6] stop_machine: don't do for_each_cpu() twice in queue_stop_cpus_work() Oleg Nesterov
2015-07-21 19:22 ` [PATCH v2 3/6] stop_machine: unexport __stop_machine() Oleg Nesterov
2015-07-21 19:22 ` [PATCH v2 4/6] stop_machine: use cpu_stop_fn_t where possible Oleg Nesterov
2015-07-21 19:22 ` [PATCH v2 5/6] stop_machine: cpu_stop_park() should remove cpu_stop_work's from list Oleg Nesterov
2015-07-21 19:22 ` [PATCH v2 6/6] stop_machine: kill stop_cpus_lock and lg_double_lock/unlock() Oleg Nesterov
2015-07-30 21:55   ` Peter Zijlstra
2015-07-31 11:12     ` Peter Zijlstra
2015-07-31 14:17       ` Peter Zijlstra
2015-08-03 14:58         ` Oleg Nesterov
2015-08-01 10:57     ` Oleg Nesterov
2015-08-01 22:36       ` Peter Zijlstra
2015-08-03 14:58     ` Oleg Nesterov
2015-07-30 17:34 ` [PATCH v2 0/6] stop_machine: cleanups, fix, remove lglock Peter Zijlstra
