* [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Oleg Nesterov @ 2015-06-26  2:14 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

On 06/25, Peter Zijlstra wrote:
>
> On Tue, Jun 23, 2015 at 07:24:16PM +0200, Oleg Nesterov wrote:
> >
> > 	lock_stop_cpus_works(cpumask)
> > 	{
> > 		for_each_cpu(cpu, cpumask)
> > 			mutex_lock(per_cpu(cpu_stopper_task, cpu).work_mutex);
> > 	}
> >
> > 	unlock_stop_cpus_works(cpumask)
> > 	{
> > 		for_each_cpu(cpu, cpumask)
> > 			mutex_unlock(...);
> > 	}
> >
> > which should be used instead of stop_cpus_mutex. After this change
> > stop_two_cpus() can just use stop_cpus().
>
> Right, lockdep annotating that will be 'interesting' though.

Sure, and this is too inefficient; it was only meant to explain what
I mean.

How about this series? Untested. For review.

> And
> stop_two_cpus() then has the problem of allocating a cpumask.

Yes, but we can avoid this, see the changelog in 5/6.

> Simpler to
> let it keep 'abuse' the queueing spinlock in there.

Not sure.

And note that this series kills stop_cpus_mutex, so that multiple
stop_cpus()'s / stop_machine()'s can run in parallel if cpumask's
do not overlap.

Note also the changelog in 6/6, we can simplify + optimize this code
a bit more.

What do you think?

Oleg.

 include/linux/lglock.h  |    5 -
 kernel/locking/lglock.c |   22 -----
 kernel/stop_machine.c   |  197 ++++++++++++++++++++++++++++-------------------
 3 files changed, 119 insertions(+), 105 deletions(-)



* [RFC PATCH 1/6] stop_machine: move cpu_stopper_task and stop_cpus_work into struct cpu_stopper
From: Oleg Nesterov @ 2015-06-26  2:15 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

Multiple DEFINE_PER_CPU's do not make sense; move all the per-cpu
variables into struct cpu_stopper.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   17 +++++++++--------
 1 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fd643d8..6e677b0 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -35,13 +35,16 @@ struct cpu_stop_done {
 
 /* the actual stopper, one per every possible cpu, enabled on online cpus */
 struct cpu_stopper {
+	struct task_struct	*thread;
+
 	spinlock_t		lock;
 	bool			enabled;	/* is this stopper enabled? */
 	struct list_head	works;		/* list of pending works */
+
+	struct cpu_stop_work	stop_work;	/* for stop_cpus */
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
-static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
 static bool stop_machine_initialized = false;
 
 /*
@@ -74,7 +77,6 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
 static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);
 
 	unsigned long flags;
 
@@ -82,7 +84,7 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 
 	if (stopper->enabled) {
 		list_add_tail(&work->list, &stopper->works);
-		wake_up_process(p);
+		wake_up_process(stopper->thread);
 	} else
 		cpu_stop_signal_done(work->done, false);
 
@@ -293,7 +295,6 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 
 /* static data for stop_cpus */
 static DEFINE_MUTEX(stop_cpus_mutex);
-static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
 static void queue_stop_cpus_work(const struct cpumask *cpumask,
 				 cpu_stop_fn_t fn, void *arg,
@@ -304,7 +305,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 
 	/* initialize works and done */
 	for_each_cpu(cpu, cpumask) {
-		work = &per_cpu(stop_cpus_work, cpu);
+		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
 		work->arg = arg;
 		work->done = done;
@@ -317,7 +318,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	 */
 	lg_global_lock(&stop_cpus_lock);
 	for_each_cpu(cpu, cpumask)
-		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
+		cpu_stop_queue_work(cpu, &per_cpu(cpu_stopper.stop_work, cpu));
 	lg_global_unlock(&stop_cpus_lock);
 }
 
@@ -458,7 +459,7 @@ extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
 static void cpu_stop_create(unsigned int cpu)
 {
-	sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
+	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
 }
 
 static void cpu_stop_park(unsigned int cpu)
@@ -485,7 +486,7 @@ static void cpu_stop_unpark(unsigned int cpu)
 }
 
 static struct smp_hotplug_thread cpu_stop_threads = {
-	.store			= &cpu_stopper_task,
+	.store			= &cpu_stopper.thread,
 	.thread_should_run	= cpu_stop_should_run,
 	.thread_fn		= cpu_stopper_thread,
 	.thread_comm		= "migration/%u",
-- 
1.5.5.1



* [RFC PATCH 2/6] stop_machine: don't do for_each_cpu() twice in queue_stop_cpus_work()
From: Oleg Nesterov @ 2015-06-26  2:15 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

queue_stop_cpus_work() can do everything in a single for_each_cpu()
loop. This simplifies the code a bit, and it is also preparation for
the locking changes.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   17 +++++++----------
 1 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6e677b0..6212208 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -303,22 +303,19 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	struct cpu_stop_work *work;
 	unsigned int cpu;
 
-	/* initialize works and done */
-	for_each_cpu(cpu, cpumask) {
-		work = &per_cpu(cpu_stopper.stop_work, cpu);
-		work->fn = fn;
-		work->arg = arg;
-		work->done = done;
-	}
-
 	/*
 	 * Disable preemption while queueing to avoid getting
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
 	lg_global_lock(&stop_cpus_lock);
-	for_each_cpu(cpu, cpumask)
-		cpu_stop_queue_work(cpu, &per_cpu(cpu_stopper.stop_work, cpu));
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(cpu_stopper.stop_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = done;
+		cpu_stop_queue_work(cpu, work);
+	}
 	lg_global_unlock(&stop_cpus_lock);
 }
 
-- 
1.5.5.1



* [RFC PATCH 3/6] stop_machine: introduce stop_work_alloc() and stop_work_free()
From: Oleg Nesterov @ 2015-06-26  2:15 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

A separate and intentionally suboptimal patch, to simplify the review
of this and the next changes.

The new helpers, stop_work_alloc(cpumask) and stop_work_free(cpumask),
must be called before the caller uses cpu_stopper->stop_work's and
after it is done with them. Note that 2 callers can never deadlock
even if their cpumask's overlap: they always "lock" the
cpu_stopper->stop_owner's in ascending cpu order, as if we had another
per-cpu mutex. For example, with the masks {1,2} and {2,3} both
callers contend only on CPU 2's slot, and the loser simply waits.

This is obviously greatly inefficient; this will be fixed later.
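For illustration, a minimal usage sketch (this mirrors what
try_stop_cpus() will do after a later patch in this series):

	if (!stop_work_alloc(cpumask, false))
		return -EAGAIN;
	/* ... queue and wait for per_cpu(cpu_stopper.stop_work, cpu) ... */
	stop_work_free(cpumask);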

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   50 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6212208..3d5d810 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -42,11 +42,61 @@ struct cpu_stopper {
 	struct list_head	works;		/* list of pending works */
 
 	struct cpu_stop_work	stop_work;	/* for stop_cpus */
+	struct task_struct	*stop_owner;
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
+static DECLARE_WAIT_QUEUE_HEAD(stop_work_wq);
+
+static void stop_work_free_one(int cpu)
+{
+	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	/* Can be NULL if stop_work_alloc(wait => false) fails */
+	if (likely(stopper->stop_owner == current))
+		stopper->stop_owner = NULL;
+}
+
+static void stop_work_free(const struct cpumask *cpumask)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpumask)
+		stop_work_free_one(cpu);
+	wake_up_all(&stop_work_wq);
+}
+
+static struct cpu_stop_work *stop_work_alloc_one(int cpu, bool wait)
+{
+	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+
+	if (cmpxchg(&stopper->stop_owner, NULL, current) == NULL)
+		goto done;
+
+	if (!wait)
+		return NULL;
+
+	__wait_event(stop_work_wq,
+		cmpxchg(&stopper->stop_owner, NULL, current) == NULL);
+done:
+	return &stopper->stop_work;
+}
+
+static bool stop_work_alloc(const struct cpumask *cpumask, bool wait)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpumask) {
+		if (stop_work_alloc_one(cpu, wait))
+			continue;
+		stop_work_free(cpumask);
+		return false;
+	}
+
+	return true;
+}
+
 /*
  * Avoids a race between stop_two_cpus and global stop_cpus, where
  * the stoppers could get queued up in reverse order, leading to
-- 
1.5.5.1



* [RFC PATCH 4/6] stop_machine: kill stop_cpus_mutex
From: Oleg Nesterov @ 2015-06-26  2:15 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

Change the users of stop_cpus_mutex to rely on stop_work_alloc() and
stop_work_free(). This means that 2 stop_cpus() can run in parallel
as long as their cpumask's do not overlap; for example, stop_cpus({0,1})
and stop_cpus({2,3}) no longer serialize each other.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   16 ++++++----------
 1 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 3d5d810..4b23875 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -343,9 +343,6 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 	cpu_stop_queue_work(cpu, work_buf);
 }
 
-/* static data for stop_cpus */
-static DEFINE_MUTEX(stop_cpus_mutex);
-
 static void queue_stop_cpus_work(const struct cpumask *cpumask,
 				 cpu_stop_fn_t fn, void *arg,
 				 struct cpu_stop_done *done)
@@ -412,10 +409,9 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
 	int ret;
 
-	/* static works are used, process one request at a time */
-	mutex_lock(&stop_cpus_mutex);
+	stop_work_alloc(cpumask, true);
 	ret = __stop_cpus(cpumask, fn, arg);
-	mutex_unlock(&stop_cpus_mutex);
+	stop_work_free(cpumask);
 	return ret;
 }
 
@@ -442,10 +438,10 @@ int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 	int ret;
 
 	/* static works are used, process one request at a time */
-	if (!mutex_trylock(&stop_cpus_mutex))
+	if (!stop_work_alloc(cpumask, false))
 		return -EAGAIN;
 	ret = __stop_cpus(cpumask, fn, arg);
-	mutex_unlock(&stop_cpus_mutex);
+	stop_work_free(cpumask);
 	return ret;
 }
 
@@ -643,7 +639,7 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */
 
 	/* No proper task established and can't sleep - busy wait for lock. */
-	while (!mutex_trylock(&stop_cpus_mutex))
+	while (!stop_work_alloc(cpus, false))
 		cpu_relax();
 
 	/* Schedule work on other CPUs and execute directly for local CPU */
@@ -656,8 +652,8 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 	/* Busy wait for completion. */
 	while (!completion_done(&done.completion))
 		cpu_relax();
+	stop_work_free(cpus);
 
-	mutex_unlock(&stop_cpus_mutex);
 	return ret ?: done.ret;
 }
 
-- 
1.5.5.1



* [RFC PATCH 5/6] stop_machine: change stop_two_cpus() to just use stop_cpus(), kill lg_double_lock/unlock
From: Oleg Nesterov @ 2015-06-26  2:15 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

Now that stop_cpus() is not "exclusive" if cpumask's don't overlap, we
can turn stop_two_cpus() into a trivial wrapper on top of stop_cpus().

Note: if zalloc_cpumask_var(cpumask, GFP_KERNEL) is not an option, we
can reimplement this function using stop_work_{alloc,free}_one().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/lglock.h  |    5 ---
 kernel/locking/lglock.c |   22 ----------------
 kernel/stop_machine.c   |   64 ++++++++--------------------------------------
 3 files changed, 11 insertions(+), 80 deletions(-)

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index c92ebd1..0081f00 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -52,15 +52,10 @@ struct lglock {
 	static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
-
 void lg_local_lock(struct lglock *lg);
 void lg_local_unlock(struct lglock *lg);
 void lg_local_lock_cpu(struct lglock *lg, int cpu);
 void lg_local_unlock_cpu(struct lglock *lg, int cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
-
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index 951cfcd..86ae2ae 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -60,28 +60,6 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
 }
 EXPORT_SYMBOL(lg_local_unlock_cpu);
 
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
-{
-	BUG_ON(cpu1 == cpu2);
-
-	/* lock in cpu order, just like lg_global_lock */
-	if (cpu2 < cpu1)
-		swap(cpu1, cpu2);
-
-	preempt_disable();
-	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
-}
-
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
-{
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
-	preempt_enable();
-}
-
 void lg_global_lock(struct lglock *lg)
 {
 	int i;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4b23875..572abc9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
-#include <linux/lglock.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -97,14 +96,6 @@ static bool stop_work_alloc(const struct cpumask *cpumask, bool wait)
 	return true;
 }
 
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
-
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
 	memset(done, 0, sizeof(*done));
@@ -276,50 +267,17 @@ static int multi_cpu_stop(void *data)
  */
 int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
 {
-	struct cpu_stop_done done;
-	struct cpu_stop_work work1, work2;
-	struct multi_stop_data msdata;
-
-	preempt_disable();
-	msdata = (struct multi_stop_data){
-		.fn = fn,
-		.data = arg,
-		.num_threads = 2,
-		.active_cpus = cpumask_of(cpu1),
-	};
-
-	work1 = work2 = (struct cpu_stop_work){
-		.fn = multi_cpu_stop,
-		.arg = &msdata,
-		.done = &done
-	};
-
-	cpu_stop_init_done(&done, 2);
-	set_state(&msdata, MULTI_STOP_PREPARE);
-
-	/*
-	 * If we observe both CPUs active we know _cpu_down() cannot yet have
-	 * queued its stop_machine works and therefore ours will get executed
-	 * first. Or its not either one of our CPUs that's getting unplugged,
-	 * in which case we don't care.
-	 *
-	 * This relies on the stopper workqueues to be FIFO.
-	 */
-	if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
-		preempt_enable();
-		return -ENOENT;
-	}
-
-	lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
-	cpu_stop_queue_work(cpu1, &work1);
-	cpu_stop_queue_work(cpu2, &work2);
-	lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
-
-	preempt_enable();
+	cpumask_var_t cpumask;
+	int ret;
 
-	wait_for_completion(&done.completion);
+	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return -ENOMEM;
 
-	return done.executed ? done.ret : -ENOENT;
+	cpumask_set_cpu(cpu1, cpumask);
+	cpumask_set_cpu(cpu2, cpumask);
+	ret = stop_cpus(cpumask, fn, arg);
+	free_cpumask_var(cpumask);
+	return ret;
 }
 
 /**
@@ -355,7 +313,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
-	lg_global_lock(&stop_cpus_lock);
+	preempt_disable();
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
@@ -363,7 +321,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 		work->done = done;
 		cpu_stop_queue_work(cpu, work);
 	}
-	lg_global_unlock(&stop_cpus_lock);
+	preempt_enable();
 }
 
 static int __stop_cpus(const struct cpumask *cpumask,
-- 
1.5.5.1



* [RFC PATCH 6/6] stop_machine: optimize stop_work_alloc()
From: Oleg Nesterov @ 2015-06-26  2:15 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

wait_event()/wake_up_all() in stop_work_alloc/stop_work_free logic
is very suboptimal because of non-exclusive wakeups. So we add the
wait_queue_func_t alloc_wake() helper which wakes the waiter up only
a) if it actually waits for a stop_work in the "freed" cpumask, and
b) after we have already set ->stop_owner = waiter.

So if 2 stop_machine()'s race with each other, the loser will likely
call schedule() only once and we will have a single wakeup.

TODO: we can optimize (and simplify!) this code more. We can remove
stop_work_alloc_one() and fold it into stop_work_alloc(), so that
prepare_to_wait() will be the outer loop. Let's do this later.

TODO: the init_waitqueue_func_entry() code in stop_work_alloc_one()
is really annoying; we need a trivial new __init_wait(wait, func)
helper.
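Such a helper could look like this (an untested sketch, mirroring
init_wait() but with a caller-supplied wake function):

	#define __init_wait(wait, func)				\
	do {							\
		(wait)->private = current;			\
		(wait)->func = (func);				\
		INIT_LIST_HEAD(&(wait)->task_list);		\
		(wait)->flags = 0;				\
	} while (0)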

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/stop_machine.c |   47 +++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 572abc9..bbfc670 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -63,21 +63,60 @@ static void stop_work_free(const struct cpumask *cpumask)
 
 	for_each_cpu(cpu, cpumask)
 		stop_work_free_one(cpu);
-	wake_up_all(&stop_work_wq);
+	__wake_up(&stop_work_wq, TASK_ALL, 0, (void *)cpumask);
+}
+
+struct alloc_wait {
+	wait_queue_t	wait;
+	int		cpu;
+};
+
+static int alloc_wake(wait_queue_t *wait, unsigned int mode, int sync, void *key)
+{
+	struct alloc_wait *aw = container_of(wait, struct alloc_wait, wait);
+	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, aw->cpu);
+	const struct cpumask *cpumask = key;
+
+	if (!cpumask_test_cpu(aw->cpu, cpumask))
+		return 0;
+	if (cmpxchg(&stopper->stop_owner, NULL, aw->wait.private) != NULL)
+		return 0;
+
+	return autoremove_wake_function(wait, mode, sync, key);
 }
 
 static struct cpu_stop_work *stop_work_alloc_one(int cpu, bool wait)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	struct task_struct *me = current;
+	struct alloc_wait aw;
 
-	if (cmpxchg(&stopper->stop_owner, NULL, current) == NULL)
+	if (cmpxchg(&stopper->stop_owner, NULL, me) == NULL)
 		goto done;
 
 	if (!wait)
 		return NULL;
 
-	__wait_event(stop_work_wq,
-		cmpxchg(&stopper->stop_owner, NULL, current) == NULL);
+	/* TODO: add __init_wait(wait, func) helper! */
+	INIT_LIST_HEAD(&aw.wait.task_list);
+	init_waitqueue_func_entry(&aw.wait, alloc_wake);
+	aw.wait.private = me;
+	aw.cpu = cpu;
+	for (;;) {
+		prepare_to_wait(&stop_work_wq, &aw.wait, TASK_UNINTERRUPTIBLE);
+		/*
+		 * This can "falsely" fail if we race with alloc_wake() and
+		 * stopper->stop_owner is already me, in this case schedule()
+		 * won't block and the check below will notice this change.
+		 */
+		if (cmpxchg(&stopper->stop_owner, NULL, me) == NULL)
+			break;
+
+		schedule();
+		if (likely(stopper->stop_owner == me))
+			break;
+	}
+	finish_wait(&stop_work_wq, &aw.wait);
 done:
 	return &stopper->stop_work;
 }
-- 
1.5.5.1



* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Oleg Nesterov @ 2015-06-26  2:31 UTC
  To: Peter Zijlstra, paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds
  Cc: linux-kernel

As always, I forgot to mention...

Also, we can turn these stop_work_alloc/stop_work_free helpers into
generic ones which (I think) can have more users.

On 06/26, Oleg Nesterov wrote:
>
> On 06/25, Peter Zijlstra wrote:
> >
> > On Tue, Jun 23, 2015 at 07:24:16PM +0200, Oleg Nesterov wrote:
> > >
> > > 	lock_stop_cpus_works(cpumask)
> > > 	{
> > > 		for_each_cpu(cpu, cpumask)
> > > 			mutex_lock(per_cpu(cpu_stopper_task, cpu).work_mutex);
> > > 	}
> > >
> > > 	unlock_stop_cpus_works(cpumask)
> > > 	{
> > > 		for_each_cpu(cpu, cpumask)
> > > 			mutex_unlock(...);
> > > 	}
> > >
> > > which should be used instead of stop_cpus_mutex. After this change
> > > stop_two_cpus() can just use stop_cpus().
> >
> > Right, lockdep annotating that will be 'interesting' though.
>
> Sure, and this is too inefficient; it was only meant to explain what
> I mean.
>
> How about this series? Untested. For review.
>
> > And
> > stop_two_cpus() then has the problem of allocating a cpumask.
>
> Yes, but we can avoid this, see the changelog in 5/6.
>
> > Simpler to
> > let it keep 'abuse' the queueing spinlock in there.
>
> Not sure.
>
> And note that this series kills stop_cpus_mutex, so that multiple
> stop_cpus()'s / stop_machine()'s can run in parallel if cpumask's
> do not overlap.
>
> Note also the changelog in 6/6, we can simplify + optimize this code
> a bit more.
>
> What do you think?
>
> Oleg.
>
>  include/linux/lglock.h  |    5 -
>  kernel/locking/lglock.c |   22 -----
>  kernel/stop_machine.c   |  197 ++++++++++++++++++++++++++++-------------------
>  3 files changed, 119 insertions(+), 105 deletions(-)



* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Peter Zijlstra @ 2015-06-26 12:23 UTC
  To: Oleg Nesterov
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On Fri, Jun 26, 2015 at 04:14:55AM +0200, Oleg Nesterov wrote:
> Not sure.
> 
> And note that this series kills stop_cpus_mutex, so that multiple
> stop_cpus()'s / stop_machine()'s can run in parallel if cpumask's
> do not overlap.
> 
> Note also the changelog in 6/6, we can simplify + optimize this code
> a bit more.
> 
> What do you think?

The problem I have with this is that it makes the better operation
(stop_two_cpus) slower while improving the worse operation (stop_cpus).

I would much prefer to keep stop_two_cpus() as proposed with taking two
cpu_stopper::lock instances and replacing the stop_cpus_mutex with a
percpu-rwsem.
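
Roughly something like this (an untested sketch of the idea; the names
and details are approximate):

	static struct percpu_rw_semaphore stop_cpus_rwsem;

	int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, ...)
	{
		...
		if (cpu2 < cpu1)
			swap(cpu1, cpu2);

		/* readers exclude stop_cpus(), which takes it for writing */
		percpu_down_read(&stop_cpus_rwsem);

		/* lock both stoppers in cpu order, queue both works */
		spin_lock_irq(&per_cpu(cpu_stopper, cpu1).lock);
		spin_lock_nested(&per_cpu(cpu_stopper, cpu2).lock,
				 SINGLE_DEPTH_NESTING);
		...
	}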


* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Oleg Nesterov @ 2015-06-26 20:46 UTC
  To: Peter Zijlstra
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On 06/26, Peter Zijlstra wrote:
>
> On Fri, Jun 26, 2015 at 04:14:55AM +0200, Oleg Nesterov wrote:
> > Not sure.
> >
> > And note that this series kills stop_cpus_mutex, so that multiple
> > stop_cpus()'s / stop_machine()'s can run in parallel if cpumask's
> > do not overlap.
> >
> > Note also the changelog in 6/6, we can simplify + optimize this code
> > a bit more.
> >
> > What do you think?
>
> The problem I have with this is that it makes the better operation
> (stop_two_cpus) slower while improving the worse operation (stop_cpus).
>
> I would much prefer to keep stop_two_cpus() as proposed with taking two
> cpu_stopper::lock instances and replacing the stop_cpus_mutex with a
> percpu-rwsem.

OK, let's avoid the cpumask in stop_two_cpus(),

	int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
	{
		struct multi_stop_data msdata;
		struct cpu_stop_done done;
		struct cpu_stop_work *work1, *work2;

		msdata = (struct multi_stop_data){
			.fn = fn,
			.data = arg,
			.num_threads = 2,
			.active_cpus = cpumask_of(cpu1),
		};

		cpu_stop_init_done(&done, 2);
		set_state(&msdata, MULTI_STOP_PREPARE);

		if (cpu1 > cpu2)
			swap(cpu1, cpu2);
		work1 = stop_work_alloc_one(cpu1, true);
		work2 = stop_work_alloc_one(cpu2, true);

		*work1 = *work2 = (struct cpu_stop_work) {
			.fn = multi_cpu_stop,
			.arg = &msdata,
			.done = &done
		};

		preempt_disable();
		cpu_stop_queue_work(cpu1, work1);
		cpu_stop_queue_work(cpu2, work2);
		preempt_enable();

		wait_for_completion(&done.completion);

		stop_work_free_one(cpu1);
		stop_work_free_one(cpu2);
		wake_up(&stop_work_wq);

		return done.executed ? done.ret : -ENOENT;
	}

2 cmpxchg()'s vs 2 spin_lock()'s. Plus wake_up(), but we can check
waitqueue_active().
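Something like this (just a sketch; the barrier is needed so that
either the waiter sees ->stop_owner == NULL or we see it on the
waitqueue):

	/* pairs with the implied barrier in prepare_to_wait() */
	smp_mb();
	if (waitqueue_active(&stop_work_wq))
		wake_up(&stop_work_wq);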

Do you think this will be noticeably slower?

Of course, if it races with another stop_two_cpus/stop_cpus it will
sleep, but in this case we need to wait anyway.


And I don't think that percpu-rwsem instead of stop_cpus_mutex makes
sense; at least I don't understand how it can help. OK, stop_two_cpus()
can use percpu_down_read() to avoid the deadlock with stop_cpus(), but
you still need the double-lock... So I don't think this will make it
faster, it will just penalize stop_cpus(). Or I misunderstood.

So I am still not convinced... But probably I am too biased ;)


Btw. I can't understand the cpu_active() checks in stop_two_cpus().
Do we really need them?

Oleg.



* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Oleg Nesterov @ 2015-06-29  4:02 UTC
  To: Peter Zijlstra
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On 06/26, Oleg Nesterov wrote:
>
> 2 cmpxchg()'s vs 2 spin_lock()'s. Plus wake_up(), but we can check
> waitqueue_active().
>
> Do you think this will be noticeably slower?
>
> Of course, if it races with another stop_two_cpus/stop_cpus it will
> sleep, but in this case we need to wait anyway.
>
>
> And I don't think that percpu-rwsem instead of stop_cpus_mutex makes
> sense; at least I don't understand how it can help. OK, stop_two_cpus()
> can use percpu_down_read() to avoid the deadlock with stop_cpus(), but
> you still need the double-lock... So I don't think this will make it
> faster, it will just penalize stop_cpus(). Or I misunderstood.
>
> So I am still not convinced... But probably I am too biased ;)

Yes... I'll probably try to make v2; this version is overcomplicated
and buggy.


> Btw. I can't understand the cpu_active() checks in stop_two_cpus().
> Do we really need them?

Ah, please ignore.

Yes, we can't rely on the stopper->enabled check in cpu_stop_queue_work();
cpu_stop_signal_done() does not update multi_stop_data->num_threads /
->thread_ack. So we need to ensure that cpu_online() == T for both CPUs,
or multi_cpu_stop() can hang.

But we can't use cpu_online() instead, take_cpu_down() can be already
queued.

So this relies on the fact that CPU_DOWN_PREPARE (which removes the CPU
from cpu_active_mask) is called before stop_machine(take_cpu_down), and
we do not care that cpu_active() is not stable: if we see cpu_active(),
cpu_online() can't change under us because take_cpu_down() was not
queued.

If we change stop_two_cpus() to use stop_work_alloc_one() it can use
cpu_online(),

	int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
	{
		struct cpu_stop_work *work1, *work2;
		struct cpu_stop_done done;
		struct multi_stop_data msdata = {
			.fn = fn,
			.data = arg,
			.num_threads = 2,
			.active_cpus = cpumask_of(cpu1),
		};

		set_state(&msdata, MULTI_STOP_PREPARE);
		cpu_stop_init_done(&done, 2);

		if (cpu1 > cpu2)
			swap(cpu1, cpu2);

		work1 = stop_work_alloc_one(cpu1, true);
		work2 = stop_work_alloc_one(cpu2, true);
		/* stop_machine() is blocked, cpu can't go away */
		if (cpu_online(cpu1) && cpu_online(cpu2)) {
			work1->fn   = work2->fn   = multi_cpu_stop;
			work1->arg  = work2->arg  = &msdata;
			work1->done = work2->done = &done;

			preempt_disable();
			cpu_stop_queue_work(cpu1, work1);
			cpu_stop_queue_work(cpu2, work2);
			preempt_enable();
			wait_for_completion(&done.completion);
		}

		stop_work_free_one(cpu1);
		stop_work_free_one(cpu2);
		stop_work_wake_up();

		return done.executed ? done.ret : -ENOENT;
	}

Oleg.



* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Peter Zijlstra @ 2015-06-29  8:49 UTC
  To: Oleg Nesterov
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On Fri, Jun 26, 2015 at 10:46:12PM +0200, Oleg Nesterov wrote:
> > I would much prefer to keep stop_two_cpus() as proposed with taking two
> > cpu_stopper::lock instances and replacing the stop_cpus_mutex with a
> > percpu-rwsem.
> 
> OK, lets avoid cpumask in stop_two_cpus,
> 
> 	int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
> 	{
> 		struct multi_stop_data msdata;
> 		struct cpu_stop_done done;
> 		struct cpu_stop_work *work1, *work2;
> 
> 		msdata = (struct multi_stop_data){
> 			.fn = fn,
> 			.data = arg,
> 			.num_threads = 2,
> 			.active_cpus = cpumask_of(cpu1),
> 		};
> 
> 		cpu_stop_init_done(&done, 2);
> 		set_state(&msdata, MULTI_STOP_PREPARE);
> 
> 		if (cpu1 > cpu2)
> 			swap(cpu1, cpu2);
> 		work1 = stop_work_alloc_one(cpu1, true);
> 		work2 = stop_work_alloc_one(cpu2, true);
> 
> 		*work1 = *work2 = (struct cpu_stop_work) {
> 			.fn = multi_cpu_stop,
> 			.arg = &msdata,
> 			.done = &done
> 		};
> 
> 		preempt_disable();
> 		cpu_stop_queue_work(cpu1, work1);
> 		cpu_stop_queue_work(cpu2, work2);
> 		preempt_enable();
> 
> 		wait_for_completion(&done.completion);
> 
> 		stop_work_free_one(cpu1);
> 		stop_work_free_one(cpu2);
> 		wake_up(&stop_work_wq);
> 
> 		return done.executed ? done.ret : -ENOENT;
> 	}
> 
> 2 cmpxchg()'s vs 2 spin_lock()'s. Plus wake_up(), but we can check
> waitqueue_active().
> 
> Do you think this will be noticeably slower?

Nah, I suppose not. Either we wait on the 'mutex' for access to the work
or we wait on the completion.

> So I am still not convinced... But probably I am too biased ;)

I'm just a tad worried, I don't want to make the relatively cheap
operation of stop_two_cpus() more expensive to the benefit of
stop_cpus().

> Btw. I can't understand the cpu_active() checks in stop_two_cpus().
> Do we really need them?

The comment is misleading and, I think, part of an earlier attempt to
avoid the deadlock, but I suspect we still need them. Either that or I
need to wake up more :-)

I cannot see how multi_cpu_stop() handles offline cpus; afaict it will
spin-wait for the other cpu to join its state indefinitely. So we need
to bail early if either CPU is unavailable.
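
The spin-wait in question is the main loop of multi_cpu_stop(), roughly
(paraphrased from kernel/stop_machine.c):

	do {
		/* Chill out and ensure we re-read multi_stop_state. */
		cpu_relax();
		if (msdata->state != curstate) {
			curstate = msdata->state;
			/* ... run the per-state step ... */
			ack_state(msdata);	/* last acker advances ->state */
		}
	} while (curstate != MULTI_STOP_EXIT);

If one of the two works never runs, ack_state() never sees the last ack
and the other cpu loops here forever.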


* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Peter Zijlstra @ 2015-06-29  8:51 UTC
  To: Oleg Nesterov
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On Mon, Jun 29, 2015 at 06:02:51AM +0200, Oleg Nesterov wrote:
> > Btw. I can't understand the cpu_active() checks in stop_two_cpus().
> > Do we really need them?
> 
> Ah, please ignore.
> 
> Yes, we can't rely on the stopper->enabled check in cpu_stop_queue_work();
> cpu_stop_signal_done() does not update multi_stop_data->num_threads /
> ->thread_ack. So we need to ensure that cpu_online() == T for both CPUs,
> or multi_cpu_stop() can hang.
> 
> But we can't use cpu_online() instead, take_cpu_down() can be already
> queued.
> 
> So this relies on the fact that CPU_DOWN_PREPARE (which removes the CPU
> from cpu_active_mask) is called before stop_machine(take_cpu_down), and
> we do not care that cpu_active() is not stable: if we see cpu_active(),
> cpu_online() can't change under us because take_cpu_down() was not
> queued.

Just so.

> If we change stop_two_cpus() to use stop_work_alloc_one() it can use
> cpu_online(),

So the one user of this actually needs cpu_active(); we do not want to
go move tasks to an inactive cpu.

So if you change this to cpu_online(), we need to audit that the user
is doing the stricter test.


* Re: [RFC PATCH 6/6] stop_machine: optimize stop_work_alloc()
From: Peter Zijlstra @ 2015-06-29  8:56 UTC
  To: Oleg Nesterov
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On Fri, Jun 26, 2015 at 04:15:26AM +0200, Oleg Nesterov wrote:
> wait_event()/wake_up_all() in stop_work_alloc/stop_work_free logic
> is very suboptimal because of non-exclusive wakeups. So we add the
> wait_queue_func_t alloc_wake() helper which wakes the waiter up only
> a) if it actually waits for a stop_work in the "freed" cpumask, and
> b) after we have already set ->stop_owner = waiter.
> 
> So if 2 stop_machine()'s race with each other, the loser will likely
> call schedule() only once and we will have a single wakeup.

So I think I can beat lockdep into submission (ugly, but still); do we
want to use an actual per-cpu mutex instead?


* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Oleg Nesterov @ 2015-06-30  1:03 UTC
  To: Peter Zijlstra
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On 06/29, Peter Zijlstra wrote:
>
> On Fri, Jun 26, 2015 at 10:46:12PM +0200, Oleg Nesterov wrote:
>
> > So I am still not convinced... But probably I am too biased ;)
>
> I'm just a tad worried, I don't want to make the relatively cheap
> operation of stop_two_cpus() more expensive to the benefit of
> stop_cpus().

OK. And I have another (simple) idea...

Never say tomorrow, but I will try to re-check and make the patch
tomorrow ;)

But let me send some cleanups first. Plus I believe I found another
stop_machine bug; see the last patch. So I hope these changes make
sense in any case.

Oleg.



* Re: [RFC PATCH 0/6] stop_machine: kill stop_cpus_mutex and stop_cpus_lock
From: Oleg Nesterov @ 2015-06-30  1:08 UTC
  To: Peter Zijlstra
  Cc: paulmck, tj, mingo, der.herr, dave, riel, viro, torvalds, linux-kernel

On 06/29, Peter Zijlstra wrote:
>
> On Mon, Jun 29, 2015 at 06:02:51AM +0200, Oleg Nesterov wrote:
>
> > If we change stop_two_cpus() to use stop_work_alloc_one() it can use
> > cpu_online(),
>
> So the one user of this actually needs cpu_active(); we do not want to
> go move tasks to an inactive cpu.
>
> So if you change this to cpu_online() we need to audit the user is doing
> the stricter test.

Hmm. But the user (migrate_swap_stop) should check cpu_active() anyway?

The cpu_active() checks in stop_two_cpus() can only help to ensure that
multi_cpu_stop() won't hang. CPU_DOWN_PREPARE can deactivate either CPU
right after the check?

Or stop_two_cpus() needs get_online_cpus(). Or I missed something.
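
That is, something like this (a sketch; __stop_two_cpus() is a
hypothetical helper holding the current body of the function):

	get_online_cpus();
	/* cpu_online()/cpu_active() are now stable for both cpus */
	ret = __stop_two_cpus(cpu1, cpu2, fn, arg);
	put_online_cpus();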

Oleg.


