linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/3] stop_machine enhancements and simplifications
@ 2008-07-08  7:50 Rusty Russell
  2008-07-08  7:56 ` [PATCH 1/3] stop_machine: add ALL_CPUS option Rusty Russell
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-08  7:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason Baron, Mathieu Desnoyers, Max Krasnyansky

Hi all,

   Here are the three stop_machine patches I've queued for the next merge
window.  The first two are pretty well tested via linux-next; the last is
recent but fairly straightforward.

   I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
but it can be removed by the last of these patches (left in to reduce
transition breakage).

Feedback welcome as always,
Rusty.


* [PATCH 1/3] stop_machine: add ALL_CPUS option
  2008-07-08  7:50 [PATCH 0/3] stop_machine enhancements and simplifications Rusty Russell
@ 2008-07-08  7:56 ` Rusty Russell
  2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
  2008-07-08 16:21 ` [PATCH 0/3] stop_machine enhancements and simplifications Christian Borntraeger
  2008-07-08 20:10 ` Jason Baron
  2 siblings, 1 reply; 23+ messages in thread
From: Rusty Russell @ 2008-07-08  7:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jason Baron, Mathieu Desnoyers, Max Krasnyansky, Hidetoshi Seto

From: Jason Baron <jbaron@redhat.com>

Allow stop_machine_run() to call a function on all cpus.  Calling
stop_machine_run() with 'ALL_CPUS' as the cpu argument invokes this new
behavior.  stop_machine_run() proceeds as normal until the calling cpu
has invoked 'fn'.  Then, we tell all the other cpus to call 'fn'.
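
As an editorial illustration (not part of the patch), a caller might use
the new option like this; patch_fn() is a hypothetical callback:

	static int patch_fn(void *data)
	{
		/* Called with interrupts disabled: first on the calling
		 * cpu, then concurrently on all the other cpus. */
		return 0;
	}

	int err = stop_machine_run(patch_fn, NULL, ALL_CPUS);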

Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
CC: Adrian Bunk <bunk@stusta.de>
CC: Andi Kleen <andi@firstfloor.org>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Christoph Hellwig <hch@infradead.org>
CC: mingo@elte.hu
CC: akpm@osdl.org
---
 include/linux/stop_machine.h |    8 +++++++-
 kernel/stop_machine.c        |   32 +++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 5bfc553..18af011 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -8,11 +8,17 @@
 #include <asm/system.h>
 
 #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+
+#define ALL_CPUS ~0U
+
 /**
  * stop_machine_run: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn()
- * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS.
+ * @cpu: if @cpu == n, run @fn() on cpu n
+ *       if @cpu == NR_CPUS, run @fn() on any cpu
+ *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
+ *       concurrently on all the other cpus
  *
  * Description: This causes a thread to be scheduled on every other cpu,
  * each of which disables interrupts, and finally interrupts are disabled
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 0101aee..bab5d2a 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -22,9 +22,17 @@ enum stopmachine_state {
 	STOPMACHINE_WAIT,
 	STOPMACHINE_PREPARE,
 	STOPMACHINE_DISABLE_IRQ,
+	STOPMACHINE_RUN,
 	STOPMACHINE_EXIT,
 };
 
+struct stop_machine_data {
+	int (*fn)(void *);
+	void *data;
+	struct completion done;
+	int run_all;
+} smdata;
+
 static enum stopmachine_state stopmachine_state;
 static unsigned int stopmachine_num_threads;
 static atomic_t stopmachine_thread_ack;
@@ -33,6 +41,7 @@ static int stopmachine(void *cpu)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
+	int ran = 0;
 
 	set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
 
@@ -57,6 +66,11 @@ static int stopmachine(void *cpu)
 			prepared = 1;
 			smp_mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
+		} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
+			smdata.fn(smdata.data);
+			ran = 1;
+			smp_mb(); /* Must read state first. */
+			atomic_inc(&stopmachine_thread_ack);
 		}
 		/* Yield in first stage: migration threads need to
 		 * help our sisters onto their CPUs. */
@@ -134,11 +148,10 @@ static void restart_machine(void)
 	preempt_enable_no_resched();
 }
 
-struct stop_machine_data {
-	int (*fn)(void *);
-	void *data;
-	struct completion done;
-};
+static void run_other_cpus(void)
+{
+	stopmachine_set_state(STOPMACHINE_RUN);
+}
 
 static int do_stop(void *_smdata)
 {
@@ -148,6 +161,8 @@ static int do_stop(void *_smdata)
 	ret = stop_machine();
 	if (ret == 0) {
 		ret = smdata->fn(smdata->data);
+		if (smdata->run_all)
+			run_other_cpus();
 		restart_machine();
 	}
 
@@ -171,14 +186,17 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
 	struct stop_machine_data smdata;
 	struct task_struct *p;
 
+	mutex_lock(&stopmachine_mutex);
+
 	smdata.fn = fn;
 	smdata.data = data;
+	smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
 	init_completion(&smdata.done);
 
-	mutex_lock(&stopmachine_mutex);
+	smp_wmb(); /* make sure other cpus see smdata updates */
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
-	if (cpu == NR_CPUS)
+	if (cpu == NR_CPUS || cpu == ALL_CPUS)
 		cpu = raw_smp_processor_id();
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");


* [PATCH 2/3] stop_machine: simplify
  2008-07-08  7:56 ` [PATCH 1/3] stop_machine: add ALL_CPUS option Rusty Russell
@ 2008-07-08  7:56   ` Rusty Russell
  2008-07-08  8:01     ` [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers Rusty Russell
                       ` (4 more replies)
  0 siblings, 5 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-08  7:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jason Baron, Mathieu Desnoyers, Max Krasnyansky, Hidetoshi Seto

stop_machine creates a kthread which creates kernel threads.  We can
create those threads directly and simplify things a little.  Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.
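
As a hedged sketch of the resulting calling convention (do_something()
is a stand-in), note that the function's return value now comes back
directly rather than via kthread_stop() on a returned task_struct:

	static int do_something(void *data)
	{
		/* Runs while every other online cpu spins in stop_cpu()
		 * with interrupts disabled. */
		return 0;
	}

	int err = stop_machine_run(do_something, NULL, NR_CPUS);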

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/stop_machine.h |   12 -
 kernel/cpu.c                 |   13 -
 kernel/stop_machine.c        |  299 ++++++++++++++++++-------------------------
 3 files changed, 135 insertions(+), 189 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -17,13 +17,12 @@
  * @data: the data ptr for the @fn()
  * @cpu: if @cpu == n, run @fn() on cpu n
  *       if @cpu == NR_CPUS, run @fn() on any cpu
- *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
- *       concurrently on all the other cpus
+ *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
  *
- * Description: This causes a thread to be scheduled on every other cpu,
- * each of which disables interrupts, and finally interrupts are disabled
- * on the current CPU.  The result is that noone is holding a spinlock
- * or inside any other preempt-disabled region when @fn() runs.
+ * Description: This causes a thread to be scheduled on every cpu,
+ * each of which disables interrupts.  The result is that noone is
+ * holding a spinlock or inside any other preempt-disabled region when
+ * @fn() runs.
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), 
  * @data: the data ptr for the @fn
  * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
  *
- * Description: This is a special version of the above, which returns the
- * thread which has run @fn(): kthread_stop will return the return value
- * of @fn().  Used by hotplug cpu.
+ * Description: This is a special version of the above, which assumes cpus
+ * won't come or go while it's being called.  Used by hotplug cpu.
  */
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-				       unsigned int cpu);
-
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
 #else
 
 static inline int stop_machine_run(int (*fn)(void *), void *data,
diff --git a/kernel/cpu.c b/kernel/cpu.c
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int 
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int 
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed_ptr(current, &tmp);
 
-	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
 
-	if (IS_ERR(p) || cpu_online(cpu)) {
+	if (err || cpu_online(cpu)) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		if (IS_ERR(p)) {
-			err = PTR_ERR(p);
-			goto out_allowed;
-		}
-		goto out_thread;
+		goto out_allowed;
 	}
 
 	/* Wait for it to sleep (leaving idle task). */
@@ -255,8 +250,6 @@ static int __ref _cpu_down(unsigned int 
 
 	check_for_tasks(cpu);
 
-out_thread:
-	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed_ptr(current, &old_allowed);
 out_release:
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,4 +1,4 @@
-/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
+/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
  * GPL v2 and any later version.
  */
 #include <linux/cpu.h>
@@ -13,219 +13,176 @@
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
-/* Since we effect priority and affinity (both of which are visible
- * to, and settable by outside processes) we do indirection via a
- * kthread. */
-
-/* Thread to stop each CPU in user context. */
+/* This controls the threads on each CPU. */
 enum stopmachine_state {
-	STOPMACHINE_WAIT,
+	/* Dummy starting state for thread. */
+	STOPMACHINE_NONE,
+	/* Awaiting everyone to be scheduled. */
 	STOPMACHINE_PREPARE,
+	/* Disable interrupts. */
 	STOPMACHINE_DISABLE_IRQ,
+	/* Run the function */
 	STOPMACHINE_RUN,
+	/* Exit */
 	STOPMACHINE_EXIT,
 };
+static enum stopmachine_state state;
 
 struct stop_machine_data {
 	int (*fn)(void *);
 	void *data;
-	struct completion done;
-	int run_all;
-} smdata;
+	int fnret;
+};
 
-static enum stopmachine_state stopmachine_state;
-static unsigned int stopmachine_num_threads;
-static atomic_t stopmachine_thread_ack;
+/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+static unsigned int num_threads;
+static atomic_t thread_ack;
+static struct completion finished;
+static DEFINE_MUTEX(lock);
 
-static int stopmachine(void *cpu)
+static void set_state(enum stopmachine_state newstate)
 {
-	int irqs_disabled = 0;
-	int prepared = 0;
-	int ran = 0;
+	/* Reset ack counter. */
+	atomic_set(&thread_ack, num_threads);
+	smp_wmb();
+	state = newstate;
+}
 
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
+/* Last one to ack a state moves to the next state. */
+static void ack_state(void)
+{
+	if (atomic_dec_and_test(&thread_ack)) {
+		/* If we're the last one to ack the EXIT, we're finished. */
+		if (state == STOPMACHINE_EXIT)
+			complete(&finished);
+		else
+			set_state(state + 1);
+	}
+}
 
-	/* Ack: we are alive */
-	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
-	atomic_inc(&stopmachine_thread_ack);
+/* This is the actual thread which stops the CPU.  It exits by itself rather
+ * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
+static int stop_cpu(struct stop_machine_data *smdata)
+{
+	enum stopmachine_state curstate = STOPMACHINE_NONE;
+	int uninitialized_var(ret);
 
 	/* Simple state machine */
-	while (stopmachine_state != STOPMACHINE_EXIT) {
-		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
-		    && !irqs_disabled) {
-			local_irq_disable();
-			hard_irq_disable();
-			irqs_disabled = 1;
-			/* Ack: irqs disabled. */
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		} else if (stopmachine_state == STOPMACHINE_PREPARE
-			   && !prepared) {
-			/* Everyone is in place, hold CPU. */
-			preempt_disable();
-			prepared = 1;
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
-			smdata.fn(smdata.data);
-			ran = 1;
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
+	do {
+		/* Chill out and ensure we re-read stopmachine_state. */
+		cpu_relax();
+		if (state != curstate) {
+			curstate = state;
+			switch (curstate) {
+			case STOPMACHINE_DISABLE_IRQ:
+				local_irq_disable();
+				hard_irq_disable();
+				break;
+			case STOPMACHINE_RUN:
+				/* |= allows error detection if functions on
+				 * multiple CPUs. */
+				smdata->fnret |= smdata->fn(smdata->data);
+				break;
+			default:
+				break;
+			}
+			ack_state();
 		}
-		/* Yield in first stage: migration threads need to
-		 * help our sisters onto their CPUs. */
-		if (!prepared && !irqs_disabled)
-			yield();
-		cpu_relax();
-	}
+	} while (curstate != STOPMACHINE_EXIT);
 
-	/* Ack: we are exiting. */
-	smp_mb(); /* Must read state first. */
-	atomic_inc(&stopmachine_thread_ack);
+	local_irq_enable();
+	do_exit(0);
+}
 
-	if (irqs_disabled)
-		local_irq_enable();
-	if (prepared)
-		preempt_enable();
-
+/* Callback for CPUs which aren't supposed to do anything. */
+static int chill(void *unused)
+{
 	return 0;
 }
 
-/* Change the thread state */
-static void stopmachine_set_state(enum stopmachine_state state)
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-	atomic_set(&stopmachine_thread_ack, 0);
-	smp_wmb();
-	stopmachine_state = state;
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-		cpu_relax();
-}
+	int i, err;
+	struct stop_machine_data active, idle;
+	struct task_struct **threads;
 
-static int stop_machine(void)
-{
-	int i, ret = 0;
+	active.fn = fn;
+	active.data = data;
+	active.fnret = 0;
+	idle.fn = chill;
+	idle.data = NULL;
 
-	atomic_set(&stopmachine_thread_ack, 0);
-	stopmachine_num_threads = 0;
-	stopmachine_state = STOPMACHINE_WAIT;
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		cpu = any_online_cpu(cpu_online_map);
+
+	/* This could be too big for stack on large machines. */
+	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
+	if (!threads)
+		return -ENOMEM;
+
+	/* Set up initial state. */
+	mutex_lock(&lock);
+	init_completion(&finished);
+	num_threads = num_online_cpus();
+	set_state(STOPMACHINE_PREPARE);
 
 	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
-			continue;
-		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
-		if (ret < 0)
-			break;
-		stopmachine_num_threads++;
+		struct stop_machine_data *smdata;
+		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+		if (cpu == ALL_CPUS || i == cpu)
+			smdata = &active;
+		else
+			smdata = &idle;
+
+		threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
+		if (IS_ERR(threads[i])) {
+			err = PTR_ERR(threads[i]);
+			threads[i] = NULL;
+			goto kill_threads;
+		}
+
+		/* Place it onto correct cpu. */
+		kthread_bind(threads[i], i);
+
+		/* Make it highest prio. */
+		if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
+			BUG();
 	}
 
-	/* Wait for them all to come to life. */
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
-		yield();
-		cpu_relax();
-	}
+	/* We've created all the threads.  Wake them all: hold this CPU so one
+	 * doesn't hit this CPU until we're ready. */
+	cpu = get_cpu();
+	for_each_online_cpu(i)
+		wake_up_process(threads[i]);
 
-	/* If some failed, kill them all. */
-	if (ret < 0) {
-		stopmachine_set_state(STOPMACHINE_EXIT);
-		return ret;
-	}
+	/* This will release the thread on our CPU. */
+	put_cpu();
+	wait_for_completion(&finished);
+	mutex_unlock(&lock);
 
-	/* Now they are all started, make them hold the CPUs, ready. */
-	preempt_disable();
-	stopmachine_set_state(STOPMACHINE_PREPARE);
+	kfree(threads);
 
-	/* Make them disable irqs. */
-	local_irq_disable();
-	hard_irq_disable();
-	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
+	return active.fnret;
 
-	return 0;
-}
+kill_threads:
+	for_each_online_cpu(i)
+		if (threads[i])
+			kthread_stop(threads[i]);
+	mutex_unlock(&lock);
 
-static void restart_machine(void)
-{
-	stopmachine_set_state(STOPMACHINE_EXIT);
-	local_irq_enable();
-	preempt_enable_no_resched();
-}
-
-static void run_other_cpus(void)
-{
-	stopmachine_set_state(STOPMACHINE_RUN);
-}
-
-static int do_stop(void *_smdata)
-{
-	struct stop_machine_data *smdata = _smdata;
-	int ret;
-
-	ret = stop_machine();
-	if (ret == 0) {
-		ret = smdata->fn(smdata->data);
-		if (smdata->run_all)
-			run_other_cpus();
-		restart_machine();
-	}
-
-	/* We're done: you can kthread_stop us now */
-	complete(&smdata->done);
-
-	/* Wait for kthread_stop */
-	set_current_state(TASK_INTERRUPTIBLE);
-	while (!kthread_should_stop()) {
-		schedule();
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	__set_current_state(TASK_RUNNING);
-	return ret;
-}
-
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-				       unsigned int cpu)
-{
-	static DEFINE_MUTEX(stopmachine_mutex);
-	struct stop_machine_data smdata;
-	struct task_struct *p;
-
-	mutex_lock(&stopmachine_mutex);
-
-	smdata.fn = fn;
-	smdata.data = data;
-	smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
-	init_completion(&smdata.done);
-
-	smp_wmb(); /* make sure other cpus see smdata updates */
-
-	/* If they don't care which CPU fn runs on, bind to any online one. */
-	if (cpu == NR_CPUS || cpu == ALL_CPUS)
-		cpu = raw_smp_processor_id();
-
-	p = kthread_create(do_stop, &smdata, "kstopmachine");
-	if (!IS_ERR(p)) {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
-		/* One high-prio thread per cpu.  We'll do this one. */
-		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-		kthread_bind(p, cpu);
-		wake_up_process(p);
-		wait_for_completion(&smdata.done);
-	}
-	mutex_unlock(&stopmachine_mutex);
-	return p;
+	kfree(threads);
+	return err;
 }
 
 int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-	struct task_struct *p;
 	int ret;
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	p = __stop_machine_run(fn, data, cpu);
-	if (!IS_ERR(p))
-		ret = kthread_stop(p);
-	else
-		ret = PTR_ERR(p);
+	ret = __stop_machine_run(fn, data, cpu);
 	put_online_cpus();
 
 	return ret;


* [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers
  2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
@ 2008-07-08  8:01     ` Rusty Russell
  2008-07-08 11:44     ` [PATCH 2/3] stop_machine: simplify Akinobu Mita
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-08  8:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jason Baron, Mathieu Desnoyers, Max Krasnyansky, Hidetoshi Seto

Instead of a "cpu" arg with magic values NR_CPUS (any cpu) and ~0 (all
cpus), pass a cpumask_t.  Allow NULL for the common case (where we
don't care which CPU the function is run on): temporary cpumask_t's
are usually considered bad for stack space.
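
For illustration, the three call styles after this patch; fn and data
stand in for a real callback and its argument:

	/* Common case: we don't care which cpu fn runs on. */
	err = stop_machine_run(fn, data, NULL);

	/* Run fn on one particular cpu. */
	cpumask_t mask = cpumask_of_cpu(cpu);
	err = stop_machine_run(fn, data, &mask);

	/* Run fn on every online cpu (deprecated transition alias). */
	err = stop_machine_run(fn, data, ALL_CPUS);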

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff -r 277c5fb41d25 arch/s390/kernel/kprobes.c
--- a/arch/s390/kernel/kprobes.c	Tue Jul 08 12:57:33 2008 +1000
+++ b/arch/s390/kernel/kprobes.c	Tue Jul 08 17:31:41 2008 +1000
@@ -199,7 +199,7 @@ void __kprobes arch_arm_kprobe(struct kp
 	args.new = BREAKPOINT_INSTRUCTION;
 
 	kcb->kprobe_status = KPROBE_SWAP_INST;
-	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	stop_machine_run(swap_instruction, &args, NULL);
 	kcb->kprobe_status = status;
 }
 
@@ -214,7 +214,7 @@ void __kprobes arch_disarm_kprobe(struct
 	args.new = p->opcode;
 
 	kcb->kprobe_status = KPROBE_SWAP_INST;
-	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	stop_machine_run(swap_instruction, &args, NULL);
 	kcb->kprobe_status = status;
 }
 
diff -r 277c5fb41d25 drivers/char/hw_random/intel-rng.c
--- a/drivers/char/hw_random/intel-rng.c	Tue Jul 08 12:57:33 2008 +1000
+++ b/drivers/char/hw_random/intel-rng.c	Tue Jul 08 17:31:41 2008 +1000
@@ -368,7 +368,7 @@ static int __init mod_init(void)
 	 * Use stop_machine_run because IPIs can be blocked by disabling
 	 * interrupts.
 	 */
-	err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NR_CPUS);
+	err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NULL);
 	pci_dev_put(dev);
 	iounmap(intel_rng_hw->mem);
 	kfree(intel_rng_hw);
diff -r 277c5fb41d25 include/linux/stop_machine.h
--- a/include/linux/stop_machine.h	Tue Jul 08 12:57:33 2008 +1000
+++ b/include/linux/stop_machine.h	Tue Jul 08 17:31:41 2008 +1000
@@ -5,19 +5,19 @@
    (and more).  So the "read" side to such a lock is anything which
    diables preeempt. */
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <asm/system.h>
 
 #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
 
-#define ALL_CPUS ~0U
+/* Deprecated, but useful for transition. */
+#define ALL_CPUS CPU_MASK_ALL_PTR
 
 /**
  * stop_machine_run: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn()
- * @cpu: if @cpu == n, run @fn() on cpu n
- *       if @cpu == NR_CPUS, run @fn() on any cpu
- *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
  *
  * Description: This causes a thread to be scheduled on every cpu,
  * each of which disables interrupts.  The result is that noone is
@@ -26,22 +26,22 @@
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus);
 
 /**
  * __stop_machine_run: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn
- * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
  *
  * Description: This is a special version of the above, which assumes cpus
  * won't come or go while it's being called.  Used by hotplug cpu.
  */
-int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int __stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus);
 #else
 
 static inline int stop_machine_run(int (*fn)(void *), void *data,
-				   unsigned int cpu)
+				   const cpumask_t *cpus)
 {
 	int ret;
 	local_irq_disable();
diff -r 277c5fb41d25 kernel/cpu.c
--- a/kernel/cpu.c	Tue Jul 08 12:57:33 2008 +1000
+++ b/kernel/cpu.c	Tue Jul 08 17:31:41 2008 +1000
@@ -224,8 +224,9 @@ static int __ref _cpu_down(unsigned int 
 	cpus_setall(tmp);
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed_ptr(current, &tmp);
+	tmp = cpumask_of_cpu(cpu);
 
-	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+	err = __stop_machine_run(take_cpu_down, &tcd_param, &tmp);
 
 	if (err || cpu_online(cpu)) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
diff -r 277c5fb41d25 kernel/module.c
--- a/kernel/module.c	Tue Jul 08 12:57:33 2008 +1000
+++ b/kernel/module.c	Tue Jul 08 17:31:41 2008 +1000
@@ -689,7 +689,7 @@ static int try_stop_module(struct module
 {
 	struct stopref sref = { mod, flags, forced };
 
-	return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+	return stop_machine_run(__try_stop_module, &sref, NULL);
 }
 
 unsigned int module_refcount(struct module *mod)
@@ -1421,7 +1421,7 @@ static void free_module(struct module *m
 static void free_module(struct module *mod)
 {
 	/* Delete from various lists */
-	stop_machine_run(__unlink_module, mod, NR_CPUS);
+	stop_machine_run(__unlink_module, mod, NULL);
 	remove_notes_attrs(mod);
 	remove_sect_attrs(mod);
 	mod_kobject_remove(mod);
@@ -2189,7 +2189,7 @@ static struct module *load_module(void _
 	/* Now sew it into the lists so we can get lockdep and oops
          * info during argument parsing.  Noone should access us, since
          * strong_try_module_get() will fail. */
-	stop_machine_run(__link_module, mod, NR_CPUS);
+	stop_machine_run(__link_module, mod, NULL);
 
 	/* Size of section 0 is 0, so this works well if no params */
 	err = parse_args(mod->name, mod->args,
@@ -2223,7 +2223,7 @@ static struct module *load_module(void _
 	return mod;
 
  unlink:
-	stop_machine_run(__unlink_module, mod, NR_CPUS);
+	stop_machine_run(__unlink_module, mod, NULL);
 	module_arch_cleanup(mod);
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
diff -r 277c5fb41d25 kernel/stop_machine.c
--- a/kernel/stop_machine.c	Tue Jul 08 12:57:33 2008 +1000
+++ b/kernel/stop_machine.c	Tue Jul 08 17:31:41 2008 +1000
@@ -100,7 +100,7 @@ static int chill(void *unused)
 	return 0;
 }
 
-int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int __stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
 	int i, err;
 	struct stop_machine_data active, idle;
@@ -111,10 +111,6 @@ int __stop_machine_run(int (*fn)(void *)
 	active.fnret = 0;
 	idle.fn = chill;
 	idle.data = NULL;
-
-	/* If they don't care which cpu fn runs on, just pick one. */
-	if (cpu == NR_CPUS)
-		cpu = any_online_cpu(cpu_online_map);
 
 	/* This could be too big for stack on large machines. */
 	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
@@ -128,13 +124,16 @@ int __stop_machine_run(int (*fn)(void *)
 	set_state(STOPMACHINE_PREPARE);
 
 	for_each_online_cpu(i) {
-		struct stop_machine_data *smdata;
+		struct stop_machine_data *smdata = &idle;
 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
-		if (cpu == ALL_CPUS || i == cpu)
-			smdata = &active;
-		else
-			smdata = &idle;
+		if (!cpus) {
+			if (i == first_cpu(cpu_online_map))
+				smdata = &active;
+		} else {
+			if (cpu_isset(i, *cpus))
+				smdata = &active;
+		}
 
 		threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
 		if (IS_ERR(threads[i])) {
@@ -153,7 +152,7 @@ int __stop_machine_run(int (*fn)(void *)
 
 	/* We've created all the threads.  Wake them all: hold this CPU so one
 	 * doesn't hit this CPU until we're ready. */
-	cpu = get_cpu();
+	get_cpu();
 	for_each_online_cpu(i)
 		wake_up_process(threads[i]);
 
@@ -176,13 +175,13 @@ kill_threads:
 	return err;
 }
 
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
 	int ret;
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	ret = __stop_machine_run(fn, data, cpu);
+	ret = __stop_machine_run(fn, data, cpus);
 	put_online_cpus();
 
 	return ret;
diff -r 277c5fb41d25 mm/page_alloc.c
--- a/mm/page_alloc.c	Tue Jul 08 12:57:33 2008 +1000
+++ b/mm/page_alloc.c	Tue Jul 08 17:31:41 2008 +1000
@@ -2356,7 +2356,7 @@ void build_all_zonelists(void)
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
+		stop_machine_run(__build_all_zonelists, NULL, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();


* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
  2008-07-08  8:01     ` [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers Rusty Russell
@ 2008-07-08 11:44     ` Akinobu Mita
  2008-07-08 13:11       ` Rusty Russell
  2008-07-08 14:27     ` Mathieu Desnoyers
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 23+ messages in thread
From: Akinobu Mita @ 2008-07-08 11:44 UTC (permalink / raw)
  To: Rusty Russell
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Max Krasnyansky,
	Hidetoshi Seto

I found a small possible cleanup in this patch.

> diff --git a/kernel/cpu.c b/kernel/cpu.c
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
>  static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
>  {
>        int err, nr_calls = 0;
> -       struct task_struct *p;
>        cpumask_t old_allowed, tmp;
>        void *hcpu = (void *)(long)cpu;
>        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
> @@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
>        cpu_clear(cpu, tmp);
>        set_cpus_allowed_ptr(current, &tmp);
>
> -       p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
> +       err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
>
> -       if (IS_ERR(p) || cpu_online(cpu)) {
> +       if (err || cpu_online(cpu)) {

I think the cpu_online() check is now unnecessary given the
__stop_machine_run() change in this patch, i.e. this line can simply be:

        if (err) {

Because __stop_machine_run(take_cpu_down, ...) returning zero means
take_cpu_down() has already finished and succeeded (returned zero).
It also means __cpu_disable() in take_cpu_down() succeeded.
So you can remove the cpu_online() check.

>                /* CPU didn't die: tell everyone.  Can't complain. */
>                if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
>                                            hcpu) == NOTIFY_BAD)
>                        BUG();
>
> -               if (IS_ERR(p)) {
> -                       err = PTR_ERR(p);
> -                       goto out_allowed;
> -               }
> -               goto out_thread;
> +               goto out_allowed;
>        }
>
>        /* Wait for it to sleep (leaving idle task). */


* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08 11:44     ` [PATCH 2/3] stop_machine: simplify Akinobu Mita
@ 2008-07-08 13:11       ` Rusty Russell
  2008-07-08 15:02         ` Akinobu Mita
  0 siblings, 1 reply; 23+ messages in thread
From: Rusty Russell @ 2008-07-08 13:11 UTC (permalink / raw)
  To: Akinobu Mita
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Max Krasnyansky,
	Hidetoshi Seto

On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
> I found a small possible cleanup in this patch.

Well spotted!  I think this cleanup is actually orthogonal to my patch,
so it's best served as a separate patch; how's this?

===
Hotplug CPU: don't check cpu_online after take_cpu_down

Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline
(otherwise we're in deep trouble anyway).

Remove the cpu_online() check, and put a BUG_ON() in its place.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Akinobu Mita" <akinobu.mita@gmail.com>

diff -r 805a2e5e68dd kernel/cpu.c
--- a/kernel/cpu.c	Tue Jul 08 23:04:48 2008 +1000
+++ b/kernel/cpu.c	Tue Jul 08 23:07:43 2008 +1000
@@ -226,8 +226,7 @@ static int __ref _cpu_down(unsigned int 
 	set_cpus_allowed_ptr(current, &tmp);
 
 	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
-
-	if (err || cpu_online(cpu)) {
+	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
@@ -235,6 +234,7 @@ static int __ref _cpu_down(unsigned int 
 
 		goto out_allowed;
 	}
+	BUG_ON(cpu_online(cpu));
 
 	/* Wait for it to sleep (leaving idle task). */
 	while (!idle_cpu(cpu))


* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
  2008-07-08  8:01     ` [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers Rusty Russell
  2008-07-08 11:44     ` [PATCH 2/3] stop_machine: simplify Akinobu Mita
@ 2008-07-08 14:27     ` Mathieu Desnoyers
  2008-07-09  2:11       ` Rusty Russell
  2008-07-10  0:30     ` Max Krasnyansky
  2008-07-10 21:07     ` [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace Milton Miller
  4 siblings, 1 reply; 23+ messages in thread
From: Mathieu Desnoyers @ 2008-07-08 14:27 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel, Jason Baron, Max Krasnyansky, Hidetoshi Seto

Hi Rusty,

* Rusty Russell (rusty@rustcorp.com.au) wrote:
> stop_machine creates a kthread which creates kernel threads.  We can
> create those threads directly and simplify things a little.  Some care
> must be taken with CPU hotunplug, which has special needs, but that code
> seems more robust than it was in the past.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> ---
>  include/linux/stop_machine.h |   12 -
>  kernel/cpu.c                 |   13 -
>  kernel/stop_machine.c        |  299 ++++++++++++++++++-------------------------
>  3 files changed, 135 insertions(+), 189 deletions(-)
> 
> diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> --- a/include/linux/stop_machine.h
> +++ b/include/linux/stop_machine.h
> @@ -17,13 +17,12 @@
>   * @data: the data ptr for the @fn()
>   * @cpu: if @cpu == n, run @fn() on cpu n
>   *       if @cpu == NR_CPUS, run @fn() on any cpu
> - *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
> - *       concurrently on all the other cpus
> + *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
>   *

I agree with this change if it makes things simpler. However, callers
must be aware of this important change:

"run @fn() first on the calling cpu, and then concurrently on all the
other cpus" becomes "run @fn() on every online CPU".

There were assumptions made in @fn(), where a simple non-atomic
increment of a static variable was used to detect that it was the first
thread to execute.  That will have to be changed into an atomic inc/dec
and test.  Given that the other threads have tasks to perform _after_
the first thread has executed, they will have to busy-wait (spin) there,
waiting for the first thread to finish its execution; a sketch of the
pattern follows.
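
A minimal sketch of that pattern, assuming hypothetical helpers
do_update() and do_per_cpu_fixup():

	static atomic_t stop_machine_first;	/* atomic_set() to 1 before
						 * each stop_machine_run() */
	static int first_done;

	static int fn(void *data)
	{
		/* Exactly one thread sees the 1 -> 0 transition. */
		if (atomic_dec_and_test(&stop_machine_first)) {
			do_update(data);	/* hypothetical */
			smp_wmb();		/* order update before flag */
			first_done = 1;
		} else {
			while (!first_done)	/* spin for first thread */
				cpu_relax();
			smp_rmb();		/* order flag before fixup */
			do_per_cpu_fixup();	/* hypothetical */
		}
		return 0;
	}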

Mathieu

> - * Description: This causes a thread to be scheduled on every other cpu,
> - * each of which disables interrupts, and finally interrupts are disabled
> - * on the current CPU.  The result is that noone is holding a spinlock
> - * or inside any other preempt-disabled region when @fn() runs.
> + * Description: This causes a thread to be scheduled on every cpu,
> + * each of which disables interrupts.  The result is that noone is
> + * holding a spinlock or inside any other preempt-disabled region when
> + * @fn() runs.
>   *
>   * This can be thought of as a very heavy write lock, equivalent to
>   * grabbing every spinlock in the kernel. */
> @@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), 
>   * @data: the data ptr for the @fn
>   * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
>   *
> - * Description: This is a special version of the above, which returns the
> - * thread which has run @fn(): kthread_stop will return the return value
> - * of @fn().  Used by hotplug cpu.
> + * Description: This is a special version of the above, which assumes cpus
> + * won't come or go while it's being called.  Used by hotplug cpu.
>   */
> -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
> -				       unsigned int cpu);
> -
> +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
>  #else
>  
>  static inline int stop_machine_run(int (*fn)(void *), void *data,
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int 
>  static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
>  {
>  	int err, nr_calls = 0;
> -	struct task_struct *p;
>  	cpumask_t old_allowed, tmp;
>  	void *hcpu = (void *)(long)cpu;
>  	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
> @@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int 
>  	cpu_clear(cpu, tmp);
>  	set_cpus_allowed_ptr(current, &tmp);
>  
> -	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
> +	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
>  
> -	if (IS_ERR(p) || cpu_online(cpu)) {
> +	if (err || cpu_online(cpu)) {
>  		/* CPU didn't die: tell everyone.  Can't complain. */
>  		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
>  					    hcpu) == NOTIFY_BAD)
>  			BUG();
>  
> -		if (IS_ERR(p)) {
> -			err = PTR_ERR(p);
> -			goto out_allowed;
> -		}
> -		goto out_thread;
> +		goto out_allowed;
>  	}
>  
>  	/* Wait for it to sleep (leaving idle task). */
> @@ -255,8 +250,6 @@ static int __ref _cpu_down(unsigned int 
>  
>  	check_for_tasks(cpu);
>  
> -out_thread:
> -	err = kthread_stop(p);
>  out_allowed:
>  	set_cpus_allowed_ptr(current, &old_allowed);
>  out_release:
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
> +/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
>   * GPL v2 and any later version.
>   */
>  #include <linux/cpu.h>
> @@ -13,219 +13,176 @@
>  #include <asm/atomic.h>
>  #include <asm/uaccess.h>
>  
> -/* Since we effect priority and affinity (both of which are visible
> - * to, and settable by outside processes) we do indirection via a
> - * kthread. */
> -
> -/* Thread to stop each CPU in user context. */
> +/* This controls the threads on each CPU. */
>  enum stopmachine_state {
> -	STOPMACHINE_WAIT,
> +	/* Dummy starting state for thread. */
> +	STOPMACHINE_NONE,
> +	/* Awaiting everyone to be scheduled. */
>  	STOPMACHINE_PREPARE,
> +	/* Disable interrupts. */
>  	STOPMACHINE_DISABLE_IRQ,
> +	/* Run the function */
>  	STOPMACHINE_RUN,
> +	/* Exit */
>  	STOPMACHINE_EXIT,
>  };
> +static enum stopmachine_state state;
>  
>  struct stop_machine_data {
>  	int (*fn)(void *);
>  	void *data;
> -	struct completion done;
> -	int run_all;
> -} smdata;
> +	int fnret;
> +};
>  
> -static enum stopmachine_state stopmachine_state;
> -static unsigned int stopmachine_num_threads;
> -static atomic_t stopmachine_thread_ack;
> +/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
> +static unsigned int num_threads;
> +static atomic_t thread_ack;
> +static struct completion finished;
> +static DEFINE_MUTEX(lock);
>  
> -static int stopmachine(void *cpu)
> +static void set_state(enum stopmachine_state newstate)
>  {
> -	int irqs_disabled = 0;
> -	int prepared = 0;
> -	int ran = 0;
> +	/* Reset ack counter. */
> +	atomic_set(&thread_ack, num_threads);
> +	smp_wmb();
> +	state = newstate;
> +}
>  
> -	set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
> +/* Last one to ack a state moves to the next state. */
> +static void ack_state(void)
> +{
> +	if (atomic_dec_and_test(&thread_ack)) {
> +		/* If we're the last one to ack the EXIT, we're finished. */
> +		if (state == STOPMACHINE_EXIT)
> +			complete(&finished);
> +		else
> +			set_state(state + 1);
> +	}
> +}
>  
> -	/* Ack: we are alive */
> -	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
> -	atomic_inc(&stopmachine_thread_ack);
> +/* This is the actual thread which stops the CPU.  It exits by itself rather
> + * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
> +static int stop_cpu(struct stop_machine_data *smdata)
> +{
> +	enum stopmachine_state curstate = STOPMACHINE_NONE;
> +	int uninitialized_var(ret);
>  
>  	/* Simple state machine */
> -	while (stopmachine_state != STOPMACHINE_EXIT) {
> -		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
> -		    && !irqs_disabled) {
> -			local_irq_disable();
> -			hard_irq_disable();
> -			irqs_disabled = 1;
> -			/* Ack: irqs disabled. */
> -			smp_mb(); /* Must read state first. */
> -			atomic_inc(&stopmachine_thread_ack);
> -		} else if (stopmachine_state == STOPMACHINE_PREPARE
> -			   && !prepared) {
> -			/* Everyone is in place, hold CPU. */
> -			preempt_disable();
> -			prepared = 1;
> -			smp_mb(); /* Must read state first. */
> -			atomic_inc(&stopmachine_thread_ack);
> -		} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
> -			smdata.fn(smdata.data);
> -			ran = 1;
> -			smp_mb(); /* Must read state first. */
> -			atomic_inc(&stopmachine_thread_ack);
> +	do {
> +		/* Chill out and ensure we re-read stopmachine_state. */
> +		cpu_relax();
> +		if (state != curstate) {
> +			curstate = state;
> +			switch (curstate) {
> +			case STOPMACHINE_DISABLE_IRQ:
> +				local_irq_disable();
> +				hard_irq_disable();
> +				break;
> +			case STOPMACHINE_RUN:
> +				/* |= allows error detection if functions on
> +				 * multiple CPUs. */
> +				smdata->fnret |= smdata->fn(smdata->data);
> +				break;
> +			default:
> +				break;
> +			}
> +			ack_state();
>  		}
> -		/* Yield in first stage: migration threads need to
> -		 * help our sisters onto their CPUs. */
> -		if (!prepared && !irqs_disabled)
> -			yield();
> -		cpu_relax();
> -	}
> +	} while (curstate != STOPMACHINE_EXIT);
>  
> -	/* Ack: we are exiting. */
> -	smp_mb(); /* Must read state first. */
> -	atomic_inc(&stopmachine_thread_ack);
> +	local_irq_enable();
> +	do_exit(0);
> +}
>  
> -	if (irqs_disabled)
> -		local_irq_enable();
> -	if (prepared)
> -		preempt_enable();
> -
> +/* Callback for CPUs which aren't supposed to do anything. */
> +static int chill(void *unused)
> +{
>  	return 0;
>  }
>  
> -/* Change the thread state */
> -static void stopmachine_set_state(enum stopmachine_state state)
> +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
>  {
> -	atomic_set(&stopmachine_thread_ack, 0);
> -	smp_wmb();
> -	stopmachine_state = state;
> -	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
> -		cpu_relax();
> -}
> +	int i, err;
> +	struct stop_machine_data active, idle;
> +	struct task_struct **threads;
>  
> -static int stop_machine(void)
> -{
> -	int i, ret = 0;
> +	active.fn = fn;
> +	active.data = data;
> +	active.fnret = 0;
> +	idle.fn = chill;
> +	idle.data = NULL;
>  
> -	atomic_set(&stopmachine_thread_ack, 0);
> -	stopmachine_num_threads = 0;
> -	stopmachine_state = STOPMACHINE_WAIT;
> +	/* If they don't care which cpu fn runs on, just pick one. */
> +	if (cpu == NR_CPUS)
> +		cpu = any_online_cpu(cpu_online_map);
> +
> +	/* This could be too big for stack on large machines. */
> +	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
> +	if (!threads)
> +		return -ENOMEM;
> +
> +	/* Set up initial state. */
> +	mutex_lock(&lock);
> +	init_completion(&finished);
> +	num_threads = num_online_cpus();
> +	set_state(STOPMACHINE_PREPARE);
>  
>  	for_each_online_cpu(i) {
> -		if (i == raw_smp_processor_id())
> -			continue;
> -		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
> -		if (ret < 0)
> -			break;
> -		stopmachine_num_threads++;
> +		struct stop_machine_data *smdata;
> +		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
> +
> +		if (cpu == ALL_CPUS || i == cpu)
> +			smdata = &active;
> +		else
> +			smdata = &idle;
> +
> +		threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
> +		if (IS_ERR(threads[i])) {
> +			err = PTR_ERR(threads[i]);
> +			threads[i] = NULL;
> +			goto kill_threads;
> +		}
> +
> +		/* Place it onto correct cpu. */
> +		kthread_bind(threads[i], i);
> +
> +		/* Make it highest prio. */
> +		if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
> +			BUG();
>  	}
>  
> -	/* Wait for them all to come to life. */
> -	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
> -		yield();
> -		cpu_relax();
> -	}
> +	/* We've created all the threads.  Wake them all: hold this CPU so one
> +	 * doesn't hit this CPU until we're ready. */
> +	cpu = get_cpu();
> +	for_each_online_cpu(i)
> +		wake_up_process(threads[i]);
>  
> -	/* If some failed, kill them all. */
> -	if (ret < 0) {
> -		stopmachine_set_state(STOPMACHINE_EXIT);
> -		return ret;
> -	}
> +	/* This will release the thread on our CPU. */
> +	put_cpu();
> +	wait_for_completion(&finished);
> +	mutex_unlock(&lock);
>  
> -	/* Now they are all started, make them hold the CPUs, ready. */
> -	preempt_disable();
> -	stopmachine_set_state(STOPMACHINE_PREPARE);
> +	kfree(threads);
>  
> -	/* Make them disable irqs. */
> -	local_irq_disable();
> -	hard_irq_disable();
> -	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
> +	return active.fnret;
>  
> -	return 0;
> -}
> +kill_threads:
> +	for_each_online_cpu(i)
> +		if (threads[i])
> +			kthread_stop(threads[i]);
> +	mutex_unlock(&lock);
>  
> -static void restart_machine(void)
> -{
> -	stopmachine_set_state(STOPMACHINE_EXIT);
> -	local_irq_enable();
> -	preempt_enable_no_resched();
> -}
> -
> -static void run_other_cpus(void)
> -{
> -	stopmachine_set_state(STOPMACHINE_RUN);
> -}
> -
> -static int do_stop(void *_smdata)
> -{
> -	struct stop_machine_data *smdata = _smdata;
> -	int ret;
> -
> -	ret = stop_machine();
> -	if (ret == 0) {
> -		ret = smdata->fn(smdata->data);
> -		if (smdata->run_all)
> -			run_other_cpus();
> -		restart_machine();
> -	}
> -
> -	/* We're done: you can kthread_stop us now */
> -	complete(&smdata->done);
> -
> -	/* Wait for kthread_stop */
> -	set_current_state(TASK_INTERRUPTIBLE);
> -	while (!kthread_should_stop()) {
> -		schedule();
> -		set_current_state(TASK_INTERRUPTIBLE);
> -	}
> -	__set_current_state(TASK_RUNNING);
> -	return ret;
> -}
> -
> -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
> -				       unsigned int cpu)
> -{
> -	static DEFINE_MUTEX(stopmachine_mutex);
> -	struct stop_machine_data smdata;
> -	struct task_struct *p;
> -
> -	mutex_lock(&stopmachine_mutex);
> -
> -	smdata.fn = fn;
> -	smdata.data = data;
> -	smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
> -	init_completion(&smdata.done);
> -
> -	smp_wmb(); /* make sure other cpus see smdata updates */
> -
> -	/* If they don't care which CPU fn runs on, bind to any online one. */
> -	if (cpu == NR_CPUS || cpu == ALL_CPUS)
> -		cpu = raw_smp_processor_id();
> -
> -	p = kthread_create(do_stop, &smdata, "kstopmachine");
> -	if (!IS_ERR(p)) {
> -		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
> -
> -		/* One high-prio thread per cpu.  We'll do this one. */
> -		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
> -		kthread_bind(p, cpu);
> -		wake_up_process(p);
> -		wait_for_completion(&smdata.done);
> -	}
> -	mutex_unlock(&stopmachine_mutex);
> -	return p;
> +	kfree(threads);
> +	return err;
>  }
>  
>  int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
>  {
> -	struct task_struct *p;
>  	int ret;
>  
>  	/* No CPUs can come up or down during this. */
>  	get_online_cpus();
> -	p = __stop_machine_run(fn, data, cpu);
> -	if (!IS_ERR(p))
> -		ret = kthread_stop(p);
> -	else
> -		ret = PTR_ERR(p);
> +	ret = __stop_machine_run(fn, data, cpu);
>  	put_online_cpus();
>  
>  	return ret;

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68


* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08 13:11       ` Rusty Russell
@ 2008-07-08 15:02         ` Akinobu Mita
  2008-07-09  2:18           ` Rusty Russell
  0 siblings, 1 reply; 23+ messages in thread
From: Akinobu Mita @ 2008-07-08 15:02 UTC (permalink / raw)
  To: Rusty Russell
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Max Krasnyansky,
	Hidetoshi Seto

2008/7/8 Rusty Russell <rusty@rustcorp.com.au>:
> On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
>> I found a small possible cleanup in this patch.
>
> Well spotted!  I think this cleanup is actually orthogonal to my patch,
> so best served as a separate patch, how's this?

Actually, the cpu_online() check was necessary before applying this
stop_machine: simplify patch.

With the old __stop_machine_run(), the call could succeed (return a
!IS_ERR(p) value) even if take_cpu_down() returned a non-zero value.
The return value of take_cpu_down() was obtained through kthread_stop().
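
Sketched from the old code, take_cpu_down()'s failure could only surface
at the kthread_stop() step:

	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
	if (!IS_ERR(p))			/* thread creation succeeded... */
		err = kthread_stop(p);	/* ...fn's return value shows up here */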

> ===
> Hotplug CPU: don't check cpu_online after take_cpu_down

So it seems that folding this patch into the stop_machine: simplify
patch is more appropriate.

> Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline
> (otherwise we're in deep trouble anyway).
>
> Remove the cpu_online() check, and put a BUG_ON().
>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> Cc: "Akinobu Mita" <akinobu.mita@gmail.com>
>
> diff -r 805a2e5e68dd kernel/cpu.c
> --- a/kernel/cpu.c      Tue Jul 08 23:04:48 2008 +1000
> +++ b/kernel/cpu.c      Tue Jul 08 23:07:43 2008 +1000
> @@ -226,8 +226,7 @@ static int __ref _cpu_down(unsigned int
>        set_cpus_allowed_ptr(current, &tmp);
>
>        err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
> -
> -       if (err || cpu_online(cpu)) {
> +       if (err) {
>                /* CPU didn't die: tell everyone.  Can't complain. */
>                if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
>                                            hcpu) == NOTIFY_BAD)
> @@ -235,6 +234,7 @@ static int __ref _cpu_down(unsigned int
>
>                goto out_allowed;
>        }
> +       BUG_ON(cpu_online(cpu));
>
>        /* Wait for it to sleep (leaving idle task). */
>        while (!idle_cpu(cpu))
>


* Re: [PATCH 0/3] stop_machine enhancements and simplifications
  2008-07-08  7:50 [PATCH 0/3] stop_machine enhancements and simplifications Rusty Russell
  2008-07-08  7:56 ` [PATCH 1/3] stop_machine: add ALL_CPUS option Rusty Russell
@ 2008-07-08 16:21 ` Christian Borntraeger
  2008-07-08 20:10 ` Jason Baron
  2 siblings, 0 replies; 23+ messages in thread
From: Christian Borntraeger @ 2008-07-08 16:21 UTC (permalink / raw)
  To: Rusty Russell
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Max Krasnyansky

Am Dienstag, 8. Juli 2008 schrieb Rusty Russell:
> Hi all,
> 
>    Here are the three stop_machine patches I've queued for the next merge
> window.  The first two are pretty well tested via linux-next, the last is
> recent but fairly straightforward.

Hmm, these patches don't apply on kvm.git, so I tested the version from
linux-next.

When using linux-next in a kvm guest on a linux-next host, this patch
set really improves the situation.  On a guest with 64 cpus on a 1-cpu
host, stop_machine_run seems to finish immediately.

I still have some strange problems with 25 guests * 64 cpus on one host cpu, 
but this requires further analysis.

IMHO this patch set is really an improvement over the earlier version:

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>



* Re: [PATCH 0/3] stop_machine enhancements and simplifications
  2008-07-08  7:50 [PATCH 0/3] stop_machine enhancements and simplifications Rusty Russell
  2008-07-08  7:56 ` [PATCH 1/3] stop_machine: add ALL_CPUS option Rusty Russell
  2008-07-08 16:21 ` [PATCH 0/3] stop_machine enhancements and simplifications Christian Borntraeger
@ 2008-07-08 20:10 ` Jason Baron
  2008-07-09  3:29   ` Mathieu Desnoyers
  2 siblings, 1 reply; 23+ messages in thread
From: Jason Baron @ 2008-07-08 20:10 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel, Mathieu Desnoyers, Max Krasnyansky

On Tue, Jul 08, 2008 at 05:50:55PM +1000, Rusty Russell wrote:
> Hi all,
> 
>    Here are the three stop_machine patches I've queued for the next merge
> window.  The first two are pretty well tested via linux-next, the last is
> recent but fairly straightforward.
> 
>    I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
> but it can be removed by the last of these patches (left in to reduce
> transition breakage).

hi,

I added the 'ALL_CPUS' feature to support the architecture-independent
immediate values code that Mathieu wrote.  So if Mathieu needs it,
great; otherwise I have no other code depending on it.

thanks,

-Jason 


* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08 14:27     ` Mathieu Desnoyers
@ 2008-07-09  2:11       ` Rusty Russell
  2008-07-09 12:42         ` Mathieu Desnoyers
  0 siblings, 1 reply; 23+ messages in thread
From: Rusty Russell @ 2008-07-09  2:11 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: linux-kernel, Jason Baron, Max Krasnyansky, Hidetoshi Seto

On Wednesday 09 July 2008 00:27:03 Mathieu Desnoyers wrote:
> Hi Rusty,
>
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > stop_machine creates a kthread which creates kernel threads.  We can
> > create those threads directly and simplify things a little.  Some care
> > must be taken with CPU hotunplug, which has special needs, but that code
> > seems more robust than it was in the past.
> >
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> > ---
> >  include/linux/stop_machine.h |   12 -
> >  kernel/cpu.c                 |   13 -
> >  kernel/stop_machine.c        |  299
> > ++++++++++++++++++------------------------- 3 files changed, 135
> > insertions(+), 189 deletions(-)
> >
> > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> > --- a/include/linux/stop_machine.h
> > +++ b/include/linux/stop_machine.h
> > @@ -17,13 +17,12 @@
> >   * @data: the data ptr for the @fn()
> >   * @cpu: if @cpu == n, run @fn() on cpu n
> >   *       if @cpu == NR_CPUS, run @fn() on any cpu
> > - *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and
> > then - *       concurrently on all the other cpus
> > + *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
> >   *
>
> I agree with this change if it makes things simpler. However, callers
> must be aware of this important change :
>
> "run @fn() first on the calling cpu, and then concurrently on all the
> other cpus" becomes "run @fn() on every online CPU".

OK.  Since that was never in mainline, I think you're the only one who needs 
to be aware of the semantic change?

The new symmetric implementation breaks it; hope that isn't a showstopper for 
you?

> There were assumptions done in @fn() where a simple non atomic increment
> was used on a static variable to detect that it was the first thread to
> execute. It will have to be changed into an atomic inc/dec and test.
> Given that the other threads have tasks to perform _after_ the first
> thread has executed, they will have to busy-wait (spin) there waiting
> for the first thread to finish its execution.

I assume you can't do that step, then call stop_machine.

Thanks,
Rusty.


* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08 15:02         ` Akinobu Mita
@ 2008-07-09  2:18           ` Rusty Russell
  0 siblings, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-09  2:18 UTC (permalink / raw)
  To: Akinobu Mita
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Max Krasnyansky,
	Hidetoshi Seto

On Wednesday 09 July 2008 01:02:02 Akinobu Mita wrote:
> 2008/7/8 Rusty Russell <rusty@rustcorp.com.au>:
> > On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
> >> I found a small possible cleanup in this patch.
> >
> > Well spotted!  I think this cleanup is actually orthogonal to my patch,
> > so best served as a separate patch, how's this?
>
> Actually the cpu_online() check was necessary before appling this
> stop_machine: simplify patch.
>
> With old __stop_machine_run(), __stop_machine_run() could succeed
> (return !IS_ERR(p) value) even if take_cpu_down() returned non-zero value.
> The return value of take_cpu_down() was obtained through kthread_stop().

Ah, thanks for the analysis!

It's a little non-obvious, so I've left it as a separate patch (it doesn't 
hurt to have the check there), but included your excellent explanation within 
it.

Thanks!
Rusty.


* Re: [PATCH 0/3] stop_machine enhancements and simplifications
  2008-07-08 20:10 ` Jason Baron
@ 2008-07-09  3:29   ` Mathieu Desnoyers
  0 siblings, 0 replies; 23+ messages in thread
From: Mathieu Desnoyers @ 2008-07-09  3:29 UTC (permalink / raw)
  To: Jason Baron; +Cc: Rusty Russell, linux-kernel, Max Krasnyansky

* Jason Baron (jbaron@redhat.com) wrote:
> On Tue, Jul 08, 2008 at 05:50:55PM +1000, Rusty Russell wrote:
> > Hi all,
> > 
> >    Here are the three stop_machine patches I've queued for the next merge
> > window.  The first two are pretty well tested via linux-next, the last is
> > recent but fairly straightforward.
> > 
> >    I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
> > but it can be removed by the last of these patches (left in to reduce
> > transition breakage).
> 
> hi,
> 
> I added the 'ALL_CPUS' feature to support the architecture-independent
> immediate values code that Mathieu wrote. So if Mathieu needs it, great;
> otherwise I have no other code depending on it.
> 
> thanks,
> 
> -Jason 

Hi Rusty,

The immediate values implementation (the 'simple' version) uses this
patch. If we include your simplification, I'll have to modify the
immediate values patch a little bit so we deal with concurrent execution
of all the threads, as I pointed out in my earlier email.

Mathieu


-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-09  2:11       ` Rusty Russell
@ 2008-07-09 12:42         ` Mathieu Desnoyers
  0 siblings, 0 replies; 23+ messages in thread
From: Mathieu Desnoyers @ 2008-07-09 12:42 UTC (permalink / raw)
  To: Rusty Russell; +Cc: linux-kernel, Jason Baron, Max Krasnyansky, Hidetoshi Seto

* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Wednesday 09 July 2008 00:27:03 Mathieu Desnoyers wrote:
> > Hi Rusty,
> >
> > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > stop_machine creates a kthread which creates kernel threads.  We can
> > > create those threads directly and simplify things a little.  Some care
> > > must be taken with CPU hotunplug, which has special needs, but that code
> > > seems more robust than it was in the past.
> > >
> > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> > > ---
> > >  include/linux/stop_machine.h |   12 -
> > >  kernel/cpu.c                 |   13 -
> > >  kernel/stop_machine.c        |  299 ++++++++++++++++++-------------------
> > >  3 files changed, 135 insertions(+), 189 deletions(-)
> > >
> > > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> > > --- a/include/linux/stop_machine.h
> > > +++ b/include/linux/stop_machine.h
> > > @@ -17,13 +17,12 @@
> > >   * @data: the data ptr for the @fn()
> > >   * @cpu: if @cpu == n, run @fn() on cpu n
> > >   *       if @cpu == NR_CPUS, run @fn() on any cpu
> > > - *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
> > > - *       concurrently on all the other cpus
> > > + *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
> > >   *
> >
> > I agree with this change if it makes things simpler. However, callers
> > must be aware of this important change:
> >
> > "run @fn() first on the calling cpu, and then concurrently on all the
> > other cpus" becomes "run @fn() on every online CPU".
> 
> OK.  Since that was never in mainline, I think you're the only one who needs 
> to be aware of the semantic change?
> 
> The new symmetric implementation breaks it; hope that isn't a showstopper for 
> you?
> 

Nope, that should be ok with something like:

...
                atomic_set(&stop_machine_first, 1);
                wrote_text = 0;
                stop_machine_run(stop_machine_imv_update, (void *)imv,
                                        ALL_CPUS);
...

static int stop_machine_imv_update(void *imv_ptr)
{
        struct __imv *imv = imv_ptr;

        if (atomic_dec_and_test(&stop_machine_first)) {
                text_poke((void *)imv->imv, (void *)imv->var, imv->size);
                smp_wmb(); /* make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
                while (!wrote_text)
                        smp_rmb();
                sync_core();
        }

        flush_icache_range(imv->imv, imv->imv + imv->size);

        return 0;
}

> > There were assumptions made in @fn(), where a simple non-atomic increment
> > of a static variable was used to detect that it was the first thread to
> > execute. That will have to be changed into an atomic inc/dec-and-test.
> > Given that the other threads have tasks to perform _after_ the first
> > thread has executed, they will have to busy-wait (spin) there, waiting
> > for the first thread to finish its execution.
> 
> I assume you can't do that step first and then call stop_machine?
> 

Indeed, I can't, because I need to have all other CPUs busy looping with
interrupts disabled while I do the text_poke.

Mathieu


> Thanks,
> Rusty.

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
                       ` (2 preceding siblings ...)
  2008-07-08 14:27     ` Mathieu Desnoyers
@ 2008-07-10  0:30     ` Max Krasnyansky
  2008-07-11  7:51       ` Rusty Russell
  2008-07-10 21:07     ` [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace Milton Miller
  4 siblings, 1 reply; 23+ messages in thread
From: Max Krasnyansky @ 2008-07-10  0:30 UTC (permalink / raw)
  To: Rusty Russell
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Hidetoshi Seto

Rusty Russell wrote:
> stop_machine creates a kthread which creates kernel threads.  We can
> create those threads directly and simplify things a little.  Some care
> must be taken with CPU hotunplug, which has special needs, but that code
> seems more robust than it was in the past.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

Rusty,

You mentioned (in private conversation) that you were going to add some 
logic that checks whether a CPU is running user-space code and not holding 
any locks, to avoid scheduling a stop_machine thread on it. Was that supposed 
to be part of this patch?

Max

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace
  2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
                       ` (3 preceding siblings ...)
  2008-07-10  0:30     ` Max Krasnyansky
@ 2008-07-10 21:07     ` Milton Miller
  2008-07-11  6:43       ` Rusty Russell
  2008-07-11  7:46       ` Ingo Molnar
  4 siblings, 2 replies; 23+ messages in thread
From: Milton Miller @ 2008-07-10 21:07 UTC (permalink / raw)
  To: Ingo Molnar, Rusty Russell; +Cc: linuxppc-dev, linux-kernel, linux-next

Hi Rusty, Ingo.

Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers
didn't find kernel/trace/ftrace.c in -next, causing an immediate almost NULL
pointer dereference in ftrace_dynamic_init.


Signed-off-by: Milton Miller <miltonm@bga.com>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0f271c4..c29acb5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -587,7 +587,7 @@ static int __ftrace_modify_code(void *data)
 
 static void ftrace_run_update_code(int command)
 {
-	stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
+	stop_machine_run(__ftrace_modify_code, &command, NULL);
 }
 
 void ftrace_disable_daemon(void)
@@ -787,7 +787,7 @@ static int ftrace_update_code(void)
 	    !ftrace_enabled || !ftraced_trigger)
 		return 0;
 
-	stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
+	stop_machine_run(__ftrace_update_code, NULL, NULL);
 
 	return 1;
 }
@@ -1564,7 +1564,7 @@ static int __init ftrace_dynamic_init(void)
 
 	addr = (unsigned long)ftrace_record_ip;
 
-	stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
+	stop_machine_run(ftrace_dyn_arch_init, &addr, NULL);
 
 	/* ftrace_dyn_arch_init places the return code in addr */
 	if (addr) {

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace
  2008-07-10 21:07     ` [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace Milton Miller
@ 2008-07-11  6:43       ` Rusty Russell
  2008-07-11  7:46       ` Ingo Molnar
  1 sibling, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-11  6:43 UTC (permalink / raw)
  To: Milton Miller; +Cc: Ingo Molnar, linuxppc-dev, linux-kernel, linux-next

On Friday 11 July 2008 07:07:57 Milton Miller wrote:
> Hi Rusty, Ingo.
>
> Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
> numbers didn't find kernel/trace/ftrace.c in -next, causing an immediate
> almost NULL pointer dereference in ftrace_dynamic_init.

Yes, I'm switching the patches around so the transition is done correctly: 
introduce a new stop_machine() with the new interface and deprecate the old 
stop_machine_run().  We can remove stop_machine_run() once everyone has 
switched.
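
The compatibility step could look something like this (just a sketch of the
header side; the exact names and the ALL_CPUS handling may differ in the
final patch):

/* New interface: @cpus is the set of cpus to run @fn on (NULL == any). */
int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);

/* Old interface kept as a deprecated wrapper, so existing callers keep
 * building (with a warning) instead of crashing at runtime. */
static inline int __deprecated stop_machine_run(int (*fn)(void *), void *data,
						unsigned int cpu)
{
	/* If they don't care which cpu fn runs on, just pick one. */
	if (cpu == NR_CPUS)
		return stop_machine(fn, data, NULL);
	else {
		cpumask_t cpus = cpumask_of_cpu(cpu);
		return stop_machine(fn, data, &cpus);
	}
}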

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace
  2008-07-10 21:07     ` [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace Milton Miller
  2008-07-11  6:43       ` Rusty Russell
@ 2008-07-11  7:46       ` Ingo Molnar
  2008-07-11  8:55         ` Ingo Molnar
  2008-07-11 12:34         ` Rusty Russell
  1 sibling, 2 replies; 23+ messages in thread
From: Ingo Molnar @ 2008-07-11  7:46 UTC (permalink / raw)
  To: Milton Miller
  Cc: Ingo Molnar, Rusty Russell, linuxppc-dev, linux-kernel,
	linux-next, Stephen Rothwell, Andrew Morton


* Milton Miller <miltonm@bga.com> wrote:

> Hi Rusty, Ingo.
> 
> Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic 
> numbers didn't find kernel/trace/ftrace.c in -next, causing an 
> immediate almost NULL pointer dereference in ftrace_dynamic_init.

Rusty - what's going on here? Please do not change APIs like that, which 
cause code to crash. Either do a compatible API change, or change it 
over in a way that causes clear build failures, not crashes.

	Ingo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-10  0:30     ` Max Krasnyansky
@ 2008-07-11  7:51       ` Rusty Russell
  2008-07-11 13:12         ` Mathieu Desnoyers
  0 siblings, 1 reply; 23+ messages in thread
From: Rusty Russell @ 2008-07-11  7:51 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: linux-kernel, Jason Baron, Mathieu Desnoyers, Hidetoshi Seto

On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
> Rusty Russell wrote:
> > stop_machine creates a kthread which creates kernel threads.  We can
> > create those threads directly and simplify things a little.  Some care
> > must be taken with CPU hotunplug, which has special needs, but that code
> > seems more robust than it was in the past.
> >
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>
> Rusty,
>
> You mentioned (in private conversation) that you were going to add some
> logic that checks whether a CPU is running user-space code and not holding
> any locks, to avoid scheduling a stop_machine thread on it. Was that supposed
> to be part of this patch?
>
> Max

No... I tried it, and it killed my machine.  I haven't chased it down yet, 
but it's on the "experimental" end of my patch queue.

Will play with it again and report,
Rusty.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace
  2008-07-11  7:46       ` Ingo Molnar
@ 2008-07-11  8:55         ` Ingo Molnar
  2008-07-11 12:34         ` Rusty Russell
  1 sibling, 0 replies; 23+ messages in thread
From: Ingo Molnar @ 2008-07-11  8:55 UTC (permalink / raw)
  To: Milton Miller
  Cc: Ingo Molnar, Rusty Russell, linuxppc-dev, linux-kernel,
	linux-next, Stephen Rothwell, Andrew Morton


* Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Milton Miller <miltonm@bga.com> wrote:
> 
> > Hi Rusty, Ingo.
> > 
> > Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic 
> > numbers didn't find kernel/trace/ftrace.c in -next, causing an 
> > immediate almost NULL pointer dereference in ftrace_dynamic_init.
> 
> Rusty - what's going on here? Please do not change APIs like that, 
> which cause code to crash. Either do a compatible API change, or 
> change it over in a way that causes clear build failures, not crashes.

ah, i see from Rusty's other reply that there's going to be another 
version of this. Good :-)

	Ingo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace
  2008-07-11  7:46       ` Ingo Molnar
  2008-07-11  8:55         ` Ingo Molnar
@ 2008-07-11 12:34         ` Rusty Russell
  1 sibling, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-11 12:34 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Milton Miller, Ingo Molnar, linuxppc-dev, linux-kernel,
	linux-next, Stephen Rothwell, Andrew Morton

On Friday 11 July 2008 17:46:03 Ingo Molnar wrote:
> * Milton Miller <miltonm@bga.com> wrote:
> > Hi Rusty, Ingo.
> >
> > Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
> > numbers didn't find kernel/trace/ftrace.c in -next, causing an
> > immediate almost NULL pointer dereference in ftrace_dynamic_init.
>
> Rusty - what's going on here? Please do not change APIs like that, which
> cause code to crash. Either do a compatible API change, or change it
> over in a way that causes clear build failures, not crashes.

To be fair, I did.  Unfortunately GCC only warns about passing an int to a 
pointer arg, and boom.

But compatible is even better.  Given the number of stop_machine_run users I 
thought it unlikely that a new one would be introduced during the change.  I 
was wrong, so I'll do it the Right Way.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-11  7:51       ` Rusty Russell
@ 2008-07-11 13:12         ` Mathieu Desnoyers
  2008-07-12  5:07           ` Rusty Russell
  0 siblings, 1 reply; 23+ messages in thread
From: Mathieu Desnoyers @ 2008-07-11 13:12 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Max Krasnyansky, linux-kernel, Jason Baron, Hidetoshi Seto

* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
> > Rusty Russell wrote:
> > > stop_machine creates a kthread which creates kernel threads.  We can
> > > create those threads directly and simplify things a little.  Some care
> > > must be taken with CPU hotunplug, which has special needs, but that code
> > > seems more robust than it was in the past.
> > >
> > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> >
> > Rusty,
> >
> > You mentioned (in private conversation) that you were going to add some
> > logic that checks whether a CPU is running user-space code and not holding
> > any locks, to avoid scheduling a stop_machine thread on it. Was that supposed
> > to be part of this patch?
> >
> > Max
> 
> No... I tried it, and it killed my machine.  I haven't chased it down yet, 
> but it's on the "experimental" end of my patch queue.
> 
> Will play with it again and report,
> Rusty.
> 

Hrm, I must be missing something, but using the fact that other CPUs are
running in userspace as a guarantee that they are not running within
critical kernel sections seems kind of... racy? I'd like to have a look
at this experimental patch: does it inhibit interrupts somehow, and/or
does it take control of userspace processes doing system calls at that
precise moment?

Mathieu

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/3] stop_machine: simplify
  2008-07-11 13:12         ` Mathieu Desnoyers
@ 2008-07-12  5:07           ` Rusty Russell
  0 siblings, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2008-07-12  5:07 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Max Krasnyansky, linux-kernel, Jason Baron, Hidetoshi Seto

On Friday 11 July 2008 23:12:21 Mathieu Desnoyers wrote:
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
> > > You mentioned (in private conversation) that you were going to add some
> > > logic that checks whether a CPU is running user-space code and not
> > > holding any locks, to avoid scheduling a stop_machine thread on it. 
> >
> > Will play with it again and report,
> > Rusty.
>
> Hrm, I must be missing something, but using the fact that other CPUs are
> running in userspace as a guarantee that they are not running within
> critical kernel sections seems kind of.. racy ? I'd like to have a look
> at this experimental patch : does it inhibit interrupts somehow and/or
> does it take control of userspace processes doing system calls at that
> precise moment ?

The idea was to try this ipi-to-all-cpus approach and fall back on the current
thread method if it doesn't work.  I suspect it will succeed in the vast
majority of cases (with CONFIG_PREEMPT, we can also let the function execute
if it is in-kernel but preemptible).  Something like this:

+struct ipi_data {
+	atomic_t acked;
+	atomic_t failed;
+	unsigned int cpu;
+	int fnret;
+	int (*fn)(void *data);
+	void *data;
+};
+
+static void ipi_func(void *info)
+{
+	struct ipi_data *ipi = info;
+	bool ok = false;
+
+	if (user_mode(get_irq_regs()))
+		ok = true;
+	else {
+#ifdef CONFIG_PREEMPT
+		/* We're in an interrupt, ok, but were we preemptible
+		 * before that? */
+		if ((hardirq_count() >> HARDIRQ_SHIFT) == 1) {
+			int prev = preempt_count() & ~HARDIRQ_MASK;
+			if ((prev & ~PREEMPT_ACTIVE) == PREEMPT_INATOMIC_BASE)
+				ok = true;
+		}
+#endif
+	}
+
+	if (!ok) {
+		/* Mark our failure before acking. */
+		atomic_inc(&ipi->failed);
+		wmb();
+	}
+
+	if (smp_processor_id() != ipi->cpu) {
+		/* Wait for cpu to call function (last to ack). */
+		atomic_inc(&ipi->acked);
+		while (atomic_read(&ipi->acked) != num_online_cpus())
+			cpu_relax();
+	} else {
+		while (atomic_read(&ipi->acked) != num_online_cpus() - 1)
+			cpu_relax();
+		/* Must read acked before failed. */
+		rmb();
+
+		/* Call function if no one failed. */
+		if (atomic_read(&ipi->failed) == 0)
+			ipi->fnret = ipi->fn(ipi->data);
+		atomic_inc(&ipi->acked);
+	}
+}
+
+static bool try_ipi_stop(int (*fn)(void *), void *data, unsigned int cpu,
+			 int *ret)
+{
+	struct ipi_data ipi;
+
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		ipi.cpu = any_online_cpu(cpu_online_map);
+	else
+		ipi.cpu = cpu;
+
+	atomic_set(&ipi.acked, 0);
+	atomic_set(&ipi.failed, 0);
+	ipi.fn = fn;
+	ipi.data = data;
+	ipi.fnret = 0;
+
+	smp_call_function(ipi_func, &ipi, 0, 1);
+
+	printk("stop_machine: ipi acked %u failed %u\n",
+	       atomic_read(&ipi.acked), atomic_read(&ipi.failed));
+	*ret = ipi.fnret;
+	return (atomic_read(&ipi.failed) == 0);
+}
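
The call site would then try the IPI path first and fall back to the existing
thread-based code when any cpu looks unsafe.  Roughly (a sketch only;
__stop_machine_run_threads is a made-up name for the current implementation):

int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
	int ret;

	/* Fast path: stop everyone via IPI when all cpus look safe. */
	if (try_ipi_stop(fn, data, cpu, &ret))
		return ret;

	/* Slow path: fall back to the thread-based method. */
	return __stop_machine_run_threads(fn, data, cpu);
}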

Hope that clarifies!
Rusty.


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2008-07-12  5:07 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-08  7:50 [PATCH 0/3] stop_machine enhancements and simplifications Rusty Russell
2008-07-08  7:56 ` [PATCH 1/3] stop_machine: add ALL_CPUS option Rusty Russell
2008-07-08  7:56   ` [PATCH 2/3] stop_machine: simplify Rusty Russell
2008-07-08  8:01     ` [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers Rusty Russell
2008-07-08 11:44     ` [PATCH 2/3] stop_machine: simplify Akinobu Mita
2008-07-08 13:11       ` Rusty Russell
2008-07-08 15:02         ` Akinobu Mita
2008-07-09  2:18           ` Rusty Russell
2008-07-08 14:27     ` Mathieu Desnoyers
2008-07-09  2:11       ` Rusty Russell
2008-07-09 12:42         ` Mathieu Desnoyers
2008-07-10  0:30     ` Max Krasnyansky
2008-07-11  7:51       ` Rusty Russell
2008-07-11 13:12         ` Mathieu Desnoyers
2008-07-12  5:07           ` Rusty Russell
2008-07-10 21:07     ` [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace Milton Miller
2008-07-11  6:43       ` Rusty Russell
2008-07-11  7:46       ` Ingo Molnar
2008-07-11  8:55         ` Ingo Molnar
2008-07-11 12:34         ` Rusty Russell
2008-07-08 16:21 ` [PATCH 0/3] stop_machine enhancements and simplifications Christian Borntraeger
2008-07-08 20:10 ` Jason Baron
2008-07-09  3:29   ` Mathieu Desnoyers
