* [patch v2 0/2] enhance stop machine infrastructure for MTRR rendezvous sequence
@ 2011-06-07 20:14 Suresh Siddha
  2011-06-07 20:14 ` [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
  2011-06-07 20:14 ` [patch v2 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous Suresh Siddha
  0 siblings, 2 replies; 6+ messages in thread
From: Suresh Siddha @ 2011-06-07 20:14 UTC (permalink / raw)
  To: mingo, tglx, hpa, trenn, prarit, tj
  Cc: linux-kernel, suresh.b.siddha, youquan.song

The first patch enhances the stop machine infrastructure so that
__stop_machine() can be called from the cpu hotplug path, where the calling
cpu is not yet online. We already allow this for !CONFIG_SMP, so this patch
brings the CONFIG_SMP behavior in line with !CONFIG_SMP.

The second patch uses the enhanced __stop_machine() to implement the x86 MTRR
init rendezvous sequence and thus removes the duplicate implementation
of stop machine using stop_one_cpu_nowait(). That duplicate implementation
can potentially lead to deadlock if there is a parallel system-wide
rendezvous using __stop_machine().
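
A rough sketch of how that deadlock can play out (an illustrative
interleaving, not an actual trace):

	CPU A stopper thread                  CPU B stopper thread
	--------------------                  --------------------
	runs the MTRR rendezvous handler      runs stop_machine_cpu_stop()
	first, spins waiting for all cpus     first, spins waiting for all cpus
	to reach the MTRR rendezvous          to reach the stop_machine rendezvous

	Each handler waits for cpus that are stuck in the other handler, so
	neither rendezvous ever completes.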

Both of these address one of the deadlocks mentioned in
https://bugzilla.novell.com/show_bug.cgi?id=672008

Changes from v1:
* Use the per-cpu stopper thread's enabled status to find out whether the cpu
  is online or offline, instead of using cpu_online(smp_processor_id()). This
  avoids a false-positive warning about using smp_processor_id() from a
  preemptible section (see the snippet below).
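
In code, this check amounts to the following line from the first patch:

	/* the per-cpu stopper is only marked enabled once the cpu is online */
	int online = percpu_read(cpu_stopper.enabled);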

thanks,
suresh



* [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-07 20:14 [patch v2 0/2] enhance stop machine infrastructure for MTRR rendezvous sequence Suresh Siddha
@ 2011-06-07 20:14 ` Suresh Siddha
  2011-06-08 19:20   ` Ingo Molnar
  2011-06-09  9:54   ` Tejun Heo
  2011-06-07 20:14 ` [patch v2 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous Suresh Siddha
  1 sibling, 2 replies; 6+ messages in thread
From: Suresh Siddha @ 2011-06-07 20:14 UTC (permalink / raw)
  To: mingo, tglx, hpa, trenn, prarit, tj
  Cc: linux-kernel, suresh.b.siddha, youquan.song, stable

[-- Attachment #1: stop_machine_from_cpu_hotplug_path.patch --]
[-- Type: text/plain, Size: 4752 bytes --]

Currently the stop machine infrastructure can be called only from a cpu
that is online. But for !CONFIG_SMP, we do allow calling __stop_machine()
before the cpu is online.

x86, for example, requires the stop machine infrastructure in the cpu
online path and currently implements its own stop machine (using
stop_one_cpu_nowait()) for MTRR initialization there.

Enhance __stop_machine() so that it can be called before the cpu is
onlined. This paves the way for code consolidation and addresses potential
deadlocks caused by multiple mechanisms doing a system-wide rendezvous.

This also addresses the behavioral differences of __stop_machine()
between SMP and UP builds.
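
For reference, the !CONFIG_SMP stub in include/linux/stop_machine.h is
roughly the following (paraphrased; treat the exact body as approximate):

	static inline int __stop_machine(int (*fn)(void *), void *data,
					 const struct cpumask *cpus)
	{
		int ret;

		/* no other cpus to rendezvous with: just run fn with irqs off */
		local_irq_disable();
		ret = fn(data);
		local_irq_enable();
		return ret;
	}

i.e. it simply runs @fn with interrupts disabled and does not care whether
the calling cpu is online yet.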

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: stable@kernel.org    # v2.6.35+
---
 kernel/stop_machine.c |   62 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 5 deletions(-)

Index: linux-2.6-tip/kernel/stop_machine.c
===================================================================
--- linux-2.6-tip.orig/kernel/stop_machine.c
+++ linux-2.6-tip/kernel/stop_machine.c
@@ -136,12 +136,35 @@ void stop_one_cpu_nowait(unsigned int cp
 static DEFINE_MUTEX(stop_cpus_mutex);
 static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
+/**
+ * __stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
+ * is run on all the online cpus including the current cpu (even if it
+ * is not online).
+ * On each target cpu, @fn is run in a process context (except when run on
+ * the cpu that is in the process of coming online, in which case @fn is run
+ * in the same context as __stop_cpus()) with the highest priority
+ * preempting any task on the cpu and monopolizing it.  This function
+ * returns after all executions are complete.
+ */
 int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
+	int online = percpu_read(cpu_stopper.enabled);
+	int include_this_offline = 0;
 	struct cpu_stop_work *work;
 	struct cpu_stop_done done;
+	unsigned int weight;
 	unsigned int cpu;
 
+	if (!cpumask) {
+		cpumask = cpu_online_mask;
+		include_this_offline = 1;
+	}
+
 	/* initialize works and done */
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(stop_cpus_work, cpu);
@@ -149,7 +172,12 @@ int __stop_cpus(const struct cpumask *cp
 		work->arg = arg;
 		work->done = &done;
 	}
-	cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+	weight = cpumask_weight(cpumask);
+	if (!online && include_this_offline)
+		weight++;
+
+	cpu_stop_init_done(&done, weight);
 
 	/*
 	 * Disable preemption while queueing to avoid getting
@@ -162,7 +190,20 @@ int __stop_cpus(const struct cpumask *cp
 				    &per_cpu(stop_cpus_work, cpu));
 	preempt_enable();
 
-	wait_for_completion(&done.completion);
+	if (online)
+		wait_for_completion(&done.completion);
+	else {
+		/*
+		 * This cpu is not yet online. If @fn needs to be run on this
+		 * cpu, run it now. Also, we can't afford to sleep here,
+		 * so poll till the work is completed on all the cpu's.
+		 */
+		if (include_this_offline)
+			fn(arg);
+		while (atomic_read(&done.nr_todo) > 1)
+			cpu_relax();
+	}
+
 	return done.executed ? done.ret : -ENOENT;
 }
 
@@ -431,6 +472,7 @@ static int stop_machine_cpu_stop(void *d
 	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
 	int cpu = smp_processor_id(), err = 0;
+	unsigned long flags = 0;
 	bool is_active;
 
 	if (!smdata->active_cpus)
@@ -446,7 +488,7 @@ static int stop_machine_cpu_stop(void *d
 			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
-				local_irq_disable();
+				local_irq_save(flags);
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
@@ -460,7 +502,7 @@ static int stop_machine_cpu_stop(void *d
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
-	local_irq_enable();
+	local_irq_restore(flags);
 	return err;
 }
 
@@ -470,9 +512,19 @@ int __stop_machine(int (*fn)(void *), vo
 					    .num_threads = num_online_cpus(),
 					    .active_cpus = cpus };
 
+	/* Include the calling cpu that might not be online yet. */
+	if (!percpu_read(cpu_stopper.enabled))
+		smdata.num_threads++;
+
 	/* Set the initial state and stop all online cpus. */
 	set_state(&smdata, STOPMACHINE_PREPARE);
-	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+
+	if (percpu_read(cpu_stopper.enabled))
+		return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
+				 &smdata);
+	else
+		return __stop_cpus(NULL, stop_machine_cpu_stop,
+				   &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)




* [patch v2 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous
  2011-06-07 20:14 [patch v2 0/2] enhance stop machine infrastructure for MTRR rendezvous sequence Suresh Siddha
  2011-06-07 20:14 ` [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
@ 2011-06-07 20:14 ` Suresh Siddha
  1 sibling, 0 replies; 6+ messages in thread
From: Suresh Siddha @ 2011-06-07 20:14 UTC (permalink / raw)
  To: mingo, tglx, hpa, trenn, prarit, tj
  Cc: linux-kernel, suresh.b.siddha, youquan.song, stable

[-- Attachment #1: use_stop_machine_for_mtrr_rendezvous.patch --]
[-- Type: text/plain, Size: 7320 bytes --]

The MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially
happen in parallel with another system-wide rendezvous using
stop_machine(). This can lead to deadlock.

The MTRR rendezvous sequence was not implemented using stop_machine()
before, as it gets called both from process context as well as from the
cpu online path (where the cpu has not yet come online and interrupts
are disabled, etc.).

Now that __stop_machine() works even when the calling cpu is not online,
use __stop_machine() to implement the MTRR rendezvous sequence. This
consolidates the code and avoids the above-mentioned deadlock.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: stable@kernel.org    # v2.6.35+
---
 arch/x86/kernel/cpu/mtrr/main.c |  154 +++++++---------------------------------
 1 file changed, 27 insertions(+), 127 deletions(-)

Index: tree/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- tree.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ tree/arch/x86/kernel/cpu/mtrr/main.c
@@ -137,18 +137,15 @@ static void __init init_table(void)
 }
 
 struct set_mtrr_data {
-	atomic_t	count;
-	atomic_t	gate;
 	unsigned long	smp_base;
 	unsigned long	smp_size;
 	unsigned int	smp_reg;
 	mtrr_type	smp_type;
 };
 
-static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
-
 /**
- * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
+ * mtrr_work_handler - Work done in the synchronisation handler. Executed by
+ * all the CPUs.
  * @info: pointer to mtrr configuration data
  *
  * Returns nothing.
@@ -157,35 +154,26 @@ static int mtrr_work_handler(void *info)
 {
 #ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
-	unsigned long flags;
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	local_irq_save(flags);
 
-	atomic_dec(&data->count);
-	while (atomic_read(&data->gate))
-		cpu_relax();
-
-	/*  The master has cleared me to execute  */
+	/*
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
+	 * started the boot/resume sequence, this might be a duplicate
+	 * set_all()).
+	 */
 	if (data->smp_reg != ~0U) {
 		mtrr_if->set(data->smp_reg, data->smp_base,
 			     data->smp_size, data->smp_type);
-	} else if (mtrr_aps_delayed_init) {
-		/*
-		 * Initialize the MTRRs inaddition to the synchronisation.
-		 */
+	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	atomic_dec(&data->count);
-	local_irq_restore(flags);
 #endif
 	return 0;
 }
@@ -223,20 +211,11 @@ static inline int types_compatible(mtrr_
  * 14. Wait for buddies to catch up
  * 15. Enable interrupts.
  *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU announces that it started the rendezvous handler by
- * decrementing the count, We reset data.count and set the data.gate flag
- * allowing all the cpu's to proceed with the work. As each cpu disables
- * interrupts, it'll decrement data.count once. We wait until it hits 0 and
- * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
- * are waiting for that flag to be cleared. Once it's cleared, each
- * CPU goes through the transition of updating MTRRs.
- * The CPU vendors may each do it differently,
- * so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate
- * to be set.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
- * Everyone then enables interrupts and we all continue on.
+ * What does that mean for us? Well, __stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
  *
  * Note that the mechanism is the same for UP systems, too; all the SMP stuff
  * becomes nops.
@@ -244,92 +223,13 @@ static inline int types_compatible(mtrr_
 static void
 set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 {
-	struct set_mtrr_data data;
-	unsigned long flags;
-	int cpu;
-
-	preempt_disable();
-
-	data.smp_reg = reg;
-	data.smp_base = base;
-	data.smp_size = size;
-	data.smp_type = type;
-	atomic_set(&data.count, num_booting_cpus() - 1);
-
-	/* Make sure data.count is visible before unleashing other CPUs */
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Start the ball rolling on other CPUs */
-	for_each_online_cpu(cpu) {
-		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
-
-		if (cpu == smp_processor_id())
-			continue;
-
-		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
-	}
-
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	local_irq_save(flags);
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Do our MTRR business */
-
-	/*
-	 * HACK!
-	 *
-	 * We use this same function to initialize the mtrrs during boot,
-	 * resume, runtime cpu online and on an explicit request to set a
-	 * specific MTRR.
-	 *
-	 * During boot or suspend, the state of the boot cpu's mtrrs has been
-	 * saved, and we want to replicate that across all the cpus that come
-	 * online (either at the end of boot or resume or during a runtime cpu
-	 * online). If we're doing that, @reg is set to something special and on
-	 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
-	 * is unnecessary if at this point we are still on the cpu that started
-	 * the boot/resume sequence. But there is no guarantee that we are still
-	 * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
-	 * sure that we are in sync with everyone else.
-	 */
-	if (reg != ~0U)
-		mtrr_if->set(reg, base, size, type);
-	else
-		mtrr_if->set_all();
-
-	/* Wait for the others */
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	/*
-	 * Wait here for everyone to have seen the gate change
-	 * So we're the last ones to touch 'data'
-	 */
-	while (atomic_read(&data.count))
-		cpu_relax();
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
 
-	local_irq_restore(flags);
-	preempt_enable();
+	__stop_machine(mtrr_work_handler, &data, cpu_callout_mask);
 }
 
 /**




* Re: [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-07 20:14 ` [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
@ 2011-06-08 19:20   ` Ingo Molnar
  2011-06-09  9:54   ` Tejun Heo
  1 sibling, 0 replies; 6+ messages in thread
From: Ingo Molnar @ 2011-06-08 19:20 UTC (permalink / raw)
  To: Suresh Siddha, Rusty Russell, Tejun Heo
  Cc: tglx, hpa, trenn, prarit, tj, linux-kernel, youquan.song, stable


Rusty, Tejun, what do you think about the patch below?

Thanks,

	Ingo

* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> Currently the stop machine infrastructure can be called only from a cpu
> that is online. But for !CONFIG_SMP, we do allow calling __stop_machine()
> before the cpu is online.
>
> x86, for example, requires the stop machine infrastructure in the cpu
> online path and currently implements its own stop machine (using
> stop_one_cpu_nowait()) for MTRR initialization there.
>
> Enhance __stop_machine() so that it can be called before the cpu is
> onlined. This paves the way for code consolidation and addresses potential
> deadlocks caused by multiple mechanisms doing a system-wide rendezvous.
>
> This also addresses the behavioral differences of __stop_machine()
> between SMP and UP builds.
> 
> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> Cc: stable@kernel.org    # v2.6.35+
> ---
>  kernel/stop_machine.c |   62 +++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 57 insertions(+), 5 deletions(-)
> 
> Index: linux-2.6-tip/kernel/stop_machine.c
> ===================================================================
> --- linux-2.6-tip.orig/kernel/stop_machine.c
> +++ linux-2.6-tip/kernel/stop_machine.c
> @@ -136,12 +136,35 @@ void stop_one_cpu_nowait(unsigned int cp
>  static DEFINE_MUTEX(stop_cpus_mutex);
>  static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
>  
> +/**
> + * __stop_cpus - stop multiple cpus
> + * @cpumask: cpus to stop
> + * @fn: function to execute
> + * @arg: argument to @fn
> + *
> + * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
> + * is run on all the online cpus including the current cpu (even if it
> + * is not online).
> + * On each target cpu, @fn is run in a process context (except when run on
> + * the cpu that is in the process of coming online, in which case @fn is run
> + * in the same context as __stop_cpus()) with the highest priority
> + * preempting any task on the cpu and monopolizing it.  This function
> + * returns after all executions are complete.
> + */
>  int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
>  {
> +	int online = percpu_read(cpu_stopper.enabled);
> +	int include_this_offline = 0;
>  	struct cpu_stop_work *work;
>  	struct cpu_stop_done done;
> +	unsigned int weight;
>  	unsigned int cpu;
>  
> +	if (!cpumask) {
> +		cpumask = cpu_online_mask;
> +		include_this_offline = 1;
> +	}
> +
>  	/* initialize works and done */
>  	for_each_cpu(cpu, cpumask) {
>  		work = &per_cpu(stop_cpus_work, cpu);
> @@ -149,7 +172,12 @@ int __stop_cpus(const struct cpumask *cp
>  		work->arg = arg;
>  		work->done = &done;
>  	}
> -	cpu_stop_init_done(&done, cpumask_weight(cpumask));
> +
> +	weight = cpumask_weight(cpumask);
> +	if (!online && include_this_offline)
> +		weight++;
> +
> +	cpu_stop_init_done(&done, weight);
>  
>  	/*
>  	 * Disable preemption while queueing to avoid getting
> @@ -162,7 +190,20 @@ int __stop_cpus(const struct cpumask *cp
>  				    &per_cpu(stop_cpus_work, cpu));
>  	preempt_enable();
>  
> -	wait_for_completion(&done.completion);
> +	if (online)
> +		wait_for_completion(&done.completion);
> +	else {
> +		/*
> +		 * This cpu is not yet online. If @fn needs to be run on this
> +		 * cpu, run it now. Also, we can't afford to sleep here,
> +		 * so poll till the work is completed on all the cpu's.
> +		 */
> +		if (include_this_offline)
> +			fn(arg);
> +		while (atomic_read(&done.nr_todo) > 1)
> +			cpu_relax();
> +	}
> +
>  	return done.executed ? done.ret : -ENOENT;
>  }
>  
> @@ -431,6 +472,7 @@ static int stop_machine_cpu_stop(void *d
>  	struct stop_machine_data *smdata = data;
>  	enum stopmachine_state curstate = STOPMACHINE_NONE;
>  	int cpu = smp_processor_id(), err = 0;
> +	unsigned long flags = 0;
>  	bool is_active;
>  
>  	if (!smdata->active_cpus)
> @@ -446,7 +488,7 @@ static int stop_machine_cpu_stop(void *d
>  			curstate = smdata->state;
>  			switch (curstate) {
>  			case STOPMACHINE_DISABLE_IRQ:
> -				local_irq_disable();
> +				local_irq_save(flags);
>  				hard_irq_disable();
>  				break;
>  			case STOPMACHINE_RUN:
> @@ -460,7 +502,7 @@ static int stop_machine_cpu_stop(void *d
>  		}
>  	} while (curstate != STOPMACHINE_EXIT);
>  
> -	local_irq_enable();
> +	local_irq_restore(flags);
>  	return err;
>  }
>  
> @@ -470,9 +512,19 @@ int __stop_machine(int (*fn)(void *), vo
>  					    .num_threads = num_online_cpus(),
>  					    .active_cpus = cpus };
>  
> +	/* Include the calling cpu that might not be online yet. */
> +	if (!percpu_read(cpu_stopper.enabled))
> +		smdata.num_threads++;
> +
>  	/* Set the initial state and stop all online cpus. */
>  	set_state(&smdata, STOPMACHINE_PREPARE);
> -	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
> +
> +	if (percpu_read(cpu_stopper.enabled))
> +		return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
> +				 &smdata);
> +	else
> +		return __stop_cpus(NULL, stop_machine_cpu_stop,
> +				   &smdata);
>  }
>  
>  int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
> 

-- 
Thanks,

	Ingo


* Re: [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-07 20:14 ` [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
  2011-06-08 19:20   ` Ingo Molnar
@ 2011-06-09  9:54   ` Tejun Heo
  2011-06-09 22:50     ` Suresh Siddha
  1 sibling, 1 reply; 6+ messages in thread
From: Tejun Heo @ 2011-06-09  9:54 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: mingo, tglx, hpa, trenn, prarit, linux-kernel, youquan.song, stable

Hello, Suresh, Ingo.

On Tue, Jun 07, 2011 at 01:14:12PM -0700, Suresh Siddha wrote:
> Currently the stop machine infrastructure can be called only from a cpu
> that is online. But for !CONFIG_SMP, we do allow calling __stop_machine()
> before the cpu is online.
>
> x86, for example, requires the stop machine infrastructure in the cpu
> online path and currently implements its own stop machine (using
> stop_one_cpu_nowait()) for MTRR initialization there.
>
> Enhance __stop_machine() so that it can be called before the cpu is
> onlined. This paves the way for code consolidation and addresses potential
> deadlocks caused by multiple mechanisms doing a system-wide rendezvous.
>
> This also addresses the behavioral differences of __stop_machine()
> between SMP and UP builds.
> 
> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> Cc: stable@kernel.org    # v2.6.35+

First of all, I agree this is the correct thing to do but I don't
agree with some of the details.  Also, this is slightly scary for
-stable.  Maybe we should opt for something less intrusive for
-stable?

> +/**
> + * __stop_cpus - stop multiple cpus
> + * @cpumask: cpus to stop
> + * @fn: function to execute
> + * @arg: argument to @fn
> + *
> + * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
> + * is run on all the online cpus including the current cpu (even if it
> + * is not online).
> + * On each target cpu, @fn is run in a process context (except when run on
> + * the cpu that is in the process of coming online, in which case @fn is run
> + * in the same context as __stop_cpus()) with the highest priority
> + * preempting any task on the cpu and monopolizing it.  This function
> + * returns after all executions are complete.
> + */

It's minor, but can you please follow the comment style in the same
file?  i.e. a blank line between paragraphs and separate CONTEXT: and
RETURNS: sections.  On second thought, why is this function even
exported?  It doesn't seem to have any out-of-file user left.  Maybe
best to make it static?

>  int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
>  {
> +	int online = percpu_read(cpu_stopper.enabled);
> +	int include_this_offline = 0;
>  	struct cpu_stop_work *work;
>  	struct cpu_stop_done done;
> +	unsigned int weight;
>  	unsigned int cpu;
>  
> +	if (!cpumask) {
> +		cpumask = cpu_online_mask;
> +		include_this_offline = 1;
> +	}

This seems a bit too subtle.  I would much prefer something more
explicit than a non-obvious combination of conditions to trigger
this special behavior.

Hmm... Wouldn't it be better to change cpu_stop_queue_work() to consider
whether the local CPU is offline, and then add cpu_stop_wait_done()
which does wait_for_completion() if the local CPU is online and otherwise
executes fn locally, calls cpu_stop_signal_done() and waits in a busy loop?
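
Something along these lines, completely untested and only to show the
shape I mean (assuming the local cpu, when offline, has already been
counted in @done's nr_todo):

	static void cpu_stop_wait_done(struct cpu_stop_done *done,
				       cpu_stop_fn_t fn, void *arg)
	{
		int ret;

		/* the local stopper is enabled iff the local cpu is online */
		if (percpu_read(cpu_stopper.enabled)) {
			wait_for_completion(&done->completion);
			return;
		}

		/*
		 * The local cpu is not online and must not sleep: run @fn
		 * right here, signal our own slot as done and busy-wait
		 * for the remote cpus to finish.
		 */
		ret = fn(arg);
		if (ret)
			done->ret = ret;
		cpu_stop_signal_done(done, true);

		while (atomic_read(&done->nr_todo))
			cpu_relax();
	}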

That would make the whole thing much more generic and easier to
describe.  The current implementation seems quite hacky/subtle and
doesn't fit well with the rest.  It would be much better if we can
just state "if used from local CPU which is not online and the target
@cpumask includes the local CPU, the work item is executed on-stack
and completion is waited in busy-loop" for all cpu_stop functions.

Also, it would be better to factor out work item execution and
completion from cpu_stopper_thread() and call that instead of invoking
fn(arg) directly.

Thank you.

-- 
tejun


* Re: [patch v2 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-09  9:54   ` Tejun Heo
@ 2011-06-09 22:50     ` Suresh Siddha
  0 siblings, 0 replies; 6+ messages in thread
From: Suresh Siddha @ 2011-06-09 22:50 UTC (permalink / raw)
  To: Tejun Heo
  Cc: mingo, tglx, hpa, trenn, prarit, linux-kernel, Song, Youquan, stable

On Thu, 2011-06-09 at 02:54 -0700, Tejun Heo wrote:
> First of all, I agree this is the correct thing to do but I don't
> agree with some of the details.  Also, this is slightly scary for
> -stable.  Maybe we should opt for something less intrusive for
> -stable?

You probably noticed on the suse bug that I first proposed a simple,
non-intrusive patch to address this problem before posting these two
complete patches. I am ok with pushing that simple patch (appended at
the end of this mail) for -stable and considering these two patches for
mainline.  Ingo, HPA, if we go that route, drop the -stable tag from the
v3 version of the patches.

But I do think these two patches are not that complex, and if they are
good for mainline, they should be good for -stable as well. Anyway, I am
open either way (the stable-only patch is appended at the end).

> 
> > +/**
> > + * __stop_cpus - stop multiple cpus
> > + * @cpumask: cpus to stop
> > + * @fn: function to execute
> > + * @arg: argument to @fn
> > + *
> > + * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
> > + * is run on all the online cpus including the current cpu (even if it
> > + * is not online).
> > + * On each target cpu, @fn is run in a process context (except when run on
> > + * the cpu that is in the process of coming online, in which case @fn is run
> > + * in the same context as __stop_cpus()) with the highest priority
> > + * preempting any task on the cpu and monopolizing it.  This function
> > + * returns after all executions are complete.
> > + */
> 
> It's minor, but can you please follow the comment style in the same
> file?  i.e. a blank line between paragraphs and separate CONTEXT: and
> RETURNS: sections.  On second thought, why is this function even
> exported?  It doesn't seem to have any out-of-file user left.  Maybe
> best to make it static?

Made it static in v3.


> >  int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
> >  {
> > +	int online = percpu_read(cpu_stopper.enabled);
> > +	int include_this_offline = 0;
> >  	struct cpu_stop_work *work;
> >  	struct cpu_stop_done done;
> > +	unsigned int weight;
> >  	unsigned int cpu;
> >  
> > +	if (!cpumask) {
> > +		cpumask = cpu_online_mask;
> > +		include_this_offline = 1;
> > +	}
> 
> This seems a bit too subtle.  I would much prefer something more
> explicit than a non-obvious combination of conditions to trigger
> this special behavior.

I first used cpu_callout_mask when I did the patches, but that is not
available to generic code, so I ended up using a NULL mask.

The v3 code uses cpu_all_mask instead, which should clean up all of this.

> 
> Hmm... Wouldn't it be better to change cpu_stop_queue_work() to consider
> whether the local CPU is offline, and then add cpu_stop_wait_done()
> which does wait_for_completion() if the local CPU is online and otherwise
> executes fn locally, calls cpu_stop_signal_done() and waits in a busy loop?
> 
> That would make the whole thing much more generic and easier to
> describe.  The current implementation seems quite hacky/subtle and
> doesn't fit well with the rest.  It would be much better if we can
> just state "if used from local CPU which is not online and the target
> @cpumask includes the local CPU, the work item is executed on-stack
> and completion is waited in busy-loop" for all cpu_stop functions.

Did these changes in v3.

> 
> Also, it would be better to factor out work item execution and
> completion from cpu_stopper_thread() and call that instead of invoking
> fn(arg) directly.
> 

I was not sure it was right to call cpu_stopper_thread(), which does
set_current_state() etc., from an arbitrary context, and I don't see the
point. Please review v3 and let me know if I missed anything.

thanks,
suresh
---

From: Suresh Siddha <suresh.b.siddha@intel.com>
To: stable@kernel.org
Subject: [stable only 2.6.35 - 2.6.39] x86, mtrr: lock stop machine during MTRR rendezvous sequence

The MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially
happen in parallel with another system-wide rendezvous using
stop_machine(). This can lead to deadlock: the order in which the work
items are queued can differ between cpus, so some cpus end up running
the first rendezvous handler while others run the second, each set
spinning and waiting for the other set to join its rendezvous.

The MTRR rendezvous sequence is not implemented using stop_machine(), as
it gets called both from process context as well as from the cpu online
path (where the cpu has not yet come online and interrupts are disabled,
etc.), and stop_machine() works only with online cpus.

For now, take the stop_machine mutex in the MTRR rendezvous sequence that
gets called from an online cpu (here we are in process context and can
potentially sleep while taking the mutex). The MTRR rendezvous that gets
triggered during cpu online doesn't need to take this stop_machine lock,
as stop_machine() already ensures that there is no cpu hotplug going on
in parallel by doing get_online_cpus().

For mainline, we will pursue the cleaner solution of extending the
stop_machine() infrastructure to handle the case where the calling cpu is
not yet online, and use that for the MTRR rendezvous sequence.

fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: stable@kernel.org    # v2.6.35 - v2.6.39
---
 arch/x86/kernel/cpu/mtrr/main.c |   16 +++++++++++++++-
 include/linux/stop_machine.h    |   11 +++++++++++
 kernel/stop_machine.c           |   10 ++++++++++
 3 files changed, 36 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 929739a..eb06b5f 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -244,9 +244,21 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
 static void
 set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 {
+	int cpu = raw_smp_processor_id();
+	int online = cpu_online(cpu);
 	struct set_mtrr_data data;
 	unsigned long flags;
-	int cpu;
+
+	/*
+	 * If we are not yet online, then there can be no stop_machine() in
+	 * parallel. Stop machine ensures that there is no CPU hotplug
+	 * happening in parallel by using get_online_cpus().
+	 *
+	 * Otherwise, we need to prevent a stop_machine() happening in parallel
+	 * by taking this lock.
+	 */
+	if (online)
+		lock_stop_cpus();
 
 	preempt_disable();
 
@@ -330,6 +342,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 
 	local_irq_restore(flags);
 	preempt_enable();
+	if (online)
+		unlock_stop_cpus();
 }
 
 /**
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 092dc9b..caacea9 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -33,6 +33,9 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 
+void lock_stop_cpus(void);
+void unlock_stop_cpus(void);
+
 #else	/* CONFIG_SMP */
 
 #include <linux/workqueue.h>
@@ -88,6 +91,14 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
 	return stop_cpus(cpumask, fn, arg);
 }
 
+static inline void lock_stop_cpus(void)
+{
+}
+
+static inline void unlock_stop_cpus(void)
+{
+}
+
 #endif	/* CONFIG_SMP */
 
 /*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e3516b2..abbe6e7 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -136,6 +136,16 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 static DEFINE_MUTEX(stop_cpus_mutex);
 static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
+void lock_stop_cpus(void)
+{
+	mutex_lock(&stop_cpus_mutex);
+}
+
+void unlock_stop_cpus(void)
+{
+	mutex_unlock(&stop_cpus_mutex);
+}
+
 int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
 	struct cpu_stop_work *work;



