linux-kernel.vger.kernel.org archive mirror
* [patch 0/2] enhance stop machine infrastructure for MTRR rendezvous sequence
@ 2011-06-06 23:16 Suresh Siddha
  2011-06-06 23:16 ` [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
  2011-06-06 23:16 ` [patch 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous Suresh Siddha
  0 siblings, 2 replies; 9+ messages in thread
From: Suresh Siddha @ 2011-06-06 23:16 UTC (permalink / raw)
  To: mingo, tglx, hpa, trenn, prarit, tj
  Cc: linux-kernel, suresh.b.siddha, youquan.song

The first patch enhances the stop machine infrastructure so that we
can call __stop_machine() from the cpu hotplug path, where the calling
cpu is not yet online. We already allow this for !CONFIG_SMP, so this
patch brings the CONFIG_SMP behavior in line with !CONFIG_SMP.

The second patch uses the enhanced __stop_machine() to implement the x86
MTRR init rendezvous sequence, removing the duplicate implementation of
stop machine based on stop_one_cpu_nowait(). That duplicate implementation
can potentially deadlock with a parallel system wide rendezvous that uses
__stop_machine().
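
For example (a hypothetical interleaving, not an observed trace; the
handler names are from the current code):

	CPU1 stopper queue: mtrr_work_handler, stop_machine_cpu_stop
	CPU2 stopper queue: stop_machine_cpu_stop, mtrr_work_handler

	CPU1 runs mtrr_work_handler() and spins, waiting for CPU2 to join
	the MTRR rendezvous; CPU2 runs stop_machine_cpu_stop() and spins,
	waiting for CPU1 to join the stop_machine rendezvous. Neither
	handler can make progress.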

Both of these address one of the deadlocks mentioned in
https://bugzilla.novell.com/show_bug.cgi?id=672008

thanks,
suresh




* [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-06 23:16 [patch 0/2] enhance stop machine infrastructure for MTRR rendezvous sequence Suresh Siddha
@ 2011-06-06 23:16 ` Suresh Siddha
  2011-06-07  0:19   ` [tip:x86/urgent] x86, stop_machine: Enable " tip-bot for Suresh Siddha
  2011-06-07 18:02   ` [patch 1/2] stop_machine: enable " Ingo Molnar
  2011-06-06 23:16 ` [patch 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous Suresh Siddha
  1 sibling, 2 replies; 9+ messages in thread
From: Suresh Siddha @ 2011-06-06 23:16 UTC (permalink / raw)
  To: mingo, tglx, hpa, trenn, prarit, tj
  Cc: linux-kernel, suresh.b.siddha, youquan.song, stable


Currently the stop machine infrastructure can be called only from a cpu
that is online. But for !CONFIG_SMP, we do allow calling __stop_machine()
before the cpu is online.

x86, for example, requires the stop machine infrastructure in the cpu
online path and currently implements its own stop machine (using
stop_one_cpu_nowait()) for MTRR initialization in that path.

Enhance __stop_machine() so that it can be called before the cpu is
online. This paves the way for code consolidation and addresses potential
deadlocks caused by multiple mechanisms of doing a system wide rendezvous.

This also addresses the behavioral difference of __stop_machine() between
SMP and UP builds.
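
As a rough sketch of the intended usage after this change (the handler
and data names below are placeholders; patch 2 converts the real x86
MTRR user to this form):

	static int rendezvous_handler(void *data)
	{
		/* Runs on the active cpus, including the one coming online. */
		return 0;
	}

	/*
	 * From the cpu online path, where the caller is not yet in
	 * cpu_online_mask. On x86, cpu_callout_mask also includes the
	 * cpu being brought up.
	 */
	err = __stop_machine(rendezvous_handler, &data, cpu_callout_mask);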

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: stable@kernel.org    # v2.6.35+
---
 kernel/stop_machine.c |   62 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 5 deletions(-)

Index: tree/kernel/stop_machine.c
===================================================================
--- tree.orig/kernel/stop_machine.c
+++ tree/kernel/stop_machine.c
@@ -136,12 +136,35 @@ void stop_one_cpu_nowait(unsigned int cp
 static DEFINE_MUTEX(stop_cpus_mutex);
 static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
+/**
+ * __stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
+ * is run on all the online cpus including the current cpu (even if it
+ * is not online).
+ * On each target cpu, @fn is run in a process context (except when run on
+ * the cpu that is in the process of coming online, in which case @fn is run
+ * in the same context as __stop_cpus()) with the highest priority
+ * preempting any task on the cpu and monopolizing it.  This function
+ * returns after all executions are complete.
+ */
 int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
+	int online = cpu_online(smp_processor_id());
+	int include_this_offline = 0;
 	struct cpu_stop_work *work;
 	struct cpu_stop_done done;
+	unsigned int weight;
 	unsigned int cpu;
 
+	if (!cpumask) {
+		cpumask = cpu_online_mask;
+		include_this_offline = 1;
+	}
+
 	/* initialize works and done */
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(stop_cpus_work, cpu);
@@ -149,7 +172,12 @@ int __stop_cpus(const struct cpumask *cp
 		work->arg = arg;
 		work->done = &done;
 	}
-	cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+	weight = cpumask_weight(cpumask);
+	if (!online && include_this_offline)
+		weight++;
+
+	cpu_stop_init_done(&done, weight);
 
 	/*
 	 * Disable preemption while queueing to avoid getting
@@ -162,7 +190,20 @@ int __stop_cpus(const struct cpumask *cp
 				    &per_cpu(stop_cpus_work, cpu));
 	preempt_enable();
 
-	wait_for_completion(&done.completion);
+	if (online)
+		wait_for_completion(&done.completion);
+	else {
+		/*
+		 * This cpu is not yet online. If @fn needs to be run on this
+		 * cpu, run it now. Also, we can't afford to sleep here,
+		 * so poll till the work is completed on all the cpu's.
+		 */
+		if (include_this_offline)
+			fn(arg);
+		while (atomic_read(&done.nr_todo) > 1)
+			cpu_relax();
+	}
+
 	return done.executed ? done.ret : -ENOENT;
 }
 
@@ -431,6 +472,7 @@ static int stop_machine_cpu_stop(void *d
 	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
 	int cpu = smp_processor_id(), err = 0;
+	unsigned long flags = 0;
 	bool is_active;
 
 	if (!smdata->active_cpus)
@@ -446,7 +488,7 @@ static int stop_machine_cpu_stop(void *d
 			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
-				local_irq_disable();
+				local_irq_save(flags);
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
@@ -460,7 +502,7 @@ static int stop_machine_cpu_stop(void *d
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
-	local_irq_enable();
+	local_irq_restore(flags);
 	return err;
 }
 
@@ -470,9 +512,19 @@ int __stop_machine(int (*fn)(void *), vo
 					    .num_threads = num_online_cpus(),
 					    .active_cpus = cpus };
 
+	/* Include the calling cpu that might not be online yet. */
+	if (!cpu_online(smp_processor_id()))
+		smdata.num_threads++;
+
 	/* Set the initial state and stop all online cpus. */
 	set_state(&smdata, STOPMACHINE_PREPARE);
-	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+
+	if (cpu_online(smp_processor_id()))
+		return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
+				 &smdata);
+	else
+		return __stop_cpus(NULL, stop_machine_cpu_stop,
+				   &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)




* [patch 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous
  2011-06-06 23:16 [patch 0/2] enhance stop machine infrastructure for MTRR rendezvous sequence Suresh Siddha
  2011-06-06 23:16 ` [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
@ 2011-06-06 23:16 ` Suresh Siddha
  2011-06-07  0:19   ` [tip:x86/urgent] x86, mtrr: Use " tip-bot for Suresh Siddha
  1 sibling, 1 reply; 9+ messages in thread
From: Suresh Siddha @ 2011-06-06 23:16 UTC (permalink / raw)
  To: mingo, tglx, hpa, trenn, prarit, tj
  Cc: linux-kernel, suresh.b.siddha, youquan.song, stable


The MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially
happen in parallel with another system wide rendezvous using
stop_machine(). This can lead to deadlock: the order in which the works
are queued can differ between cpus, so some cpus end up running the first
rendezvous handler while others run the second one, each set waiting for
the other set to join its system wide rendezvous.

The MTRR rendezvous sequence was not implemented using stop_machine()
before, as it gets called both from process context as well as from the
cpu online path (where the cpu is not yet online, interrupts are
disabled, etc.).

Now that __stop_machine() works even when the calling cpu is not online,
use __stop_machine() to implement the MTRR rendezvous sequence. This
consolidates the code as well as avoids the above mentioned deadlock.
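
For reference, the two call patterns the single handler now serves (both
callers already exist in the mtrr code; shown here only to illustrate
the ~0U convention):

	/* Explicit request: program one MTRR register on every cpu. */
	set_mtrr(reg, base, size, type);

	/*
	 * Boot/resume/cpu-online replication: reg == ~0U makes each cpu
	 * run mtrr_if->set_all() instead of setting a single register.
	 */
	set_mtrr(~0U, 0, 0, 0);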

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: stable@kernel.org    # v2.6.35+
---
 arch/x86/kernel/cpu/mtrr/main.c |  154 +++++++---------------------------------
 1 file changed, 27 insertions(+), 127 deletions(-)

Index: tree/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- tree.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ tree/arch/x86/kernel/cpu/mtrr/main.c
@@ -137,18 +137,15 @@ static void __init init_table(void)
 }
 
 struct set_mtrr_data {
-	atomic_t	count;
-	atomic_t	gate;
 	unsigned long	smp_base;
 	unsigned long	smp_size;
 	unsigned int	smp_reg;
 	mtrr_type	smp_type;
 };
 
-static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
-
 /**
- * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
+ * mtrr_work_handler - Work done in the synchronisation handler. Executed by
+ * all the CPUs.
  * @info: pointer to mtrr configuration data
  *
  * Returns nothing.
@@ -157,35 +154,26 @@ static int mtrr_work_handler(void *info)
 {
 #ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
-	unsigned long flags;
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	local_irq_save(flags);
 
-	atomic_dec(&data->count);
-	while (atomic_read(&data->gate))
-		cpu_relax();
-
-	/*  The master has cleared me to execute  */
+	/*
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
+	 * started the boot/resume sequence, this might be a duplicate
+	 * set_all()).
+	 */
 	if (data->smp_reg != ~0U) {
 		mtrr_if->set(data->smp_reg, data->smp_base,
 			     data->smp_size, data->smp_type);
-	} else if (mtrr_aps_delayed_init) {
-		/*
-		 * Initialize the MTRRs inaddition to the synchronisation.
-		 */
+	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	atomic_dec(&data->count);
-	local_irq_restore(flags);
 #endif
 	return 0;
 }
@@ -223,20 +211,11 @@ static inline int types_compatible(mtrr_
  * 14. Wait for buddies to catch up
  * 15. Enable interrupts.
  *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU announces that it started the rendezvous handler by
- * decrementing the count, We reset data.count and set the data.gate flag
- * allowing all the cpu's to proceed with the work. As each cpu disables
- * interrupts, it'll decrement data.count once. We wait until it hits 0 and
- * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
- * are waiting for that flag to be cleared. Once it's cleared, each
- * CPU goes through the transition of updating MTRRs.
- * The CPU vendors may each do it differently,
- * so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate
- * to be set.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
- * Everyone then enables interrupts and we all continue on.
+ * What does that mean for us? Well, __stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
  *
  * Note that the mechanism is the same for UP systems, too; all the SMP stuff
  * becomes nops.
@@ -244,92 +223,13 @@ static inline int types_compatible(mtrr_
 static void
 set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 {
-	struct set_mtrr_data data;
-	unsigned long flags;
-	int cpu;
-
-	preempt_disable();
-
-	data.smp_reg = reg;
-	data.smp_base = base;
-	data.smp_size = size;
-	data.smp_type = type;
-	atomic_set(&data.count, num_booting_cpus() - 1);
-
-	/* Make sure data.count is visible before unleashing other CPUs */
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Start the ball rolling on other CPUs */
-	for_each_online_cpu(cpu) {
-		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
-
-		if (cpu == smp_processor_id())
-			continue;
-
-		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
-	}
-
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	local_irq_save(flags);
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Do our MTRR business */
-
-	/*
-	 * HACK!
-	 *
-	 * We use this same function to initialize the mtrrs during boot,
-	 * resume, runtime cpu online and on an explicit request to set a
-	 * specific MTRR.
-	 *
-	 * During boot or suspend, the state of the boot cpu's mtrrs has been
-	 * saved, and we want to replicate that across all the cpus that come
-	 * online (either at the end of boot or resume or during a runtime cpu
-	 * online). If we're doing that, @reg is set to something special and on
-	 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
-	 * is unnecessary if at this point we are still on the cpu that started
-	 * the boot/resume sequence. But there is no guarantee that we are still
-	 * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
-	 * sure that we are in sync with everyone else.
-	 */
-	if (reg != ~0U)
-		mtrr_if->set(reg, base, size, type);
-	else
-		mtrr_if->set_all();
-
-	/* Wait for the others */
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	/*
-	 * Wait here for everyone to have seen the gate change
-	 * So we're the last ones to touch 'data'
-	 */
-	while (atomic_read(&data.count))
-		cpu_relax();
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
 
-	local_irq_restore(flags);
-	preempt_enable();
+	__stop_machine(mtrr_work_handler, &data, cpu_callout_mask);
 }
 
 /**




* [tip:x86/urgent] x86, stop_machine: Enable __stop_machine() to be called from the cpu online path
  2011-06-06 23:16 ` [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
@ 2011-06-07  0:19   ` tip-bot for Suresh Siddha
  2011-06-07 18:02   ` [patch 1/2] stop_machine: enable " Ingo Molnar
  1 sibling, 0 replies; 9+ messages in thread
From: tip-bot for Suresh Siddha @ 2011-06-07  0:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, suresh.b.siddha, tglx, hpa

Commit-ID:  e9de5210f6a6a8b403035985b5a65e77e7004690
Gitweb:     http://git.kernel.org/tip/e9de5210f6a6a8b403035985b5a65e77e7004690
Author:     Suresh Siddha <suresh.b.siddha@intel.com>
AuthorDate: Mon, 6 Jun 2011 16:16:56 -0700
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Mon, 6 Jun 2011 16:42:09 -0700

x86, stop_machine: Enable __stop_machine() to be called from the cpu online path

Currently the stop machine infrastructure can be called only from a cpu
that is online. But for !CONFIG_SMP, we do allow calling __stop_machine()
before the cpu is online.

x86, for example, requires the stop machine infrastructure in the cpu
online path and currently implements its own stop machine (using
stop_one_cpu_nowait()) for MTRR initialization in that path.

Enhance __stop_machine() so that it can be called before the cpu is
online. This paves the way for code consolidation and addresses potential
deadlocks caused by multiple mechanisms of doing a system wide rendezvous.

This also addresses the behavioral difference of __stop_machine() between
SMP and UP builds.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/20110606231752.023885847@sbsiddha-MOBL3.sc.intel.com
Cc: stable@kernel.org    # v2.6.35+
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 kernel/stop_machine.c |   62 +++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e3516b2..c78b0c2 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -136,12 +136,35 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 static DEFINE_MUTEX(stop_cpus_mutex);
 static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
+/**
+ * __stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
+ * is run on all the online cpus including the current cpu (even if it
+ * is not online).
+ * On each target cpu, @fn is run in a process context (except when run on
+ * the cpu that is in the process of coming online, in which case @fn is run
+ * in the same context as __stop_cpus()) with the highest priority
+ * preempting any task on the cpu and monopolizing it.  This function
+ * returns after all executions are complete.
+ */
 int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
+	int online = cpu_online(smp_processor_id());
+	int include_this_offline = 0;
 	struct cpu_stop_work *work;
 	struct cpu_stop_done done;
+	unsigned int weight;
 	unsigned int cpu;
 
+	if (!cpumask) {
+		cpumask = cpu_online_mask;
+		include_this_offline = 1;
+	}
+
 	/* initialize works and done */
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(stop_cpus_work, cpu);
@@ -149,7 +172,12 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 		work->arg = arg;
 		work->done = &done;
 	}
-	cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+	weight = cpumask_weight(cpumask);
+	if (!online && include_this_offline)
+		weight++;
+
+	cpu_stop_init_done(&done, weight);
 
 	/*
 	 * Disable preemption while queueing to avoid getting
@@ -162,7 +190,20 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 				    &per_cpu(stop_cpus_work, cpu));
 	preempt_enable();
 
-	wait_for_completion(&done.completion);
+	if (online)
+		wait_for_completion(&done.completion);
+	else {
+		/*
+		 * This cpu is not yet online. If @fn needs to be run on this
+		 * cpu, run it now. Also, we can't afford to sleep here,
+		 * so poll till the work is completed on all the cpu's.
+		 */
+		if (include_this_offline)
+			fn(arg);
+		while (atomic_read(&done.nr_todo) > 1)
+			cpu_relax();
+	}
+
 	return done.executed ? done.ret : -ENOENT;
 }
 
@@ -431,6 +472,7 @@ static int stop_machine_cpu_stop(void *data)
 	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
 	int cpu = smp_processor_id(), err = 0;
+	unsigned long flags = 0;
 	bool is_active;
 
 	if (!smdata->active_cpus)
@@ -446,7 +488,7 @@ static int stop_machine_cpu_stop(void *data)
 			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
-				local_irq_disable();
+				local_irq_save(flags);
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
@@ -460,7 +502,7 @@ static int stop_machine_cpu_stop(void *data)
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
-	local_irq_enable();
+	local_irq_restore(flags);
 	return err;
 }
 
@@ -470,9 +512,19 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 					    .num_threads = num_online_cpus(),
 					    .active_cpus = cpus };
 
+	/* Include the calling cpu that might not be online yet. */
+	if (!cpu_online(smp_processor_id()))
+		smdata.num_threads++;
+
 	/* Set the initial state and stop all online cpus. */
 	set_state(&smdata, STOPMACHINE_PREPARE);
-	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+
+	if (cpu_online(smp_processor_id()))
+		return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
+				 &smdata);
+	else
+		return __stop_cpus(NULL, stop_machine_cpu_stop,
+				   &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)


* [tip:x86/urgent] x86, mtrr: Use __stop_machine() for doing MTRR rendezvous
  2011-06-06 23:16 ` [patch 2/2] x86, mtrr: use __stop_machine() for doing MTRR rendezvous Suresh Siddha
@ 2011-06-07  0:19   ` tip-bot for Suresh Siddha
  0 siblings, 0 replies; 9+ messages in thread
From: tip-bot for Suresh Siddha @ 2011-06-07  0:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, suresh.b.siddha, tglx, hpa

Commit-ID:  cbb298aed2b1390c7eaa188e2918197fc99afce9
Gitweb:     http://git.kernel.org/tip/cbb298aed2b1390c7eaa188e2918197fc99afce9
Author:     Suresh Siddha <suresh.b.siddha@intel.com>
AuthorDate: Mon, 6 Jun 2011 16:16:57 -0700
Committer:  H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Mon, 6 Jun 2011 16:42:17 -0700

x86, mtrr: Use __stop_machine() for doing MTRR rendezvous

The MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially
happen in parallel with another system wide rendezvous using
stop_machine(). This can lead to deadlock: the order in which the works
are queued can differ between cpus, so some cpus end up running the first
rendezvous handler while others run the second one, each set waiting for
the other set to join its system wide rendezvous.

The MTRR rendezvous sequence was not implemented using stop_machine()
before, as it gets called both from process context as well as from the
cpu online path (where the cpu is not yet online, interrupts are
disabled, etc.).

Now that __stop_machine() works even when the calling cpu is not online,
use __stop_machine() to implement the MTRR rendezvous sequence. This
consolidates the code as well as avoids the above mentioned deadlock.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/20110606231752.109862236@sbsiddha-MOBL3.sc.intel.com
Cc: stable@kernel.org    # v2.6.35+
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/cpu/mtrr/main.c |  154 +++++++--------------------------------
 1 files changed, 27 insertions(+), 127 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 929739a..773d26d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -137,18 +137,15 @@ static void __init init_table(void)
 }
 
 struct set_mtrr_data {
-	atomic_t	count;
-	atomic_t	gate;
 	unsigned long	smp_base;
 	unsigned long	smp_size;
 	unsigned int	smp_reg;
 	mtrr_type	smp_type;
 };
 
-static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
-
 /**
- * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
+ * mtrr_work_handler - Work done in the synchronisation handler. Executed by
+ * all the CPUs.
  * @info: pointer to mtrr configuration data
  *
  * Returns nothing.
@@ -157,35 +154,26 @@ static int mtrr_work_handler(void *info)
 {
 #ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
-	unsigned long flags;
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	local_irq_save(flags);
-
-	atomic_dec(&data->count);
-	while (atomic_read(&data->gate))
-		cpu_relax();
 
-	/*  The master has cleared me to execute  */
+	/*
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
+	 * started the boot/resume sequence, this might be a duplicate
+	 * set_all()).
+	 */
 	if (data->smp_reg != ~0U) {
 		mtrr_if->set(data->smp_reg, data->smp_base,
 			     data->smp_size, data->smp_type);
-	} else if (mtrr_aps_delayed_init) {
-		/*
-		 * Initialize the MTRRs inaddition to the synchronisation.
-		 */
+	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	atomic_dec(&data->count);
-	local_irq_restore(flags);
 #endif
 	return 0;
 }
@@ -223,20 +211,11 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
  * 14. Wait for buddies to catch up
  * 15. Enable interrupts.
  *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU announces that it started the rendezvous handler by
- * decrementing the count, We reset data.count and set the data.gate flag
- * allowing all the cpu's to proceed with the work. As each cpu disables
- * interrupts, it'll decrement data.count once. We wait until it hits 0 and
- * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
- * are waiting for that flag to be cleared. Once it's cleared, each
- * CPU goes through the transition of updating MTRRs.
- * The CPU vendors may each do it differently,
- * so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate
- * to be set.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
- * Everyone then enables interrupts and we all continue on.
+ * What does that mean for us? Well, __stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
  *
  * Note that the mechanism is the same for UP systems, too; all the SMP stuff
  * becomes nops.
@@ -244,92 +223,13 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
 static void
 set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 {
-	struct set_mtrr_data data;
-	unsigned long flags;
-	int cpu;
-
-	preempt_disable();
-
-	data.smp_reg = reg;
-	data.smp_base = base;
-	data.smp_size = size;
-	data.smp_type = type;
-	atomic_set(&data.count, num_booting_cpus() - 1);
-
-	/* Make sure data.count is visible before unleashing other CPUs */
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Start the ball rolling on other CPUs */
-	for_each_online_cpu(cpu) {
-		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
-
-		if (cpu == smp_processor_id())
-			continue;
-
-		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
-	}
-
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	local_irq_save(flags);
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Do our MTRR business */
-
-	/*
-	 * HACK!
-	 *
-	 * We use this same function to initialize the mtrrs during boot,
-	 * resume, runtime cpu online and on an explicit request to set a
-	 * specific MTRR.
-	 *
-	 * During boot or suspend, the state of the boot cpu's mtrrs has been
-	 * saved, and we want to replicate that across all the cpus that come
-	 * online (either at the end of boot or resume or during a runtime cpu
-	 * online). If we're doing that, @reg is set to something special and on
-	 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
-	 * is unnecessary if at this point we are still on the cpu that started
-	 * the boot/resume sequence. But there is no guarantee that we are still
-	 * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
-	 * sure that we are in sync with everyone else.
-	 */
-	if (reg != ~0U)
-		mtrr_if->set(reg, base, size, type);
-	else
-		mtrr_if->set_all();
-
-	/* Wait for the others */
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	/*
-	 * Wait here for everyone to have seen the gate change
-	 * So we're the last ones to touch 'data'
-	 */
-	while (atomic_read(&data.count))
-		cpu_relax();
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
 
-	local_irq_restore(flags);
-	preempt_enable();
+	__stop_machine(mtrr_work_handler, &data, cpu_callout_mask);
 }
 
 /**


* Re: [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-06 23:16 ` [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path Suresh Siddha
  2011-06-07  0:19   ` [tip:x86/urgent] x86, stop_machine: Enable " tip-bot for Suresh Siddha
@ 2011-06-07 18:02   ` Ingo Molnar
  2011-06-07 18:38     ` Suresh Siddha
  1 sibling, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2011-06-07 18:02 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: tglx, hpa, trenn, prarit, tj, linux-kernel, youquan.song, stable,
	Rusty Russell, Steven Rostedt


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> Currently the stop machine infrastructure can be called only from a
> cpu that is online. But for !CONFIG_SMP, we do allow calling
> __stop_machine() before the cpu is online.
> 
> x86, for example, requires the stop machine infrastructure in the
> cpu online path and currently implements its own stop machine (using
> stop_one_cpu_nowait()) for MTRR initialization in that path.
> 
> Enhance __stop_machine() so that it can be called before the cpu is
> online. This paves the way for code consolidation and addresses
> potential deadlocks caused by multiple mechanisms of doing a system
> wide rendezvous.
> 
> This also addresses the behavioral difference of __stop_machine()
> between SMP and UP builds.
> 
> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> Cc: stable@kernel.org    # v2.6.35+
> ---
>  kernel/stop_machine.c |   62 +++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 57 insertions(+), 5 deletions(-)

This patch is causing problems:

 [   19.835435] BUG: using smp_processor_id() in preemptible [00000000] code: perf/701
 [   19.838718] caller is __stop_machine+0x3c/0xbe
 [   19.842079] Pid: 701, comm: perf Not tainted 3.0.0-rc2-tip+ #132432
 [   19.845378] Call Trace:
 [   19.847838]  [<c1112431>] ? debug_smp_processor_id+0xbd/0xd0
 [   19.848712]  [<c1050be6>] ? __stop_machine+0x3c/0xbe
 [   19.852046]  [<c1005b74>] ? text_poke+0xec/0xec
 [   19.855379]  [<c1014447>] ? do_page_fault+0xb0/0x361
 [   19.858711]  [<c1005f00>] ? text_poke_smp+0x48/0x4f
 [   19.862044]  [<c1014447>] ? do_page_fault+0xb0/0x361
 [   19.865378]  [<c100519c>] ? arch_jump_label_transform+0x50/0x64
 [   19.868712]  [<c103d61a>] ? __jump_label_update+0x28/0x39
 [   19.872044]  [<c103d647>] ? jump_label_update+0x1c/0x49
 [   19.875377]  [<c103d892>] ? jump_label_inc+0x38/0x40
 [   19.878710]  [<c105cfaf>] ? perf_swevent_init+0x118/0x129
 [   19.882044]  [<c10625b6>] ? perf_init_event+0x47/0x7c
 [   19.885376]  [<c10627f7>] ? perf_event_alloc+0x20c/0x416
 [   19.888710]  [<c1062fb1>] ? sys_perf_event_open+0x313/0x634
 [   19.892042]  [<c101469a>] ? do_page_fault+0x303/0x361
 [   19.895379]  [<c1281e70>] ? sysenter_do_call+0x12/0x26



* Re: [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-07 18:02   ` [patch 1/2] stop_machine: enable " Ingo Molnar
@ 2011-06-07 18:38     ` Suresh Siddha
  2011-06-07 18:42       ` H. Peter Anvin
  0 siblings, 1 reply; 9+ messages in thread
From: Suresh Siddha @ 2011-06-07 18:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: tglx, hpa, trenn, prarit, tj, linux-kernel, Song, Youquan,
	stable, Rusty Russell, Steven Rostedt

On Tue, 2011-06-07 at 11:02 -0700, Ingo Molnar wrote:
> * Suresh Siddha <suresh.b.siddha@intel.com> wrote:
> 
> > Currently the stop machine infrastructure can be called only from
> > a cpu that is online. But for !CONFIG_SMP, we do allow calling
> > __stop_machine() before the cpu is online.
> > 
> > x86, for example, requires the stop machine infrastructure in the
> > cpu online path and currently implements its own stop machine
> > (using stop_one_cpu_nowait()) for MTRR initialization in that path.
> > 
> > Enhance __stop_machine() so that it can be called before the cpu
> > is online. This paves the way for code consolidation and addresses
> > potential deadlocks caused by multiple mechanisms of doing a
> > system wide rendezvous.
> > 
> > This also addresses the behavioral difference of __stop_machine()
> > between SMP and UP builds.
> > 
> > Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> > Cc: stable@kernel.org    # v2.6.35+
> > ---
> >  kernel/stop_machine.c |   62 +++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 57 insertions(+), 5 deletions(-)
> 
> This patch is causing problems:
> 
>  [   19.835435] BUG: using smp_processor_id() in preemptible [00000000] code: perf/701
>  [   19.838718] caller is __stop_machine+0x3c/0xbe

This is kind of a false positive. We are just checking if the calling
cpu is online/offline (and there is no possible process migration
between an online and offline cpu).

So I can either use raw_smp_processor_id() or wrap the check in
get/put_cpu(). I would prefer raw_smp_processor_id() over the
unnecessary get/put_cpu().

We already have a preempt disable/enable further down this code path,
and I would like to keep the preempt-disabled section to the minimum
needed code instead of widening it just to address this false positive.
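
Something like one of these (illustrative sketches only):

	/*
	 * (a) Raw read: safe here because a cpu in the online path
	 * cannot migrate between being offline and online underneath us.
	 */
	int online = cpu_online(raw_smp_processor_id());

	/* (b) Or an explicit get/put_cpu() pair around the check: */
	int cpu = get_cpu();
	int online = cpu_online(cpu);
	put_cpu();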

Thoughts?

thanks,
suresh

>  [   19.842079] Pid: 701, comm: perf Not tainted 3.0.0-rc2-tip+ #132432
>  [   19.845378] Call Trace:
>  [   19.847838]  [<c1112431>] ? debug_smp_processor_id+0xbd/0xd0
>  [   19.848712]  [<c1050be6>] ? __stop_machine+0x3c/0xbe
>  [   19.852046]  [<c1005b74>] ? text_poke+0xec/0xec
>  [   19.855379]  [<c1014447>] ? do_page_fault+0xb0/0x361
>  [   19.858711]  [<c1005f00>] ? text_poke_smp+0x48/0x4f
>  [   19.862044]  [<c1014447>] ? do_page_fault+0xb0/0x361
>  [   19.865378]  [<c100519c>] ? arch_jump_label_transform+0x50/0x64
>  [   19.868712]  [<c103d61a>] ? __jump_label_update+0x28/0x39
>  [   19.872044]  [<c103d647>] ? jump_label_update+0x1c/0x49
>  [   19.875377]  [<c103d892>] ? jump_label_inc+0x38/0x40
>  [   19.878710]  [<c105cfaf>] ? perf_swevent_init+0x118/0x129
>  [   19.882044]  [<c10625b6>] ? perf_init_event+0x47/0x7c
>  [   19.885376]  [<c10627f7>] ? perf_event_alloc+0x20c/0x416
>  [   19.888710]  [<c1062fb1>] ? sys_perf_event_open+0x313/0x634
>  [   19.892042]  [<c101469a>] ? do_page_fault+0x303/0x361
>  [   19.895379]  [<c1281e70>] ? sysenter_do_call+0x12/0x26
> 



* Re: [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-07 18:38     ` Suresh Siddha
@ 2011-06-07 18:42       ` H. Peter Anvin
  2011-06-07 19:12         ` Suresh Siddha
  0 siblings, 1 reply; 9+ messages in thread
From: H. Peter Anvin @ 2011-06-07 18:42 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Ingo Molnar, tglx, trenn, prarit, tj, linux-kernel, Song,
	Youquan, stable, Rusty Russell, Steven Rostedt

On 06/07/2011 11:38 AM, Suresh Siddha wrote:
> 
> This is kind of a false positive. We are just checking if the calling
> cpu is online/offline (and there is no possible process migration
> between an online and offline cpu).
> 

Is the state not available in a percpu variable of some sort?  If not,
shouldn't it be rather than indirecting via a CPU number?

	-hpa


* Re: [patch 1/2] stop_machine: enable __stop_machine() to be called from the cpu online path
  2011-06-07 18:42       ` H. Peter Anvin
@ 2011-06-07 19:12         ` Suresh Siddha
  0 siblings, 0 replies; 9+ messages in thread
From: Suresh Siddha @ 2011-06-07 19:12 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, tglx, trenn, prarit, tj, linux-kernel, Song,
	Youquan, stable, Rusty Russell, Steven Rostedt

On Tue, 2011-06-07 at 11:42 -0700, H. Peter Anvin wrote:
> On 06/07/2011 11:38 AM, Suresh Siddha wrote:
> > 
> > This is kind of a false positive. We are just checking if the calling
> > cpu is online/offline (and there is no possible process migration
> > between an online and offline cpu).
> > 
> 
> Is the state not available in a percpu variable of some sort?  If not,
> shouldn't it be rather than indirecting via a CPU number?

This is what I first thought about, but the current percpu cpu_state is
arch specific and not all architectures define it.

Thinking about it a bit more, I should be able to use the stop machine's
percpu thread state here. Something like the appended. I am testing it
and will send the modified patches shortly.

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c78b0c2..97a1770 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -153,7 +153,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
  */
 int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
-	int online = cpu_online(smp_processor_id());
+	int online = percpu_read(cpu_stopper.enabled);
 	int include_this_offline = 0;
 	struct cpu_stop_work *work;
 	struct cpu_stop_done done;
@@ -513,13 +513,13 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 					    .active_cpus = cpus };
 
 	/* Include the calling cpu that might not be online yet. */
-	if (!cpu_online(smp_processor_id()))
+	if (!percpu_read(cpu_stopper.enabled))
 		smdata.num_threads++;
 
 	/* Set the initial state and stop all online cpus. */
 	set_state(&smdata, STOPMACHINE_PREPARE);
 
-	if (cpu_online(smp_processor_id()))
+	if (percpu_read(cpu_stopper.enabled))
 		return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
 				 &smdata);
 	else



