linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3.6-rc6] cpufreq/powernow-k8: workqueue user shouldn't migrate the kworker to another CPU
@ 2012-09-17 20:17 Tejun Heo
  2012-09-17 20:36 ` Borislav Petkov
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Tejun Heo @ 2012-09-17 20:17 UTC (permalink / raw)
  To: Linus Torvalds, Rafael J. Wysocki
  Cc: linux-kernel, cpufreq, linux-pm, Duncan, Andreas Herrmann

powernowk8_target() runs off a per-cpu work item and if the
cpufreq_policy->cpu is different from the current one, it migrates the
kworker to the target CPU by manipulating current->cpus_allowed.  The
function migrates the kworker back to the original CPU but this is
still broken.  Workqueue concurrency management requires the kworkers
to stay on the same CPU and powernowk8_target() ends up triggerring
BUG_ON(rq != this_rq()) in try_to_wake_up_local() if it contends on
fidvid_mutex and sleeps.

It is unclear why this bug is being reported now.  Duncan says it
appeared to be a regression of 3.6-rc1 and couldn't reproduce it on
3.5.  Bisection seemed to point to 63d95a91 "workqueue: use @pool
instead of @gcwq or @cpu where applicable" which is an non-functional
change.  Given that the reproduce case sometimes took upto days to
trigger, it's easy to be misled while bisecting.  Maybe something made
contention on fidvid_mutex more likely?  I don't know.

This patch fixes the bug by punting to another per-cpu work item on
the target CPU if it isn't the same as the current one.  The code
assumes that cpufreq_policy->cpu is kept online by the caller, which
Rafael tells me is the case.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Duncan <1i5t5.duncan@cox.net>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: stable@kernel.org
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=47301
---

While it's very late in the merge cycle, the fix is limited in scope
and fairly safe, so it wouldn't be too crazy to merge but then again
this can go through the next -rc1 and then -stable.  Linus, Rafael,
what do you guys think?

Thanks.

 drivers/cpufreq/powernow-k8.c |   89 +++++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 36 deletions(-)

diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index c0e8164..53db9de 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -35,7 +35,6 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/cpumask.h>
-#include <linux/sched.h>	/* for current / set_cpus_allowed() */
 #include <linux/io.h>
 #include <linux/delay.h>
 
@@ -1139,46 +1138,43 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 	return res;
 }
 
-/* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol,
-		unsigned targfreq, unsigned relation)
+struct powernowk8_target_work {
+	struct work_struct		work;
+	struct cpufreq_policy		*pol;
+	unsigned			targfreq;
+	unsigned			relation;
+	int				ret;
+};
+
+static void powernowk8_target_on_cpu(struct work_struct *work)
 {
-	cpumask_var_t oldmask;
+	struct powernowk8_target_work *tw =
+		container_of(work, struct powernowk8_target_work, work);
+	struct cpufreq_policy *pol = tw->pol;
 	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
 	u32 checkfid;
 	u32 checkvid;
 	unsigned int newstate;
-	int ret = -EIO;
 
+	tw->ret = -EINVAL;
 	if (!data)
-		return -EINVAL;
+		return;
+
+	tw->ret = -EIO;
 
 	checkfid = data->currfid;
 	checkvid = data->currvid;
 
-	/* only run on specific CPU from here on. */
-	/* This is poor form: use a workqueue or smp_call_function_single */
-	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
-		return -ENOMEM;
-
-	cpumask_copy(oldmask, tsk_cpus_allowed(current));
-	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
-	}
-
 	if (pending_bit_stuck()) {
 		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
-		goto err_out;
+		return;
 	}
 
 	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
-		pol->cpu, targfreq, pol->min, pol->max, relation);
+		pol->cpu, tw->targfreq, pol->min, pol->max, tw->relation);
 
 	if (query_current_values_with_pending_wait(data))
-		goto err_out;
+		return;
 
 	if (cpu_family != CPU_HW_PSTATE) {
 		pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
@@ -1195,23 +1191,23 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 	}
 
 	if (cpufreq_frequency_table_target(pol, data->powernow_table,
-				targfreq, relation, &newstate))
-		goto err_out;
+				tw->targfreq, tw->relation, &newstate))
+		return;
 
 	mutex_lock(&fidvid_mutex);
 
 	powernow_k8_acpi_pst_values(data, newstate);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		ret = transition_frequency_pstate(data,
-			data->powernow_table[newstate].index);
+		tw->ret = transition_frequency_pstate(data,
+				data->powernow_table[newstate].index);
 	else
-		ret = transition_frequency_fidvid(data, newstate);
-	if (ret) {
+		tw->ret = transition_frequency_fidvid(data, newstate);
+	if (tw->ret) {
 		printk(KERN_ERR PFX "transition frequency failed\n");
-		ret = 1;
+		tw->ret = 1;
 		mutex_unlock(&fidvid_mutex);
-		goto err_out;
+		return;
 	}
 	mutex_unlock(&fidvid_mutex);
 
@@ -1220,12 +1216,33 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 				data->powernow_table[newstate].index);
 	else
 		pol->cur = find_khz_freq_from_fid(data->currfid);
-	ret = 0;
 
-err_out:
-	set_cpus_allowed_ptr(current, oldmask);
-	free_cpumask_var(oldmask);
-	return ret;
+	tw->ret = 0;
+}
+
+/* Driver entry point to switch to the target frequency */
+static int powernowk8_target(struct cpufreq_policy *pol,
+		unsigned targfreq, unsigned relation)
+{
+	struct powernowk8_target_work tw;
+
+	/*
+	 * Must run on @pol->cpu.  Bounce to workqueue if necessary.
+	 * cpufreq core is responsible for ensuring the cpu stays online.
+	 */
+	INIT_WORK_ONSTACK(&tw.work, powernowk8_target_on_cpu);
+	tw.pol = pol;
+	tw.targfreq = targfreq;
+	tw.relation = relation;
+
+	if (smp_processor_id() == pol->cpu) {
+		powernowk8_target_on_cpu(&tw.work);
+	} else {
+		schedule_work_on(pol->cpu, &tw.work);
+		flush_work(&tw.work);
+	}
+
+	return tw.ret;
 }
 
 /* Driver entry point to verify the policy and range of frequencies */

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-09-23  1:56 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-17 20:17 [PATCH 3.6-rc6] cpufreq/powernow-k8: workqueue user shouldn't migrate the kworker to another CPU Tejun Heo
2012-09-17 20:36 ` Borislav Petkov
2012-09-17 20:53   ` Tejun Heo
2012-09-17 21:22     ` Borislav Petkov
2012-09-17 20:38 ` Rafael J. Wysocki
2012-09-23  1:53   ` Thomas Renninger
2012-09-18 20:12 ` Tejun Heo
2012-09-18 20:27   ` Linus Torvalds
2012-09-18 20:49     ` [PATCH 3.6-rc6 1/2] workqueue: reimplement work_on_cpu() using system_wq Tejun Heo
2012-09-18 20:51       ` [PATCH 3.6-rc6 2/2] cpufreq/powernow-k8: workqueue user shouldn't migrate the kworker to another CPU Tejun Heo
2012-09-18 20:30   ` [PATCH 3.6-rc6] " Rafael J. Wysocki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).