linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
@ 2014-07-10  2:37 Saravana Kannan
  2014-07-11  4:18 ` [PATCH v2] " Saravana Kannan
  2014-07-16 22:02 ` [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Rafael J. Wysocki
  0 siblings, 2 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-10  2:37 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel, Saravana Kannan

Preliminary patch. Not tested. Just sending out to give an idea of what I'm
looking to do. Expect a lot more simplification when it's done.

Benefits:
* Much simpler code.
* Fewer stability issues.
* Suspend/resume time would improve.
* Hotplug time would improve.
* Sysfs file permissions would be maintained.
* More policy settings would be maintained across suspend/resume.
* cpufreq stats would be maintained across hotplug for all CPUs.

Change-Id: I39c395e1fee8731880c0fd7c8a9c1d83e2e4b8d0
Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 293 +++++++++-------------------------------------
 1 file changed, 55 insertions(+), 238 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 62259d2..8ca1b6f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -859,13 +859,13 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr)
 }
 EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
 
-/* symlink affected CPUs */
+/* symlink related CPUs */
 static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 {
 	unsigned int j;
 	int ret = 0;
 
-	for_each_cpu(j, policy->cpus) {
+	for_each_cpu(j, policy->related_cpus) {
 		struct device *cpu_dev;
 
 		if (j == policy->cpu)
@@ -881,12 +881,16 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 	return ret;
 }
 
-static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
-				     struct device *dev)
+static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
 {
 	struct freq_attr **drv_attr;
+	struct device *dev;
 	int ret = 0;
 
+	dev = get_cpu_device(policy->cpu);
+	if (!dev)
+		return -EINVAL;
+
 	/* prepare interface data */
 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
 				   &dev->kobj, "cpufreq");
@@ -961,12 +965,13 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
-				  unsigned int cpu, struct device *dev)
+static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
+				  unsigned int cpu, bool add)
 {
 	int ret = 0;
 	unsigned long flags;
 
+	/* FIXME: Don't send START/STOP when going from/to 0 cpus */
 	if (has_target()) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
@@ -979,7 +984,11 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
-	cpumask_set_cpu(cpu, policy->cpus);
+	if (add)
+		cpumask_set_cpu(cpu, policy->cpus);
+	else
+		cpumask_clear_cpu(cpu, policy->cpus);
+
 	per_cpu(cpufreq_cpu_data, cpu) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -995,27 +1004,9 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 			return ret;
 		}
 	}
-
-	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 }
 #endif
 
-static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-	unsigned long flags;
-
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
-
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-	policy->governor = NULL;
-
-	return policy;
-}
-
 static struct cpufreq_policy *cpufreq_policy_alloc(void)
 {
 	struct cpufreq_policy *policy;
@@ -1076,22 +1067,6 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	kfree(policy);
 }
 
-static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
-{
-	if (WARN_ON(cpu == policy->cpu))
-		return;
-
-	down_write(&policy->rwsem);
-
-	policy->last_cpu = policy->cpu;
-	policy->cpu = cpu;
-
-	up_write(&policy->rwsem);
-
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_UPDATE_POLICY_CPU, policy);
-}
-
 static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int j, cpu = dev->id;
@@ -1111,55 +1086,28 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 #ifdef CONFIG_SMP
 	/* check whether a different CPU already registered this
 	 * CPU because it is in the same boat. */
+	/* FIXME: This probably needs fixing to avoid "try lock" from
+	 * returning NULL. Also, change to likely() */
 	policy = cpufreq_cpu_get(cpu);
 	if (unlikely(policy)) {
+		cpufreq_change_policy_cpus(policy, cpu, true);
 		cpufreq_cpu_put(policy);
 		return 0;
 	}
 #endif
 
+	/* FIXME: Is returning 0 the right thing to do?! Existing code */
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Check if this cpu was hot-unplugged earlier and has siblings */
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
-		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
-			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
-			up_read(&cpufreq_rwsem);
-			return ret;
-		}
-	}
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
-
-	/*
-	 * Restore the saved policy when doing light-weight init and fall back
-	 * to the full init if that fails.
-	 */
-	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
-	if (!policy) {
-		recover_policy = false;
-		policy = cpufreq_policy_alloc();
-		if (!policy)
-			goto nomem_out;
-	}
-
-	/*
-	 * In the resume path, since we restore a saved policy, the assignment
-	 * to policy->cpu is like an update of the existing policy, rather than
-	 * the creation of a brand new one. So we need to perform this update
-	 * by invoking update_policy_cpu().
-	 */
-	if (recover_policy && cpu != policy->cpu)
-		update_policy_cpu(policy, cpu);
-	else
-		policy->cpu = cpu;
+	/* If we get this far, this is the first time we are adding the
+	 * policy */
+	policy = cpufreq_policy_alloc();
+	if (!policy)
+		goto nomem_out;
+	policy->cpu = cpu;
 
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
-
 	init_completion(&policy->kobj_unregister);
 	INIT_WORK(&policy->update, handle_update);
 
@@ -1175,20 +1123,23 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	/* related cpus should atleast have policy->cpus */
 	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
 
+	/* Weed out impossible CPUs. */
+	cpumask_and(policy->related_cpus, policy->related_cpus,
+			cpu_possible_mask);
+
+	/* Just make the first CPU in the policy as the permanent owner of
+	 * the sysfs nodes. It doesn't need to be online to host the nodes */
+	policy->cpu = cpumask_first(policy->related_cpus);
+
 	/*
 	 * affected cpus must always be the one, which are online. We aren't
 	 * managing offline cpus here.
 	 */
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
-	if (!recover_policy) {
-		policy->user_policy.min = policy->min;
-		policy->user_policy.max = policy->max;
-	}
-
 	down_write(&policy->rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1243,13 +1194,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
 
-	if (!recover_policy) {
-		ret = cpufreq_add_dev_interface(policy, dev);
-		if (ret)
-			goto err_out_unregister;
-		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				CPUFREQ_CREATE_POLICY, policy);
-	}
+	ret = cpufreq_add_dev_interface(policy);
+	if (ret)
+		goto err_out_unregister;
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_CREATE_POLICY, policy);
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	list_add(&policy->policy_list, &cpufreq_policy_list);
@@ -1257,10 +1206,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	cpufreq_init_policy(policy);
 
-	if (!recover_policy) {
-		policy->user_policy.policy = policy->policy;
-		policy->user_policy.governor = policy->governor;
-	}
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -1307,161 +1252,43 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	return __cpufreq_add_dev(dev, sif);
 }
 
-static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
-					   unsigned int old_cpu)
-{
-	struct device *cpu_dev;
-	int ret;
-
-	/* first sibling now owns the new sysfs dir */
-	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
-
-	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
-	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
-	if (ret) {
-		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
-
-		down_write(&policy->rwsem);
-		cpumask_set_cpu(old_cpu, policy->cpus);
-		up_write(&policy->rwsem);
-
-		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
-					"cpufreq");
-
-		return -EINVAL;
-	}
-
-	return cpu_dev->id;
-}
-
-static int __cpufreq_remove_dev_prepare(struct device *dev,
-					struct subsys_interface *sif)
+static int __cpufreq_remove_dev(struct device *dev,
+				struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id, cpus;
-	int new_cpu, ret;
+	int ret;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	/* Save the policy somewhere when doing a light-weight tear-down */
-	if (cpufreq_suspended)
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
-
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
 		return -EINVAL;
 	}
 
-	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-		if (ret) {
-			pr_err("%s: Failed to stop governor\n", __func__);
-			return ret;
-		}
-	}
-
-	if (!cpufreq_driver->setpolicy)
-		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
-			policy->governor->name, CPUFREQ_NAME_LEN);
+#ifdef CONFIG_HOTPLUG_CPU
+	ret = cpufreq_change_policy_cpus(policy, cpu, false);
+	/* FIXME: Handle error */
+#endif
 
+	/* FIXME: This stuff below would get pulled into change_policy_cpus.
+	 * Keeping it here just for the RFC diff to be easy to read. */
 	down_read(&policy->rwsem);
 	cpus = cpumask_weight(policy->cpus);
 	up_read(&policy->rwsem);
 
-	if (cpu != policy->cpu) {
-		sysfs_remove_link(&dev->kobj, "cpufreq");
-	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
-		if (new_cpu >= 0) {
-			update_policy_cpu(policy, new_cpu);
-
-			if (!cpufreq_suspended)
-				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
-					 __func__, new_cpu, cpu);
-		}
-	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
+	if (cpus < 1 && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
 		cpufreq_driver->stop_cpu(policy);
 	}
 
 	return 0;
 }
 
-static int __cpufreq_remove_dev_finish(struct device *dev,
-				       struct subsys_interface *sif)
-{
-	unsigned int cpu = dev->id, cpus;
-	int ret;
-	unsigned long flags;
-	struct cpufreq_policy *policy;
-
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	policy = per_cpu(cpufreq_cpu_data, cpu);
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-	if (!policy) {
-		pr_debug("%s: No cpu_data found\n", __func__);
-		return -EINVAL;
-	}
-
-	down_write(&policy->rwsem);
-	cpus = cpumask_weight(policy->cpus);
-
-	if (cpus > 1)
-		cpumask_clear_cpu(cpu, policy->cpus);
-	up_write(&policy->rwsem);
-
-	/* If cpu is last user of policy, free policy */
-	if (cpus == 1) {
-		if (has_target()) {
-			ret = __cpufreq_governor(policy,
-					CPUFREQ_GOV_POLICY_EXIT);
-			if (ret) {
-				pr_err("%s: Failed to exit governor\n",
-				       __func__);
-				return ret;
-			}
-		}
-
-		if (!cpufreq_suspended)
-			cpufreq_policy_put_kobj(policy);
-
-		/*
-		 * Perform the ->exit() even during light-weight tear-down,
-		 * since this is a core component, and is essential for the
-		 * subsequent light-weight ->init() to succeed.
-		 */
-		if (cpufreq_driver->exit)
-			cpufreq_driver->exit(policy);
-
-		/* Remove policy from list of active policies */
-		write_lock_irqsave(&cpufreq_driver_lock, flags);
-		list_del(&policy->policy_list);
-		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-		if (!cpufreq_suspended)
-			cpufreq_policy_free(policy);
-	} else if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
-		if (ret) {
-			pr_err("%s: Failed to start governor\n", __func__);
-			return ret;
-		}
-	}
-
-	per_cpu(cpufreq_cpu_data, cpu) = NULL;
-	return 0;
-}
-
 /**
  * cpufreq_remove_dev - remove a CPU device
  *
@@ -1475,10 +1302,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 	if (cpu_is_offline(cpu))
 		return 0;
 
-	ret = __cpufreq_remove_dev_prepare(dev, sif);
-
-	if (!ret)
-		ret = __cpufreq_remove_dev_finish(dev, sif);
+	ret = __cpufreq_remove_dev(dev, sif);
 
 	return ret;
 }
@@ -2295,19 +2119,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 	if (dev) {
 		switch (action & ~CPU_TASKS_FROZEN) {
 		case CPU_ONLINE:
+		case CPU_DOWN_FAILED:
 			__cpufreq_add_dev(dev, NULL);
 			break;
 
 		case CPU_DOWN_PREPARE:
-			__cpufreq_remove_dev_prepare(dev, NULL);
-			break;
-
-		case CPU_POST_DEAD:
-			__cpufreq_remove_dev_finish(dev, NULL);
-			break;
-
-		case CPU_DOWN_FAILED:
-			__cpufreq_add_dev(dev, NULL);
+			__cpufreq_remove_dev(dev, NULL);
 			break;
 		}
 	}
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-10  2:37 [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
@ 2014-07-11  4:18 ` Saravana Kannan
  2014-07-11  6:19   ` Viresh Kumar
                     ` (2 more replies)
  2014-07-16 22:02 ` [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Rafael J. Wysocki
  1 sibling, 3 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-11  4:18 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

The CPUfreq driver moves the cpufreq policy ownership between CPUs when
CPUs within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When
moving policy ownership between CPUs, it also moves the cpufreq sysfs
directory between CPUs and also fixes up the symlinks of the other CPUs in
the cluster.

Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
directories are deleted, the kobject is released and the policy is freed.
And when the first CPU in a cluster comes up, the policy is reallocated and
initialized, kobject is acquired, the sysfs nodes are created or symlinked,
etc.

All these steps end up creating unnecessarily complicated code and locking.
There's no real benefit to adding/removing/moving the sysfs nodes and the
policy between CPUs. Other per CPU sysfs directories like power and cpuidle
are left alone during hotplug. So there's some precedent for what this
patch is trying to do.

This patch simplifies a lot of the code and locking by removing the
adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
directory and policy in place irrespective of whether the CPUs are
ONLINE/OFFLINE.

Leaving the policy, sysfs and kobject in place also brings these additional
benefits:
* Faster suspend/resume.
* Faster hotplug.
* Sysfs file permissions maintained across hotplug without userspace
  workarounds.
* Policy settings and governor tunables maintained across suspend/resume
  and hotplug.
* Cpufreq stats would be maintained across hotplug for all CPUs and can be
  queried even after CPU goes OFFLINE.

Change-Id: I39c395e1fee8731880c0fd7c8a9c1d83e2e4b8d0
Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---

Preliminary testing has been done. cpufreq directories are getting created
properly. Online/offline of CPUs work. Policies remain unmodifiable from
userspace when all policy CPUs are offline.

Error handling code has NOT been updated.

I've added a bunch of FIXME comments next to where I'm not sure about the
locking in the existing code. I believe most of the try_lock's were present
to prevent a deadlock between sysfs lock and the cpufreq locks. Now that
the sysfs entries are not touched after creating them, we should be able to
replace most/all of these try_lock's with a normal lock.

This patch has more room for code simplification, but I would like to get
some acks for the functionality and this code before I do further
simplification.

I should also be able to remove get_online_cpus() in the store function and
replace it with just a check for policy->governor_enabled. That should
theoretically reduce some contention between cpufreq stats check and
hotplug of unrelated CPUs.

Appreciate all the feedback.

Thanks,
Saravana

 drivers/cpufreq/cpufreq.c | 331 ++++++++++------------------------------------
 1 file changed, 69 insertions(+), 262 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 62259d2..e350b15 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -859,16 +859,16 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr)
 }
 EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
 
-/* symlink affected CPUs */
+/* symlink related CPUs */
 static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 {
-	unsigned int j;
+	unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
 	int ret = 0;
 
-	for_each_cpu(j, policy->cpus) {
+	for_each_cpu(j, policy->related_cpus) {
 		struct device *cpu_dev;
 
-		if (j == policy->cpu)
+		if (j == first_cpu)
 			continue;
 
 		pr_debug("Adding link for CPU: %u\n", j);
@@ -881,12 +881,16 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 	return ret;
 }
 
-static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
-				     struct device *dev)
+static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
 {
 	struct freq_attr **drv_attr;
+	struct device *dev;
 	int ret = 0;
 
+	dev = get_cpu_device(cpumask_first(policy->related_cpus));
+	if (!dev)
+		return -EINVAL;
+
 	/* prepare interface data */
 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
 				   &dev->kobj, "cpufreq");
@@ -961,60 +965,53 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
-				  unsigned int cpu, struct device *dev)
+static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
+				  unsigned int cpu, bool add)
 {
 	int ret = 0;
-	unsigned long flags;
+	unsigned int cpus;
 
-	if (has_target()) {
+	down_write(&policy->rwsem);
+	cpus = cpumask_weight(policy->cpus);
+	if (has_target() && cpus > 0) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
-			return ret;
+			goto unlock;
 		}
 	}
 
-	down_write(&policy->rwsem);
-
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	cpumask_set_cpu(cpu, policy->cpus);
-	per_cpu(cpufreq_cpu_data, cpu) = policy;
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	if (add)
+		cpumask_set_cpu(cpu, policy->cpus);
+	else
+		cpumask_clear_cpu(cpu, policy->cpus);
 
-	up_write(&policy->rwsem);
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+					CPUFREQ_UPDATE_POLICY_CPU, policy);
 
-	if (has_target()) {
+	cpus = cpumask_weight(policy->cpus);
+	policy->cpu = cpumask_first(policy->cpus);
+	if (has_target() && cpus > 0) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
 		if (!ret)
 			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 
 		if (ret) {
 			pr_err("%s: Failed to start governor\n", __func__);
-			return ret;
+			goto unlock;
 		}
 	}
 
-	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
-}
-#endif
-
-static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-	unsigned long flags;
-
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
-
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	if (cpus < 1 && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
+		cpufreq_driver->stop_cpu(policy);
+	}
 
-	policy->governor = NULL;
+unlock:
+	up_write(&policy->rwsem);
 
-	return policy;
+	return ret;
 }
+#endif
 
 static struct cpufreq_policy *cpufreq_policy_alloc(void)
 {
@@ -1076,22 +1073,6 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	kfree(policy);
 }
 
-static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
-{
-	if (WARN_ON(cpu == policy->cpu))
-		return;
-
-	down_write(&policy->rwsem);
-
-	policy->last_cpu = policy->cpu;
-	policy->cpu = cpu;
-
-	up_write(&policy->rwsem);
-
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_UPDATE_POLICY_CPU, policy);
-}
-
 static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int j, cpu = dev->id;
@@ -1099,9 +1080,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	struct cpufreq_policy *policy;
 	unsigned long flags;
 	bool recover_policy = cpufreq_suspended;
-#ifdef CONFIG_HOTPLUG_CPU
-	struct cpufreq_policy *tpolicy;
-#endif
 
 	if (cpu_is_offline(cpu))
 		return 0;
@@ -1111,55 +1089,28 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 #ifdef CONFIG_SMP
 	/* check whether a different CPU already registered this
 	 * CPU because it is in the same boat. */
+	/* FIXME: This probably needs fixing to avoid "try lock" from
+	 * returning NULL. Also, change to likely() */
 	policy = cpufreq_cpu_get(cpu);
 	if (unlikely(policy)) {
+		cpufreq_change_policy_cpus(policy, cpu, true);
 		cpufreq_cpu_put(policy);
 		return 0;
 	}
 #endif
 
+	/* FIXME: Is returning 0 the right thing to do?! Existing code */
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Check if this cpu was hot-unplugged earlier and has siblings */
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
-		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
-			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
-			up_read(&cpufreq_rwsem);
-			return ret;
-		}
-	}
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
-
-	/*
-	 * Restore the saved policy when doing light-weight init and fall back
-	 * to the full init if that fails.
-	 */
-	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
-	if (!policy) {
-		recover_policy = false;
-		policy = cpufreq_policy_alloc();
-		if (!policy)
-			goto nomem_out;
-	}
-
-	/*
-	 * In the resume path, since we restore a saved policy, the assignment
-	 * to policy->cpu is like an update of the existing policy, rather than
-	 * the creation of a brand new one. So we need to perform this update
-	 * by invoking update_policy_cpu().
-	 */
-	if (recover_policy && cpu != policy->cpu)
-		update_policy_cpu(policy, cpu);
-	else
-		policy->cpu = cpu;
+	/* If we get this far, this is the first time we are adding the
+	 * policy */
+	policy = cpufreq_policy_alloc();
+	if (!policy)
+		goto nomem_out;
+	policy->cpu = cpu;
 
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
-
 	init_completion(&policy->kobj_unregister);
 	INIT_WORK(&policy->update, handle_update);
 
@@ -1175,20 +1126,19 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	/* related cpus should atleast have policy->cpus */
 	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
 
+	/* Weed out impossible CPUs. */
+	cpumask_and(policy->related_cpus, policy->related_cpus,
+			cpu_possible_mask);
+
 	/*
 	 * affected cpus must always be the one, which are online. We aren't
 	 * managing offline cpus here.
 	 */
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
-	if (!recover_policy) {
-		policy->user_policy.min = policy->min;
-		policy->user_policy.max = policy->max;
-	}
-
 	down_write(&policy->rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1243,13 +1193,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
 
-	if (!recover_policy) {
-		ret = cpufreq_add_dev_interface(policy, dev);
-		if (ret)
-			goto err_out_unregister;
-		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				CPUFREQ_CREATE_POLICY, policy);
-	}
+	ret = cpufreq_add_dev_interface(policy);
+	if (ret)
+		goto err_out_unregister;
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_CREATE_POLICY, policy);
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	list_add(&policy->policy_list, &cpufreq_policy_list);
@@ -1257,10 +1205,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	cpufreq_init_policy(policy);
 
-	if (!recover_policy) {
-		policy->user_policy.policy = policy->policy;
-		policy->user_policy.governor = policy->governor;
-	}
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -1307,100 +1251,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	return __cpufreq_add_dev(dev, sif);
 }
 
-static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
-					   unsigned int old_cpu)
-{
-	struct device *cpu_dev;
-	int ret;
-
-	/* first sibling now owns the new sysfs dir */
-	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
-
-	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
-	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
-	if (ret) {
-		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
-
-		down_write(&policy->rwsem);
-		cpumask_set_cpu(old_cpu, policy->cpus);
-		up_write(&policy->rwsem);
-
-		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
-					"cpufreq");
-
-		return -EINVAL;
-	}
-
-	return cpu_dev->id;
-}
-
-static int __cpufreq_remove_dev_prepare(struct device *dev,
-					struct subsys_interface *sif)
+static int __cpufreq_remove_dev(struct device *dev,
+				struct subsys_interface *sif)
 {
-	unsigned int cpu = dev->id, cpus;
-	int new_cpu, ret;
+	unsigned int cpu = dev->id;
+	int ret = 0;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	/* Save the policy somewhere when doing a light-weight tear-down */
-	if (cpufreq_suspended)
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
-
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-	if (!policy) {
-		pr_debug("%s: No cpu_data found\n", __func__);
-		return -EINVAL;
-	}
-
-	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-		if (ret) {
-			pr_err("%s: Failed to stop governor\n", __func__);
-			return ret;
-		}
-	}
-
-	if (!cpufreq_driver->setpolicy)
-		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
-			policy->governor->name, CPUFREQ_NAME_LEN);
-
-	down_read(&policy->rwsem);
-	cpus = cpumask_weight(policy->cpus);
-	up_read(&policy->rwsem);
-
-	if (cpu != policy->cpu) {
-		sysfs_remove_link(&dev->kobj, "cpufreq");
-	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
-		if (new_cpu >= 0) {
-			update_policy_cpu(policy, new_cpu);
-
-			if (!cpufreq_suspended)
-				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
-					 __func__, new_cpu, cpu);
-		}
-	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
-		cpufreq_driver->stop_cpu(policy);
-	}
-
-	return 0;
-}
-
-static int __cpufreq_remove_dev_finish(struct device *dev,
-				       struct subsys_interface *sif)
-{
-	unsigned int cpu = dev->id, cpus;
-	int ret;
-	unsigned long flags;
-	struct cpufreq_policy *policy;
-
 	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -1410,56 +1270,11 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		return -EINVAL;
 	}
 
-	down_write(&policy->rwsem);
-	cpus = cpumask_weight(policy->cpus);
-
-	if (cpus > 1)
-		cpumask_clear_cpu(cpu, policy->cpus);
-	up_write(&policy->rwsem);
-
-	/* If cpu is last user of policy, free policy */
-	if (cpus == 1) {
-		if (has_target()) {
-			ret = __cpufreq_governor(policy,
-					CPUFREQ_GOV_POLICY_EXIT);
-			if (ret) {
-				pr_err("%s: Failed to exit governor\n",
-				       __func__);
-				return ret;
-			}
-		}
-
-		if (!cpufreq_suspended)
-			cpufreq_policy_put_kobj(policy);
-
-		/*
-		 * Perform the ->exit() even during light-weight tear-down,
-		 * since this is a core component, and is essential for the
-		 * subsequent light-weight ->init() to succeed.
-		 */
-		if (cpufreq_driver->exit)
-			cpufreq_driver->exit(policy);
-
-		/* Remove policy from list of active policies */
-		write_lock_irqsave(&cpufreq_driver_lock, flags);
-		list_del(&policy->policy_list);
-		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-		if (!cpufreq_suspended)
-			cpufreq_policy_free(policy);
-	} else if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
-		if (ret) {
-			pr_err("%s: Failed to start governor\n", __func__);
-			return ret;
-		}
-	}
+#ifdef CONFIG_HOTPLUG_CPU
+	ret = cpufreq_change_policy_cpus(policy, cpu, false);
+#endif
 
-	per_cpu(cpufreq_cpu_data, cpu) = NULL;
-	return 0;
+	return ret;
 }
 
 /**
@@ -1475,10 +1290,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 	if (cpu_is_offline(cpu))
 		return 0;
 
-	ret = __cpufreq_remove_dev_prepare(dev, sif);
-
-	if (!ret)
-		ret = __cpufreq_remove_dev_finish(dev, sif);
+	ret = __cpufreq_remove_dev(dev, sif);
 
 	return ret;
 }
@@ -2141,7 +1953,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 				struct cpufreq_policy *new_policy)
 {
 	struct cpufreq_governor *old_gov;
-	int ret;
+	int ret = 0;
 
 	pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
 		 new_policy->cpu, new_policy->min, new_policy->max);
@@ -2226,7 +2038,9 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 
  out:
 	pr_debug("governor: change or update limits\n");
-	return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	if (policy->governor_enabled)
+		ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	return ret;
 }
 
 /**
@@ -2295,19 +2109,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 	if (dev) {
 		switch (action & ~CPU_TASKS_FROZEN) {
 		case CPU_ONLINE:
+		case CPU_DOWN_FAILED:
 			__cpufreq_add_dev(dev, NULL);
 			break;
 
 		case CPU_DOWN_PREPARE:
-			__cpufreq_remove_dev_prepare(dev, NULL);
-			break;
-
-		case CPU_POST_DEAD:
-			__cpufreq_remove_dev_finish(dev, NULL);
-			break;
-
-		case CPU_DOWN_FAILED:
-			__cpufreq_add_dev(dev, NULL);
+			__cpufreq_remove_dev(dev, NULL);
 			break;
 		}
 	}
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  4:18 ` [PATCH v2] " Saravana Kannan
@ 2014-07-11  6:19   ` Viresh Kumar
  2014-07-11  9:59     ` skannan
  2014-07-12  3:06     ` Saravana Kannan
  2014-07-11  7:43   ` Srivatsa S. Bhat
  2014-07-15 22:47   ` [PATCH v3 0/2] Simplify hotplug/suspend handling Saravana Kannan
  2 siblings, 2 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-11  6:19 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

Hi Saravana,

Thanks for trying this..

On 11 July 2014 09:48, Saravana Kannan <skannan@codeaurora.org> wrote:
> The CPUfreq driver moves the cpufreq policy ownership between CPUs when

s/driver/core

> CPUs within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When
> moving policy ownership between CPUs, it also moves the cpufreq sysfs
> directory between CPUs and also fixes up the symlinks of the other CPUs in
> the cluster.
>
> Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
> directories are deleted, the kobject is released and the policy is freed.
> And when the first CPU in a cluster comes up, the policy is reallocated and
> initialized, kobject is acquired, the sysfs nodes are created or symlinked,
> etc.
>
> All these steps end up creating unnecessarily complicated code and locking.
> There's no real benefit to adding/removing/moving the sysfs nodes and the
> policy between CPUs. Other per CPU sysfs directories like power and cpuidle
> are left alone during hotplug. So there's some precedence to what this
> patch is trying to do.
>
> This patch simplifies a lot of the code and locking by removing the
> adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
> directory and policy in place irrespective of whether the CPUs are
> ONLINE/OFFLINE.
>
> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume.
> * Faster hotplug.
> * Sysfs file permissions maintained across hotplug without userspace
>   workarounds.
> * Policy settings and governor tunables maintained across suspend/resume
>   and hotplug.

It's already maintained during suspend/resume.

> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>   queried even after CPU goes OFFLINE.
>
> Change-Id: I39c395e1fee8731880c0fd7c8a9c1d83e2e4b8d0

remove these while sending stuff upstream..

> Tested-by: Stephen Boyd <sboyd@codeaurora.org>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>
> Preliminary testing has been done. cpufreq directories are getting created
> properly. Online/offline of CPUs work. Policies remain unmodifiable from
> userspace when all policy CPUs are offline.
>
> Error handling code has NOT been updated.
>
> I've added a bunch of FIXME comments next to where I'm not sure about the
> locking in the existing code. I believe most of the try_lock's were present
> to prevent a deadlock between sysfs lock and the cpufreq locks. Now that
> the sysfs entries are not touched after creating them, we should be able to
> replace most/all of these try_lock's with a normal lock.
>
> This patch has more room for code simplification, but I would like to get
> some acks for the functionality and this code before I do further
> simplification.
>
> I should also be able to remove get_online_cpus() in the store function and
> replace it with just a check for policy->governor_enabled. That should
> theoretically reduce some contention between cpufreq stats check and
> hotplug of unrelated CPUs.

It's just too much stuff in a single patch. I can still review it as I am very
much aware of every bit of code written here, but it would be very difficult
for others to review it. There are so many cases and configurations we have to
think of, and adding bugs with such a large patch is so so so easy.

>  drivers/cpufreq/cpufreq.c | 331 ++++++++++------------------------------------
>  1 file changed, 69 insertions(+), 262 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 62259d2..e350b15 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -859,16 +859,16 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr)
>  }
>  EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
>
> -/* symlink affected CPUs */
> +/* symlink related CPUs */
>  static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>  {
> -       unsigned int j;
> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>         int ret = 0;
>
> -       for_each_cpu(j, policy->cpus) {
> +       for_each_cpu(j, policy->related_cpus) {
>                 struct device *cpu_dev;
>
> -               if (j == policy->cpu)
> +               if (j == first_cpu)

why?

>                         continue;
>
>                 pr_debug("Adding link for CPU: %u\n", j);
> @@ -881,12 +881,16 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>         return ret;
>  }
>
> -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
> -                                    struct device *dev)
> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>  {
>         struct freq_attr **drv_attr;
> +       struct device *dev;
>         int ret = 0;
>
> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
> +       if (!dev)
> +               return -EINVAL;
> +

Why?

>         /* prepare interface data */
>         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
>                                    &dev->kobj, "cpufreq");
> @@ -961,60 +965,53 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>  }
>
>  #ifdef CONFIG_HOTPLUG_CPU
> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
> -                                 unsigned int cpu, struct device *dev)
> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
> +                                 unsigned int cpu, bool add)
>  {
>         int ret = 0;
> -       unsigned long flags;
> +       unsigned int cpus;
>
> -       if (has_target()) {
> +       down_write(&policy->rwsem);
> +       cpus = cpumask_weight(policy->cpus);
> +       if (has_target() && cpus > 0) {
>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>                 if (ret) {
>                         pr_err("%s: Failed to stop governor\n", __func__);
> -                       return ret;
> +                       goto unlock;
>                 }
>         }
>
> -       down_write(&policy->rwsem);
> -
> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -       cpumask_set_cpu(cpu, policy->cpus);
> -       per_cpu(cpufreq_cpu_data, cpu) = policy;
> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       if (add)
> +               cpumask_set_cpu(cpu, policy->cpus);
> +       else
> +               cpumask_clear_cpu(cpu, policy->cpus);
>
> -       up_write(&policy->rwsem);
> +       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +                                       CPUFREQ_UPDATE_POLICY_CPU, policy);

This should only be called when policy->cpu is updated. It shouldn't
be called anymore and can be dropped, as you might not want to change
policy->cpu after this patch.

>
> -       if (has_target()) {
> +       cpus = cpumask_weight(policy->cpus);
> +       policy->cpu = cpumask_first(policy->cpus);

why update it at all? Also, as per your logic what if cpus == 0?

> +       if (has_target() && cpus > 0) {

Instead of > or < use cpus or !cpus.

>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>                 if (!ret)
>                         ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>
>                 if (ret) {
>                         pr_err("%s: Failed to start governor\n", __func__);
> -                       return ret;
> +                       goto unlock;
>                 }
>         }
>
> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> -}
> -#endif
> -
> -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
> -{
> -       struct cpufreq_policy *policy;
> -       unsigned long flags;
> -
> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -       policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
> -
> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       if (cpus < 1 && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {

This can be made the 'else' part of the above 'if'; you just need to move
the has_target() check inside the cpus > 0 block.

> +               cpufreq_driver->stop_cpu(policy);

Where is ->exit() gone?

> +       }
>
> -       policy->governor = NULL;
> +unlock:
> +       up_write(&policy->rwsem);
>
> -       return policy;
> +       return ret;
>  }
> +#endif
>
>  static struct cpufreq_policy *cpufreq_policy_alloc(void)
>  {
> @@ -1076,22 +1073,6 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>         kfree(policy);
>  }
>
> -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
> -{
> -       if (WARN_ON(cpu == policy->cpu))
> -               return;
> -
> -       down_write(&policy->rwsem);
> -
> -       policy->last_cpu = policy->cpu;
> -       policy->cpu = cpu;
> -
> -       up_write(&policy->rwsem);
> -
> -       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -                       CPUFREQ_UPDATE_POLICY_CPU, policy);
> -}
> -
>  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  {
>         unsigned int j, cpu = dev->id;
> @@ -1099,9 +1080,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         struct cpufreq_policy *policy;
>         unsigned long flags;
>         bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -       struct cpufreq_policy *tpolicy;
> -#endif
>
>         if (cpu_is_offline(cpu))
>                 return 0;
> @@ -1111,55 +1089,28 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  #ifdef CONFIG_SMP
>         /* check whether a different CPU already registered this
>          * CPU because it is in the same boat. */
> +       /* FIXME: This probably needs fixing to avoid "try lock" from
> +        * returning NULL. Also, change to likely() */

I wanted to give this comment later, but that's fine ..

- First, I couldn't understand the try-lock fixme
- Second likely() would be better
- policy will not be available only while adding first CPU of every cluster on
every driver registration..
- And you aren't freeing 'struct cpufreq_policy' at all now. That would result
in a memory leak if the cpufreq driver is compiled as a module and
inserted/removed multiple times.

>         policy = cpufreq_cpu_get(cpu);
>         if (unlikely(policy)) {
> +               cpufreq_change_policy_cpus(policy, cpu, true);
>                 cpufreq_cpu_put(policy);
>                 return 0;
>         }

This optimization wasn't for the hotplug case, but for adding non-policy->cpu
cpus for the first time.

In your case policy->cpus would already be updated and so calling
cpufreq_change_policy_cpus() isn't required.

>  #endif
>
> +       /* FIXME: Is returning 0 the right thing to do?! Existing code */
>         if (!down_read_trylock(&cpufreq_rwsem))
>                 return 0;

Yeah, must have a better return value. But as I said, these kind of changes
must be added in separate patches.

>
> -#ifdef CONFIG_HOTPLUG_CPU
> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -                       up_read(&cpufreq_rwsem);
> -                       return ret;
> -               }
> -       }
> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif

And this one was for the hotplug case which you could have reused.

> -       /*
> -        * Restore the saved policy when doing light-weight init and fall back
> -        * to the full init if that fails.
> -        */
> -       policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
> -       if (!policy) {
> -               recover_policy = false;
> -               policy = cpufreq_policy_alloc();
> -               if (!policy)
> -                       goto nomem_out;
> -       }

You might need to use it somehow.. Currently you are never doing
this:  per_cpu(cpufreq_cpu_data, cpu) = NULL;

which would result in crazy things once you try {un}registering your
driver...

> -       /*
> -        * In the resume path, since we restore a saved policy, the assignment
> -        * to policy->cpu is like an update of the existing policy, rather than
> -        * the creation of a brand new one. So we need to perform this update
> -        * by invoking update_policy_cpu().
> -        */
> -       if (recover_policy && cpu != policy->cpu)
> -               update_policy_cpu(policy, cpu);
> -       else
> -               policy->cpu = cpu;
> +       /* If we get this far, this is the first time we are adding the
> +        * policy */
> +       policy = cpufreq_policy_alloc();
> +       if (!policy)
> +               goto nomem_out;
> +       policy->cpu = cpu;
>
>         cpumask_copy(policy->cpus, cpumask_of(cpu));
> -
>         init_completion(&policy->kobj_unregister);
>         INIT_WORK(&policy->update, handle_update);
>
> @@ -1175,20 +1126,19 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         /* related cpus should atleast have policy->cpus */
>         cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
>
> +       /* Weed out impossible CPUs. */
> +       cpumask_and(policy->related_cpus, policy->related_cpus,
> +                       cpu_possible_mask);

why?


Sorry but I am stopping now. I have already pointed out some issues
which make this unusable.

On top of that, it's way too hard to review all this in a single patch. For
every line you add/remove I have to spend 10 mins thinking about all the
possible cases that were solved with it, and whether the rest of the patch is
going to fix them or not, etc. To make it simpler I did apply your patch and
had a close look at the new state of the code, but it's getting tougher and
tougher.

Please make sure you take care of these issues:
- suspend/resume
- hotplug
- module insert/remove
- Memory leaks
- multi cluster systems (with one and multiple CPU per cluster)
*by cluster I mean group of CPUs sharing clock line
- single cluster ones, one and multiple CPUs

Will see how V3 goes. Thanks.

--
viresh

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  4:18 ` [PATCH v2] " Saravana Kannan
  2014-07-11  6:19   ` Viresh Kumar
@ 2014-07-11  7:43   ` Srivatsa S. Bhat
  2014-07-11 10:02     ` skannan
  2014-07-15 22:47   ` [PATCH v3 0/2] Simplify hotplug/suspend handling Saravana Kannan
  2 siblings, 1 reply; 76+ messages in thread
From: Srivatsa S. Bhat @ 2014-07-11  7:43 UTC (permalink / raw)
  To: Saravana Kannan, Rafael J . Wysocki, Viresh Kumar, Todd Poynor
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel, Stephen Boyd

On 07/11/2014 09:48 AM, Saravana Kannan wrote:
> The CPUfreq driver moves the cpufreq policy ownership between CPUs when
> CPUs within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When
> moving policy ownership between CPUs, it also moves the cpufreq sysfs
> directory between CPUs and also fixes up the symlinks of the other CPUs in
> the cluster.
> 
> Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
> directories are deleted, the kobject is released and the policy is freed.
> And when the first CPU in a cluster comes up, the policy is reallocated and
> initialized, kobject is acquired, the sysfs nodes are created or symlinked,
> etc.
> 
> All these steps end up creating unnecessarily complicated code and locking.
> There's no real benefit to adding/removing/moving the sysfs nodes and the
> policy between CPUs. Other per CPU sysfs directories like power and cpuidle
> are left alone during hotplug. So there's some precedence to what this
> patch is trying to do.
> 
> This patch simplifies a lot of the code and locking by removing the
> adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
> directory and policy in place irrespective of whether the CPUs are
> ONLINE/OFFLINE.
> 
> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume.
> * Faster hotplug.
> * Sysfs file permissions maintained across hotplug without userspace
>   workarounds.
> * Policy settings and governor tunables maintained across suspend/resume
>   and hotplug.
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>   queried even after CPU goes OFFLINE.
> 
> Change-Id: I39c395e1fee8731880c0fd7c8a9c1d83e2e4b8d0
> Tested-by: Stephen Boyd <sboyd@codeaurora.org>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
> 
> Preliminary testing has been done. cpufreq directories are getting created
> properly. Online/offline of CPUs work. Policies remain unmodifiable from
> userspace when all policy CPUs are offline.
> 
> Error handling code has NOT been updated.
> 
> I've added a bunch of FIXME comments next to where I'm not sure about the
> locking in the existing code. I believe most of the try_lock's were present
> to prevent a deadlock between sysfs lock and the cpufreq locks. Now that
> the sysfs entries are not touched after creating them, we should be able to
> replace most/all of these try_lock's with a normal lock.
> 
> This patch has more room for code simplification, but I would like to get
> some acks for the functionality and this code before I do further
> simplification.
> 

The idea behind this work is very welcome indeed! IMHO, there is nothing
conceptually wrong in maintaining the per-cpu sysfs files across CPU hotplug
(as long as we take care to return appropriate error codes if userspace
tries to set values using the control files of offline CPUs). So, it really
boils down to whether or not we get the implementation right; the idea itself
looks fine as of now. Hence, your efforts in making this patch(set) easier to
review will certainly help. Perhaps you can simplify the code later, but at
this point, splitting up this patch into multiple smaller, reviewable pieces
(accompanied by well-written changelogs that explain the intent) is the utmost
priority. Just like Viresh, even I had a hard time reviewing all of this in
one go.

Thank you for taking up this work!

Regards,
Srivatsa S. Bhat

> I should also be able to remove get_online_cpus() in the store function and
> replace it with just a check for policy->governor_enabled. That should
> theoretically reduce some contention between cpufreq stats check and
> hotplug of unrelated CPUs.
> 
> Appreciate all the feedback.
> 
> Thanks,
> Saravana
> 
>  drivers/cpufreq/cpufreq.c | 331 ++++++++++------------------------------------
>  1 file changed, 69 insertions(+), 262 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 62259d2..e350b15 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -859,16 +859,16 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr)
>  }
>  EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
> 
> -/* symlink affected CPUs */
> +/* symlink related CPUs */
>  static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>  {
> -	unsigned int j;
> +	unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>  	int ret = 0;
> 
> -	for_each_cpu(j, policy->cpus) {
> +	for_each_cpu(j, policy->related_cpus) {
>  		struct device *cpu_dev;
> 
> -		if (j == policy->cpu)
> +		if (j == first_cpu)
>  			continue;
> 
>  		pr_debug("Adding link for CPU: %u\n", j);
> @@ -881,12 +881,16 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>  	return ret;
>  }
> 
> -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
> -				     struct device *dev)
> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>  {
>  	struct freq_attr **drv_attr;
> +	struct device *dev;
>  	int ret = 0;
> 
> +	dev = get_cpu_device(cpumask_first(policy->related_cpus));
> +	if (!dev)
> +		return -EINVAL;
> +
>  	/* prepare interface data */
>  	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
>  				   &dev->kobj, "cpufreq");
> @@ -961,60 +965,53 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>  }
> 
>  #ifdef CONFIG_HOTPLUG_CPU
> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
> -				  unsigned int cpu, struct device *dev)
> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
> +				  unsigned int cpu, bool add)
>  {
>  	int ret = 0;
> -	unsigned long flags;
> +	unsigned int cpus;
> 
> -	if (has_target()) {
> +	down_write(&policy->rwsem);
> +	cpus = cpumask_weight(policy->cpus);
> +	if (has_target() && cpus > 0) {
>  		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>  		if (ret) {
>  			pr_err("%s: Failed to stop governor\n", __func__);
> -			return ret;
> +			goto unlock;
>  		}
>  	}
> 
> -	down_write(&policy->rwsem);
> -
> -	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -	cpumask_set_cpu(cpu, policy->cpus);
> -	per_cpu(cpufreq_cpu_data, cpu) = policy;
> -	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	if (add)
> +		cpumask_set_cpu(cpu, policy->cpus);
> +	else
> +		cpumask_clear_cpu(cpu, policy->cpus);
> 
> -	up_write(&policy->rwsem);
> +	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +					CPUFREQ_UPDATE_POLICY_CPU, policy);
> 
> -	if (has_target()) {
> +	cpus = cpumask_weight(policy->cpus);
> +	policy->cpu = cpumask_first(policy->cpus);
> +	if (has_target() && cpus > 0) {
>  		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>  		if (!ret)
>  			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> 
>  		if (ret) {
>  			pr_err("%s: Failed to start governor\n", __func__);
> -			return ret;
> +			goto unlock;
>  		}
>  	}
> 
> -	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> -}
> -#endif
> -
> -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
> -{
> -	struct cpufreq_policy *policy;
> -	unsigned long flags;
> -
> -	read_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
> -
> -	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	if (cpus < 1 && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> +		cpufreq_driver->stop_cpu(policy);
> +	}
> 
> -	policy->governor = NULL;
> +unlock:
> +	up_write(&policy->rwsem);
> 
> -	return policy;
> +	return ret;
>  }
> +#endif
> 
>  static struct cpufreq_policy *cpufreq_policy_alloc(void)
>  {
> @@ -1076,22 +1073,6 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>  	kfree(policy);
>  }
> 
> -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
> -{
> -	if (WARN_ON(cpu == policy->cpu))
> -		return;
> -
> -	down_write(&policy->rwsem);
> -
> -	policy->last_cpu = policy->cpu;
> -	policy->cpu = cpu;
> -
> -	up_write(&policy->rwsem);
> -
> -	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -			CPUFREQ_UPDATE_POLICY_CPU, policy);
> -}
> -
>  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  {
>  	unsigned int j, cpu = dev->id;
> @@ -1099,9 +1080,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	struct cpufreq_policy *policy;
>  	unsigned long flags;
>  	bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -	struct cpufreq_policy *tpolicy;
> -#endif
> 
>  	if (cpu_is_offline(cpu))
>  		return 0;
> @@ -1111,55 +1089,28 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  #ifdef CONFIG_SMP
>  	/* check whether a different CPU already registered this
>  	 * CPU because it is in the same boat. */
> +	/* FIXME: This probably needs fixing to avoid "try lock" from
> +	 * returning NULL. Also, change to likely() */
>  	policy = cpufreq_cpu_get(cpu);
>  	if (unlikely(policy)) {
> +		cpufreq_change_policy_cpus(policy, cpu, true);
>  		cpufreq_cpu_put(policy);
>  		return 0;
>  	}
>  #endif
> 
> +	/* FIXME: Is returning 0 the right thing to do?! Existing code */
>  	if (!down_read_trylock(&cpufreq_rwsem))
>  		return 0;
> 
> -#ifdef CONFIG_HOTPLUG_CPU
> -	/* Check if this cpu was hot-unplugged earlier and has siblings */
> -	read_lock_irqsave(&cpufreq_driver_lock, flags);
> -	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -			up_read(&cpufreq_rwsem);
> -			return ret;
> -		}
> -	}
> -	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> -
> -	/*
> -	 * Restore the saved policy when doing light-weight init and fall back
> -	 * to the full init if that fails.
> -	 */
> -	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
> -	if (!policy) {
> -		recover_policy = false;
> -		policy = cpufreq_policy_alloc();
> -		if (!policy)
> -			goto nomem_out;
> -	}
> -
> -	/*
> -	 * In the resume path, since we restore a saved policy, the assignment
> -	 * to policy->cpu is like an update of the existing policy, rather than
> -	 * the creation of a brand new one. So we need to perform this update
> -	 * by invoking update_policy_cpu().
> -	 */
> -	if (recover_policy && cpu != policy->cpu)
> -		update_policy_cpu(policy, cpu);
> -	else
> -		policy->cpu = cpu;
> +	/* If we get this far, this is the first time we are adding the
> +	 * policy */
> +	policy = cpufreq_policy_alloc();
> +	if (!policy)
> +		goto nomem_out;
> +	policy->cpu = cpu;
> 
>  	cpumask_copy(policy->cpus, cpumask_of(cpu));
> -
>  	init_completion(&policy->kobj_unregister);
>  	INIT_WORK(&policy->update, handle_update);
> 
> @@ -1175,20 +1126,19 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	/* related cpus should atleast have policy->cpus */
>  	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
> 
> +	/* Weed out impossible CPUs. */
> +	cpumask_and(policy->related_cpus, policy->related_cpus,
> +			cpu_possible_mask);
> +
>  	/*
>  	 * affected cpus must always be the one, which are online. We aren't
>  	 * managing offline cpus here.
>  	 */
>  	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
> 
> -	if (!recover_policy) {
> -		policy->user_policy.min = policy->min;
> -		policy->user_policy.max = policy->max;
> -	}
> -
>  	down_write(&policy->rwsem);
>  	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>  		per_cpu(cpufreq_cpu_data, j) = policy;
>  	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> 
> @@ -1243,13 +1193,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>  				     CPUFREQ_START, policy);
> 
> -	if (!recover_policy) {
> -		ret = cpufreq_add_dev_interface(policy, dev);
> -		if (ret)
> -			goto err_out_unregister;
> -		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -				CPUFREQ_CREATE_POLICY, policy);
> -	}
> +	ret = cpufreq_add_dev_interface(policy);
> +	if (ret)
> +		goto err_out_unregister;
> +	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +			CPUFREQ_CREATE_POLICY, policy);
> 
>  	write_lock_irqsave(&cpufreq_driver_lock, flags);
>  	list_add(&policy->policy_list, &cpufreq_policy_list);
> @@ -1257,10 +1205,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
> 
>  	cpufreq_init_policy(policy);
> 
> -	if (!recover_policy) {
> -		policy->user_policy.policy = policy->policy;
> -		policy->user_policy.governor = policy->governor;
> -	}
>  	up_write(&policy->rwsem);
> 
>  	kobject_uevent(&policy->kobj, KOBJ_ADD);
> @@ -1307,100 +1251,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	return __cpufreq_add_dev(dev, sif);
>  }
> 
> -static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
> -					   unsigned int old_cpu)
> -{
> -	struct device *cpu_dev;
> -	int ret;
> -
> -	/* first sibling now owns the new sysfs dir */
> -	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
> -
> -	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
> -	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
> -	if (ret) {
> -		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
> -
> -		down_write(&policy->rwsem);
> -		cpumask_set_cpu(old_cpu, policy->cpus);
> -		up_write(&policy->rwsem);
> -
> -		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> -					"cpufreq");
> -
> -		return -EINVAL;
> -	}
> -
> -	return cpu_dev->id;
> -}
> -
> -static int __cpufreq_remove_dev_prepare(struct device *dev,
> -					struct subsys_interface *sif)
> +static int __cpufreq_remove_dev(struct device *dev,
> +				struct subsys_interface *sif)
>  {
> -	unsigned int cpu = dev->id, cpus;
> -	int new_cpu, ret;
> +	unsigned int cpu = dev->id;
> +	int ret = 0;
>  	unsigned long flags;
>  	struct cpufreq_policy *policy;
> 
>  	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
> 
> -	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -	policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -	/* Save the policy somewhere when doing a light-weight tear-down */
> -	if (cpufreq_suspended)
> -		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -
> -	if (!policy) {
> -		pr_debug("%s: No cpu_data found\n", __func__);
> -		return -EINVAL;
> -	}
> -
> -	if (has_target()) {
> -		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
> -		if (ret) {
> -			pr_err("%s: Failed to stop governor\n", __func__);
> -			return ret;
> -		}
> -	}
> -
> -	if (!cpufreq_driver->setpolicy)
> -		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -			policy->governor->name, CPUFREQ_NAME_LEN);
> -
> -	down_read(&policy->rwsem);
> -	cpus = cpumask_weight(policy->cpus);
> -	up_read(&policy->rwsem);
> -
> -	if (cpu != policy->cpu) {
> -		sysfs_remove_link(&dev->kobj, "cpufreq");
> -	} else if (cpus > 1) {
> -		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -		if (new_cpu >= 0) {
> -			update_policy_cpu(policy, new_cpu);
> -
> -			if (!cpufreq_suspended)
> -				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -					 __func__, new_cpu, cpu);
> -		}
> -	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> -		cpufreq_driver->stop_cpu(policy);
> -	}
> -
> -	return 0;
> -}
> -
> -static int __cpufreq_remove_dev_finish(struct device *dev,
> -				       struct subsys_interface *sif)
> -{
> -	unsigned int cpu = dev->id, cpus;
> -	int ret;
> -	unsigned long flags;
> -	struct cpufreq_policy *policy;
> -
>  	read_lock_irqsave(&cpufreq_driver_lock, flags);
>  	policy = per_cpu(cpufreq_cpu_data, cpu);
>  	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> @@ -1410,56 +1270,11 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  		return -EINVAL;
>  	}
> 
> -	down_write(&policy->rwsem);
> -	cpus = cpumask_weight(policy->cpus);
> -
> -	if (cpus > 1)
> -		cpumask_clear_cpu(cpu, policy->cpus);
> -	up_write(&policy->rwsem);
> -
> -	/* If cpu is last user of policy, free policy */
> -	if (cpus == 1) {
> -		if (has_target()) {
> -			ret = __cpufreq_governor(policy,
> -					CPUFREQ_GOV_POLICY_EXIT);
> -			if (ret) {
> -				pr_err("%s: Failed to exit governor\n",
> -				       __func__);
> -				return ret;
> -			}
> -		}
> -
> -		if (!cpufreq_suspended)
> -			cpufreq_policy_put_kobj(policy);
> -
> -		/*
> -		 * Perform the ->exit() even during light-weight tear-down,
> -		 * since this is a core component, and is essential for the
> -		 * subsequent light-weight ->init() to succeed.
> -		 */
> -		if (cpufreq_driver->exit)
> -			cpufreq_driver->exit(policy);
> -
> -		/* Remove policy from list of active policies */
> -		write_lock_irqsave(&cpufreq_driver_lock, flags);
> -		list_del(&policy->policy_list);
> -		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -
> -		if (!cpufreq_suspended)
> -			cpufreq_policy_free(policy);
> -	} else if (has_target()) {
> -		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
> -		if (!ret)
> -			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> -
> -		if (ret) {
> -			pr_err("%s: Failed to start governor\n", __func__);
> -			return ret;
> -		}
> -	}
> +#ifdef CONFIG_HOTPLUG_CPU
> +	ret = cpufreq_change_policy_cpus(policy, cpu, false);
> +#endif
> 
> -	per_cpu(cpufreq_cpu_data, cpu) = NULL;
> -	return 0;
> +	return ret;
>  }
> 
>  /**
> @@ -1475,10 +1290,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>  	if (cpu_is_offline(cpu))
>  		return 0;
> 
> -	ret = __cpufreq_remove_dev_prepare(dev, sif);
> -
> -	if (!ret)
> -		ret = __cpufreq_remove_dev_finish(dev, sif);
> +	ret = __cpufreq_remove_dev(dev, sif);
> 
>  	return ret;
>  }
> @@ -2141,7 +1953,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
>  				struct cpufreq_policy *new_policy)
>  {
>  	struct cpufreq_governor *old_gov;
> -	int ret;
> +	int ret = 0;
> 
>  	pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
>  		 new_policy->cpu, new_policy->min, new_policy->max);
> @@ -2226,7 +2038,9 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
> 
>   out:
>  	pr_debug("governor: change or update limits\n");
> -	return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> +	if (policy->governor_enabled)
> +		ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> +	return ret;
>  }
> 
>  /**
> @@ -2295,19 +2109,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>  	if (dev) {
>  		switch (action & ~CPU_TASKS_FROZEN) {
>  		case CPU_ONLINE:
> +		case CPU_DOWN_FAILED:
>  			__cpufreq_add_dev(dev, NULL);
>  			break;
> 
>  		case CPU_DOWN_PREPARE:
> -			__cpufreq_remove_dev_prepare(dev, NULL);
> -			break;
> -
> -		case CPU_POST_DEAD:
> -			__cpufreq_remove_dev_finish(dev, NULL);
> -			break;
> -
> -		case CPU_DOWN_FAILED:
> -			__cpufreq_add_dev(dev, NULL);
> +			__cpufreq_remove_dev(dev, NULL);
>  			break;
>  		}
>  	}
> 


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  6:19   ` Viresh Kumar
@ 2014-07-11  9:59     ` skannan
  2014-07-11 10:07       ` skannan
  2014-07-11 10:52       ` Viresh Kumar
  2014-07-12  3:06     ` Saravana Kannan
  1 sibling, 2 replies; 76+ messages in thread
From: skannan @ 2014-07-11  9:59 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Saravana Kannan, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd


Viresh Kumar wrote:
> Hi Saravana,
>
> Thanks for trying this..
>
> On 11 July 2014 09:48, Saravana Kannan <skannan@codeaurora.org> wrote:
>> The CPUfreq driver moves the cpufreq policy ownership between CPUs when
>
> s/driver/core

Will do

>
>> CPUs within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When
>> moving policy ownership between CPUs, it also moves the cpufreq sysfs
>> directory between CPUs and also fixes up the symlinks of the other CPUs
>> in
>> the cluster.
>>
>> Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
>> directories are deleted, the kobject is released and the policy is
>> freed.
>> And when the first CPU in a cluster comes up, the policy is reallocated
>> and
>> initialized, kobject is acquired, the sysfs nodes are created or
>> symlinked,
>> etc.
>>
>> All these steps end up creating unnecessarily complicated code and
>> locking.
>> There's no real benefit to adding/removing/moving the sysfs nodes and
>> the
>> policy between CPUs. Other per CPU sysfs directories like power and
>> cpuidle
>> are left alone during hotplug. So there's some precedence to what this
>> patch is trying to do.
>>
>> This patch simplifies a lot of the code and locking by removing the
>> adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
>> directory and policy in place irrespective of whether the CPUs are
>> ONLINE/OFFLINE.
>>
>> Leaving the policy, sysfs and kobject in place also brings these
>> additional
>> benefits:
>> * Faster suspend/resume.
>> * Faster hotplug.
>> * Sysfs file permissions maintained across hotplug without userspace
>>   workarounds.
>> * Policy settings and governor tunables maintained across suspend/resume
>>   and hotplug.
>
> Its already maintained during suspend/resume.

But not across hotplug. Which is also very useful when you have 2 clusters
and one gets hotplugged offline due to thermal and then reinserted.
Userspace has to come and restore it back today. In our tree, we "stitched
up" the governor. Also, this makes the suspend/resume code a tiny bit
simpler -- in the sense that it's not a special case anymore.

>> * Cpufreq stats would be maintained across hotplug for all CPUs and can
>> be
>>   queried even after CPU goes OFFLINE.
>>
>> Change-Id: I39c395e1fee8731880c0fd7c8a9c1d83e2e4b8d0
>
> remove these while sending stuff upstream..

Yeah, I always think of doing this but keep forgetting in the last minute.

>> Tested-by: Stephen Boyd <sboyd@codeaurora.org>
>> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
>> ---
>>
>> Preliminary testing has been done. cpufreq directories are getting
>> created
>> properly. Online/offline of CPUs work. Policies remain unmodifiable from
>> userspace when all policy CPUs are offline.
>>
>> Error handling code has NOT been updated.
>>
>> I've added a bunch of FIXME comments next to where I'm not sure about
>> the
>> locking in the existing code. I believe most of the try_lock's were
>> present
>> to prevent a deadlock between sysfs lock and the cpufreq locks. Now that
>> the sysfs entries are not touched after creating them, we should be able
>> to
>> replace most/all of these try_lock's with a normal lock.
>>
>> This patch has more room for code simplification, but I would like to
>> get
>> some acks for the functionality and this code before I do further
>> simplification.
>>
>> I should also be able to remove get_online_cpus() in the store function
>> and
>> replace it with just a check for policy->governor_enabled. That should
>> theoretically reduce some contention between cpufreq stats check and
>> hotplug of unrelated CPUs.
>
> Its just too much stuff in a single patch, I can still review it as I
> am very much
> aware of every bit of code written here. But would be very difficult for
> others
> to review it. These are so many cases, configuration we have to think of
> and adding bugs with such a large patch is so so so easy.

Actually this is the smallest bit of code that will work. Well, after I
fix suspend/resume. I'm trying to make each patch such that the tree
continues to work after it's pulled in.

Unfortunately, I'm throwing away so much code that it ends up with a
fairly large diff. But if you look at the actual final code, it's very
simple.

But I do see your point. I'll try to keep the patch as small as possible
to continue working and make additional improvements as additional
patches.

>>  drivers/cpufreq/cpufreq.c | 331
>> ++++++++++------------------------------------
>>  1 file changed, 69 insertions(+), 262 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>> index 62259d2..e350b15 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -859,16 +859,16 @@ void cpufreq_sysfs_remove_file(const struct
>> attribute *attr)
>>  }
>>  EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
>>
>> -/* symlink affected CPUs */
>> +/* symlink related CPUs */
>>  static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>  {
>> -       unsigned int j;
>> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>>         int ret = 0;
>>
>> -       for_each_cpu(j, policy->cpus) {
>> +       for_each_cpu(j, policy->related_cpus) {
>>                 struct device *cpu_dev;
>>
>> -               if (j == policy->cpu)
>> +               if (j == first_cpu)
>
> why?

The first CPU in a cluster always owns the real nodes.

>>                         continue;
>>
>>                 pr_debug("Adding link for CPU: %u\n", j);
>> @@ -881,12 +881,16 @@ static int cpufreq_add_dev_symlink(struct
>> cpufreq_policy *policy)
>>         return ret;
>>  }
>>
>> -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
>> -                                    struct device *dev)
>> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>>  {
>>         struct freq_attr **drv_attr;
>> +       struct device *dev;
>>         int ret = 0;
>>
>> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
>> +       if (!dev)
>> +               return -EINVAL;
>> +
>
> Why?

I'm just always adding the real nodes to the first CPU in a cluster
independent of which CPU gets added first. Makes it easier to know which
ones to symlink. See comment next to policy->cpu for full context.

>>         /* prepare interface data */
>>         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
>>                                    &dev->kobj, "cpufreq");
>> @@ -961,60 +965,53 @@ static void cpufreq_init_policy(struct
>> cpufreq_policy *policy)
>>  }
>>
>>  #ifdef CONFIG_HOTPLUG_CPU
>> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>> -                                 unsigned int cpu, struct device *dev)
>> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
>> +                                 unsigned int cpu, bool add)
>>  {
>>         int ret = 0;
>> -       unsigned long flags;
>> +       unsigned int cpus;
>>
>> -       if (has_target()) {
>> +       down_write(&policy->rwsem);
>> +       cpus = cpumask_weight(policy->cpus);
>> +       if (has_target() && cpus > 0) {
>>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>>                 if (ret) {
>>                         pr_err("%s: Failed to stop governor\n",
>> __func__);
>> -                       return ret;
>> +                       goto unlock;
>>                 }
>>         }
>>
>> -       down_write(&policy->rwsem);
>> -
>> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> -       cpumask_set_cpu(cpu, policy->cpus);
>> -       per_cpu(cpufreq_cpu_data, cpu) = policy;
>> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       if (add)
>> +               cpumask_set_cpu(cpu, policy->cpus);
>> +       else
>> +               cpumask_clear_cpu(cpu, policy->cpus);
>>
>> -       up_write(&policy->rwsem);
>> +       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>> +                                       CPUFREQ_UPDATE_POLICY_CPU,
>> policy);
>
> This should be only called when policy->cpu is updated. And shouldn't
> be called anymore and can be dropped as you might not wanna change
> policy->cpu after this patch.

Right. I should have reordered this to after.

But I always HAVE to send it when I add/remove a CPU. That's how I believe
cpufreq stats can keep track of CPUs going offline. Oh, yeah. Which brings me
to another point. If I'm not mistaken, cpufreq stats keeps track of policy
stats. Really, it should track a CPU's stats. If it's hotplugged out, it
should count that time towards its current freq.

So, yeah, for now, I can send this only when policy->cpu changes. I can
fix that.

>>
>> -       if (has_target()) {
>> +       cpus = cpumask_weight(policy->cpus);
>> +       policy->cpu = cpumask_first(policy->cpus);
>
> why update it at all? Also, as per your logic what if cpus == 0?

Yeah, I didn't write it this way at first. But the governors are making
the assumption that policy->cpu is always an online CPU. So, they try to
queue work there and use data structs of that CPU (even if they free it in
the STOP event since it went offline).

Another option is to leave policy->cpu unchanged and then fix all the
governors. But this patch would get even more complicated. So, we can
leave this as is, or fix that up in a separate patch.

>> +       if (has_target() && cpus > 0) {
>
> Instead of > or < use cpus or !cpus.

Kinda personal style I guess. cpus > 0 reads like "there's more than 0
CPUs" which makes sense in English too. But sure, I can change if you
really want to.

>
>>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>>                 if (!ret)
>>                         ret = __cpufreq_governor(policy,
>> CPUFREQ_GOV_LIMITS);
>>
>>                 if (ret) {
>>                         pr_err("%s: Failed to start governor\n",
>> __func__);
>> -                       return ret;
>> +                       goto unlock;
>>                 }
>>         }
>>
>> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
>> -}
>> -#endif
>> -
>> -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
>> -{
>> -       struct cpufreq_policy *policy;
>> -       unsigned long flags;
>> -
>> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> -       policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
>> -
>> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       if (cpus < 1 && cpufreq_driver->stop_cpu &&
>> cpufreq_driver->setpolicy) {
>
> Can be made 'else' part of above 'if', just need to move if(target)
> inside the cpus > 1
> block.

Sorry if I gave the impression that this patch is complete. I'm just
sending out updates so people can see where I'm going with this instead of
dumping it all in one go.

Yes, I'll refactor the if/elses once I have the logic right.

>
>> +               cpufreq_driver->stop_cpu(policy);
>
> Where is ->exit() gone?
>

Why should I exit? I don't think we need to. I'm not planning on exit()
and init() every time an entire cluster is offlined or onlined. Just stop
the governor and let it go quiet.

>> +       }
>>
>> -       policy->governor = NULL;
>> +unlock:
>> +       up_write(&policy->rwsem);
>>
>> -       return policy;
>> +       return ret;
>>  }
>> +#endif
>>
>>  static struct cpufreq_policy *cpufreq_policy_alloc(void)
>>  {
>> @@ -1076,22 +1073,6 @@ static void cpufreq_policy_free(struct
>> cpufreq_policy *policy)
>>         kfree(policy);
>>  }
>>
>> -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned
>> int cpu)
>> -{
>> -       if (WARN_ON(cpu == policy->cpu))
>> -               return;
>> -
>> -       down_write(&policy->rwsem);
>> -
>> -       policy->last_cpu = policy->cpu;
>> -       policy->cpu = cpu;
>> -
>> -       up_write(&policy->rwsem);
>> -
>> -       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>> -                       CPUFREQ_UPDATE_POLICY_CPU, policy);
>> -}
>> -
>>  static int __cpufreq_add_dev(struct device *dev, struct
>> subsys_interface *sif)
>>  {
>>         unsigned int j, cpu = dev->id;
>> @@ -1099,9 +1080,6 @@ static int __cpufreq_add_dev(struct device *dev,
>> struct subsys_interface *sif)
>>         struct cpufreq_policy *policy;
>>         unsigned long flags;
>>         bool recover_policy = cpufreq_suspended;
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       struct cpufreq_policy *tpolicy;
>> -#endif
>>
>>         if (cpu_is_offline(cpu))
>>                 return 0;
>> @@ -1111,55 +1089,28 @@ static int __cpufreq_add_dev(struct device *dev,
>> struct subsys_interface *sif)
>>  #ifdef CONFIG_SMP
>>         /* check whether a different CPU already registered this
>>          * CPU because it is in the same boat. */
>> +       /* FIXME: This probably needs fixing to avoid "try lock" from
>> +        * returning NULL. Also, change to likely() */
>
> I wanted to give this comment later, but that's fine ..
>
> - First, I couldn't understand the try-lock fixme

If trylock fails when we are adding a new CPU to an existing cluster, I
shouldn't skip adding it just because someone else was holding the lock and
the try lock failed.

But I don't think trylocks are needed anymore. Maybe I can fix it in later
patches. We'll see.

> - Second likely() would be better
> - policy will not be available only while adding first CPU of every
> cluster on
> every driver registration..

Yes. I agree.

> - And you aren't freeing 'struct cpufreq_policy' at all now. Would result
> in
> Memory leak cpufreq driver is compiled as a module and inserted/removed
> multiple times.

Again, sorry, if I gave the impression this patch was done. If you don't
want me to send intermediate RFC patches, I can hold off. I'm well aware
that this is not completed yet. When I'm done, there should be no memory
leak when modules get added/removed.

>
>>         policy = cpufreq_cpu_get(cpu);
>>         if (unlikely(policy)) {
>> +               cpufreq_change_policy_cpus(policy, cpu, true);
>>                 cpufreq_cpu_put(policy);
>>                 return 0;
>>         }
>
> This optimization wasn't for the hotplug case, but for adding
> non-policy->cpu
> cpus for the first time.

Sure, but I'm making it an optimization for any time a CPU is added except
for the first time. Since I'm not always freeing and allocating policies
or moving them around, I can simplify it as such.

> In your case policy->cpus would already be updated and so calling
> cpufreq_change_policy_cpus() isn't required.

How? I think you misunderstood the intent here.

>>  #endif
>>
>> +       /* FIXME: Is returning 0 the right thing to do?! Existing code
>> */
>>         if (!down_read_trylock(&cpufreq_rwsem))
>>                 return 0;
>
> Yeah, must have a better return value. But as I said, these kind of
> changes
> must be added in separate patches.

Yeah, that's why I didn't fix it here yet. Leaving existing bugs as is.

>>
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       /* Check if this cpu was hot-unplugged earlier and has siblings
>> */
>> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
>> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list)
>> {
>> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
>> -                       read_unlock_irqrestore(&cpufreq_driver_lock,
>> flags);
>> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
>> -                       up_read(&cpufreq_rwsem);
>> -                       return ret;
>> -               }
>> -       }
>> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -#endif
>
> And this one was for the hotplug case which you could have reused.

Nope. The whole point is to remove all this complexity.

>
>> -       /*
>> -        * Restore the saved policy when doing light-weight init and
>> fall back
>> -        * to the full init if that fails.
>> -        */
>> -       policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
>> -       if (!policy) {
>> -               recover_policy = false;
>> -               policy = cpufreq_policy_alloc();
>> -               if (!policy)
>> -                       goto nomem_out;
>> -       }
>
> You might need to use it somehow.. Currently you are never doing
> this:  per_cpu(cpufreq_cpu_data, cpu) = NULL;

Intentionally not setting it to NULL.

> which would result in crazy things once you try {un}registering your
> driver...

I haven't handled that part yet. Planning to. But it should be pretty simple.

>> -       /*
>> -        * In the resume path, since we restore a saved policy, the
>> assignment
>> -        * to policy->cpu is like an update of the existing policy,
>> rather than
>> -        * the creation of a brand new one. So we need to perform this
>> update
>> -        * by invoking update_policy_cpu().
>> -        */
>> -       if (recover_policy && cpu != policy->cpu)
>> -               update_policy_cpu(policy, cpu);
>> -       else
>> -               policy->cpu = cpu;
>> +       /* If we get this far, this is the first time we are adding the
>> +        * policy */
>> +       policy = cpufreq_policy_alloc();
>> +       if (!policy)
>> +               goto nomem_out;
>> +       policy->cpu = cpu;
>>
>>         cpumask_copy(policy->cpus, cpumask_of(cpu));
>> -
>>         init_completion(&policy->kobj_unregister);
>>         INIT_WORK(&policy->update, handle_update);
>>
>> @@ -1175,20 +1126,19 @@ static int __cpufreq_add_dev(struct device *dev,
>> struct subsys_interface *sif)
>>         /* related cpus should atleast have policy->cpus */
>>         cpumask_or(policy->related_cpus, policy->related_cpus,
>> policy->cpus);
>>
>> +       /* Weed out impossible CPUs. */
>> +       cpumask_and(policy->related_cpus, policy->related_cpus,
>> +                       cpu_possible_mask);
>
> why?

Why not? It should make the future bit ops faster? Also, it keeps the
"first cpu" to a CPU that's actually possible. Also, if a CPU isn't
"possible" I wasn't sure if the cpuX directory would even get created in
the first place. That was another reason.

>
>
> Sorry but I am stopping now. I have already pointed out some issues
> which make this unusable.

Most of them aren't really bugs, but I should clarify the intent though.
Sorry about that. But yes, the patch as is is not usable. It's incomplete.

> Over that, its way too hard to review all this in a single patch. For
> every
> piece of line you add/remove I have to spend 10 mins thinking about
> all the possible cases that were solved with this.. And if the rest of the
> patch is going to fix them or not, etc.. To make it simple I did apply
> your patch and had a close look at the new state of code, but its getting
> tougher and tougher.

In this patch, I'm ONLY touching hotplug and resume related code (except
for one line). I'll give some description in my next patch on how I'm
expecting the events to be across hotplug/suspend and what happens with
the policies. Once we are on the same page on the intent of the patch, it
should be easier.

> Please make sure you take care of these issues:
> - suspend/resume
Didn't test. I expect it to be broken in v2.

> - hotplug
Tested.

> - module insert/remove
Didn't test. Expected to be broken.

> - Memory leaks
Will do.

> - multi cluster systems (with one and multiple CPU per cluster)
> *by cluster I mean group of CPUs sharing clock line
> - single cluster ones, one and multiple CPUs

I actually tested hotplug for all these cases. That's how I found the
governor issue.

>
> Will see how V3 goes. Thanks.

Thanks for taking the time to review and being open to these changes.
Appreciate the cooperation.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  7:43   ` Srivatsa S. Bhat
@ 2014-07-11 10:02     ` skannan
  0 siblings, 0 replies; 76+ messages in thread
From: skannan @ 2014-07-11 10:02 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: Saravana Kannan, Rafael J . Wysocki, Viresh Kumar, Todd Poynor,
	linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd


Srivatsa S. Bhat wrote:
> On 07/11/2014 09:48 AM, Saravana Kannan wrote:
>> The CPUfreq driver moves the cpufreq policy ownership between CPUs when
>> CPUs within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When
>> moving policy ownership between CPUs, it also moves the cpufreq sysfs
>> directory between CPUs and also fixes up the symlinks of the other CPUs
>> in
>> the cluster.
>>
>> Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
>> directories are deleted, the kobject is released and the policy is
>> freed.
>> And when the first CPU in a cluster comes up, the policy is reallocated
>> and
>> initialized, kobject is acquired, the sysfs nodes are created or
>> symlinked,
>> etc.
>>
>> All these steps end up creating unnecessarily complicated code and
>> locking.
>> There's no real benefit to adding/removing/moving the sysfs nodes and
>> the
>> policy between CPUs. Other per CPU sysfs directories like power and
>> cpuidle
>> are left alone during hotplug. So there's some precedence to what this
>> patch is trying to do.
>>
>> This patch simplifies a lot of the code and locking by removing the
>> adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
>> directory and policy in place irrespective of whether the CPUs are
>> ONLINE/OFFLINE.
>>
>> Leaving the policy, sysfs and kobject in place also brings these
>> additional
>> benefits:
>> * Faster suspend/resume.
>> * Faster hotplug.
>> * Sysfs file permissions maintained across hotplug without userspace
>>   workarounds.
>> * Policy settings and governor tunables maintained across suspend/resume
>>   and hotplug.
>> * Cpufreq stats would be maintained across hotplug for all CPUs and can
>> be
>>   queried even after CPU goes OFFLINE.
>>
>> Change-Id: I39c395e1fee8731880c0fd7c8a9c1d83e2e4b8d0
>> Tested-by: Stephen Boyd <sboyd@codeaurora.org>
>> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
>> ---
>>
>> Preliminary testing has been done. cpufreq directories are getting
>> created
>> properly. Online/offline of CPUs work. Policies remain unmodifiable from
>> userspace when all policy CPUs are offline.
>>
>> Error handling code has NOT been updated.
>>
>> I've added a bunch of FIXME comments next to where I'm not sure about
>> the
>> locking in the existing code. I believe most of the try_lock's were
>> present
>> to prevent a deadlock between sysfs lock and the cpufreq locks. Now that
>> the sysfs entries are not touched after creating them, we should be able
>> to
>> replace most/all of these try_lock's with a normal lock.
>>
>> This patch has more room for code simplification, but I would like to
>> get
>> some acks for the functionality and this code before I do further
>> simplification.
>>
>
> The idea behind this work is very welcome indeed! IMHO, there is nothing
> conceptually wrong in maintaining the per-cpu sysfs files across CPU
> hotplug
> (as long as we take care to return appropriate error codes if userspace
> tries to set values using the control files of offline CPUs). So, it
> really
> boils down to whether or not we get the implementation right; the idea
> itself
> looks fine as of now. Hence, your efforts in making this patch(set) easier
> to
> review will certainly help. Perhaps you can simplify the code later, but
> at
> this point, splitting up this patch into multiple smaller, reviewable
> pieces
> (accompanied by well-written changelogs that explain the intent) is the
> utmost
> priority. Just like Viresh, even I had a hard time reviewing all of this
> in
> one go.
>
> Thank you for taking up this work!

Thanks for the support. I'll keep in mind to keep the patches simple and
not do unnecessary optimizations. But the first patch diff unfortunately
is going to be a bit big since it'll delete a lot of code. :( But I'll add
more detailed commit text or "cover" text in the next one. I don't want to
split up the patch so much that individual ones don't compile or boot.

Maybe after patch v3, if you guys can suggest splitting it up into chunks
that won't involve huge rewrites, I can try to do that.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  9:59     ` skannan
@ 2014-07-11 10:07       ` skannan
  2014-07-11 10:52       ` Viresh Kumar
  1 sibling, 0 replies; 76+ messages in thread
From: skannan @ 2014-07-11 10:07 UTC (permalink / raw)
  To: skannan
  Cc: Viresh Kumar, Saravana Kannan, Rafael J . Wysocki, Todd Poynor,
	linux-pm, Linux Kernel Mailing List, linux-arm-msm,
	linux-arm-kernel, Stephen Boyd


skannan@codeaurora.org wrote:
>
> Viresh Kumar wrote:
>> Hi Saravana,
>>
>> Thanks for trying this..
>>
>> On 11 July 2014 09:48, Saravana Kannan <skannan@codeaurora.org> wrote:
>>> The CPUfreq driver moves the cpufreq policy ownership between CPUs when
>>
>> s/driver/core
>
> Will do
>

<snip>

Soooo many typos. This is what happens when I send a late night email! If
a sentence sounds incomplete, this is why.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  9:59     ` skannan
  2014-07-11 10:07       ` skannan
@ 2014-07-11 10:52       ` Viresh Kumar
  2014-07-12  2:44         ` Saravana Kannan
  1 sibling, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-11 10:52 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 11 July 2014 15:29,  <skannan@codeaurora.org> wrote:
> Viresh Kumar wrote:
>> On 11 July 2014 09:48, Saravana Kannan <skannan@codeaurora.org> wrote:

>>> * Policy settings and governor tunables maintained across suspend/resume
>>>   and hotplug.
>>
>> Its already maintained during suspend/resume.
>
> But not across hotplug. Which is also very useful when you have 2 clusters
> and one gets hotplugged offline due to thermal and then reinserted.
> Userspace has to come and restore it back today. In our tree, we "stitched
> up" the governor. Also, this make the suspend/resume code a tiny bit
> simpler -- in the sense, it's not a special case anymore.

Yeah, I understood that. I was just pointing that you need to mention
hotplug alone in the bullet point.

>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

>>>  static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>>  {
>>> -       unsigned int j;
>>> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>>>         int ret = 0;
>>>
>>> -       for_each_cpu(j, policy->cpus) {
>>> +       for_each_cpu(j, policy->related_cpus) {
>>>                 struct device *cpu_dev;
>>>
>>> -               if (j == policy->cpu)
>>> +               if (j == first_cpu)
>>
>> why?
>
> The first CPU is a cluster always own the real nodes.

What I meant was, why not use policy->cpu?

>>> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>>>  {
>>>         struct freq_attr **drv_attr;
>>> +       struct device *dev;
>>>         int ret = 0;
>>>
>>> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
>>> +       if (!dev)
>>> +               return -EINVAL;
>>> +
>>
>> Why?
>
> I'm just always adding the real nodes to the first CPU in a cluster
> independent of which CPU gets added first. Makes it easier to know which
> ones to symlink. See comment next to policy->cpu for full context.

Yeah, and that is the order in which CPUs will boot and cpufreq_add_dev()
will be called. So, isn't policy->cpu the right CPU always?

>>> +       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>>> +                                       CPUFREQ_UPDATE_POLICY_CPU,
>>> policy);
>>
>> This should be only called when policy->cpu is updated. And shouldn't
>> be called anymore and can be dropped as you might not wanna change
>> policy->cpu after this patch.
>
> Right. I should have reordered this to after.
>
> But I always HAVE to send it when I add/remove a CPU. That's how I believe
> cpufreq stats can keep of CPUs going offline.

No. cpufreq-stats doesn't have to care about CPUs going in/out. It just
cares about policy->cpu and so we notify it when that gets updated.

> Oh, yeah. Which bring me to
> another point. If I'm not mistaken, cpufreq stats keeps track of policy
> stats. Really, it should track a CPU's stats. If it's hotplugged out, it
> should count that time towards it's current freq.

Even if CPU is hotplugged out, it is getting clock from the same pll. And
so if clock for rest of the CPUs change, it changes for the hotplugged
out one as well.. Even if it is not running.

So, we must account on the new frequencies.

>>> -       if (has_target()) {
>>> +       cpus = cpumask_weight(policy->cpus);
>>> +       policy->cpu = cpumask_first(policy->cpus);
>>
>> why update it at all? Also, as per your logic what if cpus == 0?
>
> Yeah, I didn't write it this way at first. But the governors are making
> the assumption that policy->cpu is always an online CPU. So, they try to

Are you sure? I had a quick look and failed to see that..

> queue work there and use data structs of that CPU (even if they free it in
> the STOP event since it went offline).

So, it queues work on all policy->cpus, not policy->cpu. And the data structures
are just allocated with a CPU number, its fine if its offline.

And where are we freeing that stuff in STOP ?

Sorry if I am really really tired and couldn't read it correctly.

> Another option is to leave policy->cpu unchanged and then fix all the
> governors. But this patch would get even more complicated. So, we can
> leave this as is, or fix that up in a separate patch.

Since we are simplifying it here, I think we should NOT change policy->cpu
at all. It will make life simple (probably).

>>> +       if (has_target() && cpus > 0) {
>>
>> Instead of > or < use cpus or !cpus.
>
> Kinda personal style I guess. cpus > 0 reads like "there's more than 0
> CPUs" which makes sense in English too. But sure, I can change if you
> really want to.

Actually at places it's compared with 0 or 1, and so I thought cpus, !cpus
would be better..

> Sorry if I gave the impression that this patch is complete.

No, you didn't :)

>>> +               cpufreq_driver->stop_cpu(policy);
>>
>> Where is ->exit() gone?
>>
>
> Why should I exit? I don't think we need to. I'm not planning on exit()
> and init() every time an entire cluster is offlined or onlined. Just stop
> the governor and let if go quiet.

Okay, my driver is compiled as a module. I insert/remove it multiple
times. Only ->init() will be called multiple times, no exit ?

>>>  #ifdef CONFIG_SMP
>>>         /* check whether a different CPU already registered this
>>>          * CPU because it is in the same boat. */
>>> +       /* FIXME: This probably needs fixing to avoid "try lock" from
>>> +        * returning NULL. Also, change to likely() */
>>
>> I wanted to give this comment later, but that's fine ..
>>
>> - First, I couldn't understand the try-lock fixme
>
> If trylock fails when we are adding a new CPU to an existing cluster, I
> should skip adding it just because someone else was holding the lock and
> the try lock failed.

But that trylock thing was later in the code. How does it affect the
'if' block you commented on?

> But I don't think trylocks are needed anymore. Maybe I can fix it in later
> patches. We'll see.

I don't know yet :)

>> - And you aren't freeing 'struct cpufreq_policy' at all now. Would result
>> in
>> Memory leak cpufreq driver is compiled as a module and inserted/removed
>> multiple times.
>
> Again, sorry, if I gave the impression this patch was done. If you don't
> want me to send intermediate RFC patches, I can hold off. I'm well aware
> that this is not completed yet. When I'm done, there should be no memory
> leak when modules get added/removed.

Oh yes. I believed that at least the basic things are all working. It even had
a Tested-by from Stephen :)

Okay, this stuff isn't THAT big. So, hold on to your patches for some time and
send when they are almost ready.

I understand that you wanted to have some early feedback, but its already
there with you. YES we want this change to retain settings during hotplug.

The problem is, even when I didn't review it completely it took over an hour
to do the reviews I did :)

So, I would like to see something *much more* stable. Finishing can be
done later.

>>>         policy = cpufreq_cpu_get(cpu);
>>>         if (unlikely(policy)) {
>>> +               cpufreq_change_policy_cpus(policy, cpu, true);
>>>                 cpufreq_cpu_put(policy);
>>>                 return 0;
>>>         }
>>
>> This optimization wasn't for the hotplug case, but for adding
>> non-policy->cpu
>> cpus for the first time.
>
> Sure, but I'm making it an optimization for any time a CPU is added except
> for the first time. Since I'm not always freeing and allocating policies
> or moving them around, I can simplify it as such.

Hmm, but that would be an overhead of calling cpufreq_change_policy_cpus()
for every cpu on boot. Wouldn't be that nice for big servers.

>> In your case policy->cpus would already be updated and so calling
>> cpufreq_change_policy_cpus() isn't required.
>
> How? I don't you misunderstood the intent here.

Only on first boot policy->cpus would be updated. Later on you can do it
separately as it was done in existing code.

> Yeah, that's why I didn't fix it here yet. Leaving existing bugs as is.

I would prefer cleaning them up first, so that the new changes you are
making are rock solid.

>>> +       /* Weed out impossible CPUs. */
>>> +       cpumask_and(policy->related_cpus, policy->related_cpus,
>>> +                       cpu_possible_mask);
>>
>> why?
>
> Why not? It should make the future bit ops faster?

Drivers shouldn't set non-populatable CPUs to this mask.

>> Sorry but I am stopping now. I have already pointed out some issues
>> which make this unusable.
>
> Most of them aren't really bugs, but I should clarify the intent though.

I wasn't worried about most of the comments, but exit(), memory leaks,
etc..

> In this patch, I'm ONLY touching hotplug and resume related code (except
> for one line). I'll give some description in my next patch on how I'm
> expecting the events to be across hotplug/suspend and what happens with
> the policies. Once we are on the same page on the intent of the patch, it
> should be easier.

>> Please make sure you take care of these issues:
>> - suspend/resume
> Didn't test. I expect it to be broken in v2.

:)

>> - module insert/remove
> Didn't test. Expected to be broken.
>
>> - Memory leaks
> Will do.
>
>> - multi cluster systems (with one and multiple CPU per cluster)
>> *by cluster I mean group of CPUs sharing clock line
>> - single cluster ones, one and multiple CPUs
>
> I actually tested hotplug for all these cases. That's how I found the
> governor issue.
>
>>
>> Will see how V3 goes. Thanks.
>
> Thanks for taking the time to review and being open to these changes.
> Appreciate the cooperation.

Thanks Saravana.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11 10:52       ` Viresh Kumar
@ 2014-07-12  2:44         ` Saravana Kannan
  2014-07-14  6:09           ` Viresh Kumar
  0 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-12  2:44 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/11/2014 03:52 AM, Viresh Kumar wrote:

Just responding to one comment. The one about policy->cpu.

>
>>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>
>>>>   static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>>>   {
>>>> -       unsigned int j;
>>>> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>>>>          int ret = 0;
>>>>
>>>> -       for_each_cpu(j, policy->cpus) {
>>>> +       for_each_cpu(j, policy->related_cpus) {
>>>>                  struct device *cpu_dev;
>>>>
>>>> -               if (j == policy->cpu)
>>>> +               if (j == first_cpu)
>>>
>>> why?
>>
>> The first CPU is a cluster always own the real nodes.
>
> What I meant was, why not use policy->cpu?
>
>>>> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>>>>   {
>>>>          struct freq_attr **drv_attr;
>>>> +       struct device *dev;
>>>>          int ret = 0;
>>>>
>>>> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
>>>> +       if (!dev)
>>>> +               return -EINVAL;
>>>> +
>>>
>>> Why?
>>
>> I'm just always adding the real nodes to the first CPU in a cluster
>> independent of which CPU gets added first. Makes it easier to know which
>> ones to symlink. See comment next to policy->cpu for full context.
>
> Yeah, and that is the order in which CPUs will boot and cpufreq_add_dev()
> will be called. So, isn't policy->cpu the right CPU always?

No, the "first" cpu in a cluster doesn't need to be the first one to be 
added. An example is 2x2 cluster system where the system is booted with 
max cpus = 2 and then cpu3 could be onlined first by userspace.

>
>>>> -       if (has_target()) {
>>>> +       cpus = cpumask_weight(policy->cpus);
>>>> +       policy->cpu = cpumask_first(policy->cpus);
>>>
>>> why update it at all? Also, as per your logic what if cpus == 0?
>>
>> Yeah, I didn't write it this way at first. But the governors are making
>> the assumption that policy->cpu is always an online CPU. So, they try to
>
> Are you sure? I had a quick look and failed to see that..
>
>> queue work there and use data structs of that CPU (even if they free it in
>> the STOP event since it went offline).
>
> So, it queues work on all policy->cpus, not policy->cpu.
> And the data structures
> are just allocated with a CPU number, its fine if its offline.
>
> And where are we freeing that stuff in STOP ?
>
> Sorry if I am really really tired and couldn't read it correctly.

Yeah, it is pretty convoluted. But pretty much anywhere in the gov code 
where policy->cpu is used could cause this. The specific crash I hit was 
in this code:

static void od_dbs_timer(struct work_struct *work)
{
	struct od_cpu_dbs_info_s *dbs_info =
		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;

======= CPU is policy->cpu here.

	struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
			cpu);

======= Picks the per CPU struct of an offline CPU

<snip>

	mutex_lock(&core_dbs_info->cdbs.timer_mutex);

======= Dies trying to lock a destroyed mutex

>
>> Another option is to leave policy->cpu unchanged and then fix all the
>> governors. But this patch would get even more complicated. So, we can
>> leave this as is, or fix that up in a separate patch.
>
> Since we are simplifying it here, I think we should NOT change policy->cpu
> at all. It will make life simple (probably).

I agree, but then I would have to fix up the governors. In the interest 
of keeping this patch small. I'll continue with what I'm doing and fix 
it up in another patch.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-11  6:19   ` Viresh Kumar
  2014-07-11  9:59     ` skannan
@ 2014-07-12  3:06     ` Saravana Kannan
  2014-07-14  6:13       ` Viresh Kumar
  1 sibling, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-12  3:06 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/10/2014 11:19 PM, Viresh Kumar wrote:

>
> Please make sure you take care of these issues:
> - suspend/resume
> - hotplug
> - module insert/remove
Ok, I was just looking at the current code. Does cpufreq_unregister_driver() 
even really work correctly as it stands?

It doesn't even seem to stop any of the governors/policies before it 
just sets the cpufreq_driver pointer to NULL.

So, technically my v2 patch doesn't even make anything worse when it 
comes to unregistering the cpufreq driver.

Similar issues for unregister_governor too!

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-12  2:44         ` Saravana Kannan
@ 2014-07-14  6:09           ` Viresh Kumar
  2014-07-14 19:08             ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-14  6:09 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 12 July 2014 08:14, Saravana Kannan <skannan@codeaurora.org> wrote:

>>> I'm just always adding the real nodes to the first CPU in a cluster
>>> independent of which CPU gets added first. Makes it easier to know which
>>> ones to symlink. See comment next to policy->cpu for full context.
>>
>>
>> Yeah, and that is the order in which CPUs will boot and cpufreq_add_dev()
>> will be called. So, isn't policy->cpu the right CPU always?
>
>
> No, the "first" cpu in a cluster doesn't need to be the first one to be
> added. An example is 2x2 cluster system where the system is booted with max
> cpus = 2 and then cpu3 could be onlined first by userspace.

Because we are getting rid of much of the complexity now, I do not want
policy->cpu to keep changing. Just fix it up to the cpu for which the policy
gets created first. That's it. No more changes required. It doesn't matter at
userspace which cpu owns it as symlinks would anyway duplicate it under
every cpu.

> Yeah, it is pretty convolution. But pretty much anywhere in the gov code
> where policy->cpu is used could cause this. The specific crash I hit was in
> this code:
>
> static void od_dbs_timer(struct work_struct *work)
> {
>         struct od_cpu_dbs_info_s *dbs_info =
>                 container_of(work, struct od_cpu_dbs_info_s,
> cdbs.work.work);
>         unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
>
> ======= CPU is policy->cpu here.
>
>         struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
>                         cpu);
>
> ======= Picks the per CPU struct of an offline CPU
>
> <snip>
>
>         mutex_lock(&core_dbs_info->cdbs.timer_mutex);
>
> ======= Dies trying to lock a destroyed mutex

I am still not getting it. Why would we get into this if policy->cpu is fixed
once at boot ?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-12  3:06     ` Saravana Kannan
@ 2014-07-14  6:13       ` Viresh Kumar
  2014-07-14 19:10         ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-14  6:13 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 12 July 2014 08:36, Saravana Kannan <skannan@codeaurora.org> wrote:
> On 07/10/2014 11:19 PM, Viresh Kumar wrote:
>
>>
>> Please make sure you take care of these issues:
>> - suspend/resume
>> - hotplug
>> - module insert/remove
>
> Ok, I was just at the current code. Does cpufreq_unregister_driver() even
> really work correctly as it stands?
>
> It doesn't even seem to stop any of the governors/policies before it just
> set the cpufreq_driver pointer to NULL.
>
> So, technically my v2 patch doesn't even make anything worse when it comes
> to unregistering the cpufreq driver.

Are you really sure about this? I have tested this *myself* earlier..

subsys_interface_unregister() should take care of stopping/freeing governor
stuff..

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-14  6:09           ` Viresh Kumar
@ 2014-07-14 19:08             ` Saravana Kannan
  2014-07-15  4:35               ` Viresh Kumar
  0 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-14 19:08 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/13/2014 11:09 PM, Viresh Kumar wrote:
> On 12 July 2014 08:14, Saravana Kannan <skannan@codeaurora.org> wrote:
>
>>>> I'm just always adding the real nodes to the first CPU in a cluster
>>>> independent of which CPU gets added first. Makes it easier to know which
>>>> ones to symlink. See comment next to policy->cpu for full context.
>>>
>>>
>>> Yeah, and that is the order in which CPUs will boot and cpufreq_add_dev()
>>> will be called. So, isn't policy->cpu the right CPU always?
>>
>>
>> No, the "first" cpu in a cluster doesn't need to be the first one to be
>> added. An example is 2x2 cluster system where the system is booted with max
>> cpus = 2 and then cpu3 could be onlined first by userspace.
>
> Because we are getting rid of much of the complexity now, I do not want
> policy->cpu to keep changing. Just fix it up to the cpu for which the policy
> gets created first. That's it. No more changes required. It doesn't matter at
> userspace which cpu owns it as symlinks would anyway duplicate it under
> every cpu.

I think you missed one of my comments in the email. I agree with what 
you are saying here. I'll just do it as a separate patch to keep this 
one simpler. I don't want to touch all the governors and other potential 
uses of policy->cpu in this patch.

>> Yeah, it is pretty convolution. But pretty much anywhere in the gov code
>> where policy->cpu is used could cause this. The specific crash I hit was in
>> this code:
>>
>> static void od_dbs_timer(struct work_struct *work)
>> {
>>          struct od_cpu_dbs_info_s *dbs_info =
>>                  container_of(work, struct od_cpu_dbs_info_s,
>> cdbs.work.work);
>>          unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
>>
>> ======= CPU is policy->cpu here.
>>
>>          struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
>>                          cpu);
>>
>> ======= Picks the per CPU struct of an offline CPU
>>
>> <snip>
>>
>>          mutex_lock(&core_dbs_info->cdbs.timer_mutex);
>>
>> ======= Dies trying to lock a destroyed mutex
>
> I am still not getting it. Why would we get into this if policy->cpu is fixed
> once at boot ?
>

Yeah, it definitely crashes if policy->cpu is an offline cpu. Because 
the mutex would be uninitialized if it's stopped after boot or it would 
never have been initialized (depending on how you fix policy->cpu at boot).

Look at this snippet on the actual tree and it should be pretty evident.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-14  6:13       ` Viresh Kumar
@ 2014-07-14 19:10         ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-14 19:10 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/13/2014 11:13 PM, Viresh Kumar wrote:
> On 12 July 2014 08:36, Saravana Kannan <skannan@codeaurora.org> wrote:
>> On 07/10/2014 11:19 PM, Viresh Kumar wrote:
>>
>>>
>>> Please make sure you take care of these issues:
>>> - suspend/resume
>>> - hotplug
>>> - module insert/remove
>>
>> Ok, I was just at the current code. Does cpufreq_unregister_driver() even
>> really work correctly as it stands?
>>
>> It doesn't even seem to stop any of the governors/policies before it just
>> set the cpufreq_driver pointer to NULL.
>>
>> So, technically my v2 patch doesn't even make anything worse when it comes
>> to unregistering the cpufreq driver.
>
> Are you really sure about this? I have tested this *myself* earlier..
>
> subsys_interface_unregister() should take care of stopping/freeing governor
> stuff..
>

I was asking this question based on looking at the code. Didn't actually 
try it -- sent it just before being done for the day. I didn't know 
about the subsys_interface_unregister() coming into play here. I'll take 
a look.

Thanks for the pointer.

-Saravana


-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-14 19:08             ` Saravana Kannan
@ 2014-07-15  4:35               ` Viresh Kumar
  2014-07-15  5:36                 ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-15  4:35 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 15 July 2014 00:38, Saravana Kannan <skannan@codeaurora.org> wrote:
> Yeah, it definitely crashes if policy->cpu if an offline cpu. Because the
> mutex would be uninitialized if it's stopped after boot or it would never
> have been initialized (depending on how you fix policy->cpu at boot).
>
> Look at this snippet on the actual tree and it should be pretty evident.

Yeah, I missed it. So the problem is we initialize timer_mutex's for
policy->cpus. So we need to do that just for policy->cpu and also we don't
need a per-cpu timer_mutex anymore.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15  4:35               ` Viresh Kumar
@ 2014-07-15  5:36                 ` Saravana Kannan
  2014-07-15  5:52                   ` Viresh Kumar
  2014-07-15  6:58                   ` Srivatsa S. Bhat
  0 siblings, 2 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-15  5:36 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/14/2014 09:35 PM, Viresh Kumar wrote:
> On 15 July 2014 00:38, Saravana Kannan <skannan@codeaurora.org> wrote:
>> Yeah, it definitely crashes if policy->cpu if an offline cpu. Because the
>> mutex would be uninitialized if it's stopped after boot or it would never
>> have been initialized (depending on how you fix policy->cpu at boot).
>>
>> Look at this snippet on the actual tree and it should be pretty evident.
>
> Yeah, I missed it. So the problem is we initialize timer_mutex's for
> policy->cpus. So we need to do that just for policy->cpu and also we don't
> need a per-cpu timer_mutex anymore.
>

Btw, I tried to take a stab at removing any assumption in cpufreq code 
about policy->cpu being ONLINE. There are 160 instances of those, of which 
23 are in cpufreq.c

So, even if we are sure cpufreq.c is fine, it's 137 other uses spread 
across all the other files. I definitely don't want to try and fix those 
as part of this patch. Way too risky and hard to get the test coverage 
it would need. Even some of the acpi cpufreq drivers seem to be making 
this assumption.

Btw, I think v3 is done. I did some testing and it was fine. But made 
some minor changes. Will test tomorrow to make sure I didn't break 
anything with the minor changes and then send them out.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15  5:36                 ` Saravana Kannan
@ 2014-07-15  5:52                   ` Viresh Kumar
  2014-07-15  6:58                   ` Srivatsa S. Bhat
  1 sibling, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-15  5:52 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 15 July 2014 11:06, Saravana Kannan <skannan@codeaurora.org> wrote:
> Btw, I tried to take a stab at removing any assumption in cpufreq code about
> policy->cpu being ONLINE. There are 160 instances of those of with 23 are in
> cpufreq.c
>
> So, even if we are sure cpufreq.c is fine, it's 137 other uses spread across
> all the other files. I definitely don't want to try and fix those as part of
> this patch. Way too risky and hard to get the test coverage it would need.
> Even some of the acpi cpufreq drivers seem to be making this assumption.

Hmm, yeah that would be an issue. So this is what you should do now:
- Leave policy->cpu as it is, i.e. update it only when policy->cpu goes down.
- Just make sure sysfs nodes are untouched when any cpu goes down

> Btw, I think v3 is done. I did some testing and it was fine. But made some
> minor changes. Will test tomorrow to make sure I didn't break anything with
> the minor changes and then send them out.

Ok, just comply with the above comments.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15  5:36                 ` Saravana Kannan
  2014-07-15  5:52                   ` Viresh Kumar
@ 2014-07-15  6:58                   ` Srivatsa S. Bhat
  2014-07-15 17:35                     ` skannan
  2014-07-16  5:44                     ` Viresh Kumar
  1 sibling, 2 replies; 76+ messages in thread
From: Srivatsa S. Bhat @ 2014-07-15  6:58 UTC (permalink / raw)
  To: Saravana Kannan, Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/15/2014 11:06 AM, Saravana Kannan wrote:
> On 07/14/2014 09:35 PM, Viresh Kumar wrote:
>> On 15 July 2014 00:38, Saravana Kannan <skannan@codeaurora.org> wrote:
>>> Yeah, it definitely crashes if policy->cpu if an offline cpu. Because
>>> the
>>> mutex would be uninitialized if it's stopped after boot or it would
>>> never
>>> have been initialized (depending on how you fix policy->cpu at boot).
>>>
>>> Look at this snippet on the actual tree and it should be pretty evident.
>>
>> Yeah, I missed it. So the problem is we initialize timer_mutex's for
>> policy->cpus. So we need to do that just for policy->cpu and also we
>> don't
>> need a per-cpu timer_mutex anymore.
>>
> 
> Btw, I tried to take a stab at removing any assumption in cpufreq code
> about policy->cpu being ONLINE.

Wait, allowing an offline CPU to be the policy->cpu (i.e., the CPU which is
considered as the master of the policy/group) is just absurd. If there is
no leader, there is no army. We should NOT sacrifice sane semantics for the
sake of simplifying the code.

> There are 160 instances of those of with
> 23 are in cpufreq.c
>

And that explains why. It is just *natural* to assume that the CPUs governed
by a policy are online. Especially so for the CPU which is supposed to be
the policy leader. Let us please not change that - it will become
counter-intuitive if we do so. [ The other reason is that physical hotplug
is also possible on some systems... in that case your code might make a CPU
which is not even present (but possible) as the policy->cpu.. and great 'fun'
will ensue after that ;-( ]

The goal of this patchset should be to just de-couple the sysfs files/ownership
from the policy->cpu to an extent where it doesn't matter who owns those
files, and probably make it easier to do CPU hotplug without having to
destroy and recreate the files on every hotplug operation.

This is exactly why the _implementation_ matters in this particular case -
if we can't achieve the simplification by keeping sane semantics, then we
shouldn't do the simplification!

That said, I think we should keep trying - we haven't exhausted all ideas
yet :-)

Regards,
Srivatsa S. Bhat

> So, even if we are sure cpufreq.c is fine, it's 137 other uses spread
> across all the other files. I definitely don't want to try and fix those
> as part of this patch. Way too risky and hard to get the test coverage
> it would need. Even some of the acpi cpufreq drivers seem to be making
> this assumption.
> 
> Btw, I think v3 is done. I did some testing and it was fine. But made
> some minor changes. Will test tomorrow to make sure I didn't break
> anything with the minor changes and then send them out.
> 

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15  6:58                   ` Srivatsa S. Bhat
@ 2014-07-15 17:35                     ` skannan
  2014-07-16  7:44                       ` Srivatsa S. Bhat
  2014-07-16  5:44                     ` Viresh Kumar
  1 sibling, 1 reply; 76+ messages in thread
From: skannan @ 2014-07-15 17:35 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: Saravana Kannan, Viresh Kumar, Rafael J . Wysocki, Todd Poynor,
	linux-pm, Linux Kernel Mailing List, linux-arm-msm,
	linux-arm-kernel, Stephen Boyd


Srivatsa S. Bhat wrote:
> On 07/15/2014 11:06 AM, Saravana Kannan wrote:
>> On 07/14/2014 09:35 PM, Viresh Kumar wrote:
>>> On 15 July 2014 00:38, Saravana Kannan <skannan@codeaurora.org> wrote:
>>>> Yeah, it definitely crashes if policy->cpu if an offline cpu. Because
>>>> the
>>>> mutex would be uninitialized if it's stopped after boot or it would
>>>> never
>>>> have been initialized (depending on how you fix policy->cpu at boot).
>>>>
>>>> Look at this snippet on the actual tree and it should be pretty
>>>> evident.
>>>
>>> Yeah, I missed it. So the problem is we initialize timer_mutex's for
>>> policy->cpus. So we need to do that just for policy->cpu and also we
>>> don't
>>> need a per-cpu timer_mutex anymore.
>>>
>>
>> Btw, I tried to take a stab at removing any assumption in cpufreq code
>> about policy->cpu being ONLINE.
>
> Wait, allowing an offline CPU to be the policy->cpu (i.e., the CPU which
> is
> considered as the master of the policy/group) is just absurd. If there is
> no leader, there is no army. We should NOT sacrifice sane semantics for
> the
> sake of simplifying the code.
>
>> There are 160 instances of those of with
>> 23 are in cpufreq.c
>>
>
> And that explains why. It is just *natural* to assume that the CPUs
> governed
> by a policy are online. Especially so for the CPU which is supposed to be
> the policy leader. Let us please not change that - it will become
> counter-intuitive if we do so. [ The other reason is that physical hotplug
> is also possible on some systems... in that case your code might make a
> CPU
> which is not even present (but possible) as the policy->cpu.. and great
> 'fun'
> will ensue after that ;-( ]
>
> The goal of this patchset should be to just de-couple the sysfs
> files/ownership
> from the policy->cpu to an extent where it doesn't matter who owns those
> files, and probably make it easier to do CPU hotplug without having to
> destroy and recreate the files on every hotplug operation.
>
> This is exactly why the _implementation_ matters in this particular case -
> if we can't achieve the simplification by keeping sane semantics, then we
> shouldn't do the simplification!
>
> That said, I think we should keep trying - we haven't exhausted all ideas
> yet :-)
>

I don't think we disagree. To summarize this topic: I tried to keep the
policy->cpu an actual online CPU so as to not break existing semantics in
this patch. Viresh asked "why not fix it at boot?". My response was to
keep it an online CPU and give it a shot in a separate patch if we really
want that. It's too risky to do that in this patch and also not a
mandatory change for this patch.

I think we can work out the details on the need to fixing policy->cpu at
boot and whether there's even a need for policy->cpu (when we already have
policy->cpus) in a separate thread after the dust settles on this one?

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* [PATCH v3 0/2] Simplify hotplug/suspend handling
  2014-07-11  4:18 ` [PATCH v2] " Saravana Kannan
  2014-07-11  6:19   ` Viresh Kumar
  2014-07-11  7:43   ` Srivatsa S. Bhat
@ 2014-07-15 22:47   ` Saravana Kannan
  2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
                       ` (2 more replies)
  2 siblings, 3 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-15 22:47 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

Series of patches to simplify policy/sysfs/kobj/locking handling across
suspend/resume

The following have been tested so far on a 2x2 cluster environment:
- Boot with 2 cpus and no cpufreq driver.
- modprobe driver and see cpufreq sysfs files show up only for the 1st cluster.
- Online the rest of the 2 CPUs and have files show up correctly.
- rmmod the driver and see the files go away.
- modprobe again (or back and forth multiple times) and see it work.
- suspend/resume works as expected.
- When a cluster is offline, all read/writes to its sysfs files return an error

Saravana Kannan (2):
  cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  cpufreq: Simplify and fix mutual exclusion with hotplug

 drivers/cpufreq/cpufreq.c | 432 ++++++++++++++--------------------------------
 1 file changed, 127 insertions(+), 305 deletions(-)

-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15 22:47   ` [PATCH v3 0/2] Simplify hotplug/suspend handling Saravana Kannan
@ 2014-07-15 22:47     ` Saravana Kannan
  2014-07-16  0:28       ` Saravana Kannan
                         ` (2 more replies)
  2014-07-15 22:47     ` [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug Saravana Kannan
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
  2 siblings, 3 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-15 22:47 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

The CPUfreq core moves the cpufreq policy ownership between CPUs when CPUs
within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When moving
policy ownership between CPUs, it also moves the cpufreq sysfs directory
between CPUs and also fixes up the symlinks of the other CPUs in the
cluster.

Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
directories are deleted, the kobject is released and the policy is freed.
And when the first CPU in a cluster comes up, the policy is reallocated and
initialized, kobject is acquired, the sysfs nodes are created or symlinked,
etc.

All these steps end up creating unnecessarily complicated code and locking.
There's no real benefit to adding/removing/moving the sysfs nodes and the
policy between CPUs. Other per CPU sysfs directories like power and cpuidle
are left alone during hotplug. So there's some precedent for what this
patch is trying to do.

This patch simplifies a lot of the code and locking by removing the
adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
directory and policy in place irrespective of whether the CPUs are
ONLINE/OFFLINE.

Leaving the policy, sysfs and kobject in place also brings these additional
benefits:
* Faster suspend/resume
* Faster hotplug
* Sysfs file permissions maintained across hotplug
* Policy settings and governor tunables maintained across hotplug
* Cpufreq stats would be maintained across hotplug for all CPUs and can be
  queried even after CPU goes OFFLINE

Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 388 +++++++++++++---------------------------------
 1 file changed, 107 insertions(+), 281 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 62259d2..a0a2ec2 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -37,7 +37,6 @@
  */
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
-static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
 DEFINE_MUTEX(cpufreq_governor_lock);
 static LIST_HEAD(cpufreq_policy_list);
@@ -859,34 +858,41 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr)
 }
 EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
 
-/* symlink affected CPUs */
-static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
+/* symlink related CPUs */
+static int cpufreq_dev_symlink(struct cpufreq_policy *policy, bool add)
 {
-	unsigned int j;
+	unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
 	int ret = 0;
 
-	for_each_cpu(j, policy->cpus) {
+	for_each_cpu(j, policy->related_cpus) {
 		struct device *cpu_dev;
 
-		if (j == policy->cpu)
+		if (j == first_cpu)
 			continue;
 
-		pr_debug("Adding link for CPU: %u\n", j);
 		cpu_dev = get_cpu_device(j);
-		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
-					"cpufreq");
+		if (add)
+			ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
+						"cpufreq");
+		else
+			sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
+
 		if (ret)
 			break;
 	}
 	return ret;
 }
 
-static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
-				     struct device *dev)
+static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
 {
 	struct freq_attr **drv_attr;
+	struct device *dev;
 	int ret = 0;
 
+	dev = get_cpu_device(cpumask_first(policy->related_cpus));
+	if (!dev)
+		return -EINVAL;
+
 	/* prepare interface data */
 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
 				   &dev->kobj, "cpufreq");
@@ -917,7 +923,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
 			goto err_out_kobj_put;
 	}
 
-	ret = cpufreq_add_dev_symlink(policy);
+	ret = cpufreq_dev_symlink(policy, true);
 	if (ret)
 		goto err_out_kobj_put;
 
@@ -961,60 +967,58 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
-				  unsigned int cpu, struct device *dev)
+static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
+				  unsigned int cpu, bool add)
 {
 	int ret = 0;
-	unsigned long flags;
+	unsigned int cpus, pcpu;
 
-	if (has_target()) {
+	down_write(&policy->rwsem);
+
+	cpus = !cpumask_empty(policy->cpus);
+	if (has_target() && cpus) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
-			return ret;
+			goto unlock;
 		}
 	}
 
-	down_write(&policy->rwsem);
-
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	cpumask_set_cpu(cpu, policy->cpus);
-	per_cpu(cpufreq_cpu_data, cpu) = policy;
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	if (add)
+		cpumask_set_cpu(cpu, policy->cpus);
+	else
+		cpumask_clear_cpu(cpu, policy->cpus);
 
-	up_write(&policy->rwsem);
+	pcpu = cpumask_first(policy->cpus);
+	if (pcpu < nr_cpu_ids && policy->cpu != pcpu) {
+		policy->last_cpu = policy->cpu;
+		policy->cpu = pcpu;
+		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+					CPUFREQ_UPDATE_POLICY_CPU, policy);
+	}
 
-	if (has_target()) {
+	cpus = !cpumask_empty(policy->cpus);
+	if (has_target() && cpus) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
 		if (!ret)
 			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 
 		if (ret) {
 			pr_err("%s: Failed to start governor\n", __func__);
-			return ret;
+			goto unlock;
 		}
 	}
 
-	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
-}
-#endif
-
-static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-	unsigned long flags;
-
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
-
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
+		cpufreq_driver->stop_cpu(policy);
+	}
 
-	policy->governor = NULL;
+unlock:
+	up_write(&policy->rwsem);
 
-	return policy;
+	return ret;
 }
+#endif
 
 static struct cpufreq_policy *cpufreq_policy_alloc(void)
 {
@@ -1053,10 +1057,8 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 			CPUFREQ_REMOVE_POLICY, policy);
 
-	down_read(&policy->rwsem);
 	kobj = &policy->kobj;
 	cmp = &policy->kobj_unregister;
-	up_read(&policy->rwsem);
 	kobject_put(kobj);
 
 	/*
@@ -1076,32 +1078,12 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	kfree(policy);
 }
 
-static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
-{
-	if (WARN_ON(cpu == policy->cpu))
-		return;
-
-	down_write(&policy->rwsem);
-
-	policy->last_cpu = policy->cpu;
-	policy->cpu = cpu;
-
-	up_write(&policy->rwsem);
-
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_UPDATE_POLICY_CPU, policy);
-}
-
 static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int j, cpu = dev->id;
 	int ret = -ENOMEM;
 	struct cpufreq_policy *policy;
 	unsigned long flags;
-	bool recover_policy = cpufreq_suspended;
-#ifdef CONFIG_HOTPLUG_CPU
-	struct cpufreq_policy *tpolicy;
-#endif
 
 	if (cpu_is_offline(cpu))
 		return 0;
@@ -1110,9 +1092,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 #ifdef CONFIG_SMP
 	/* check whether a different CPU already registered this
-	 * CPU because it is in the same boat. */
+	 * CPU because it is one of the related CPUs. */
 	policy = cpufreq_cpu_get(cpu);
-	if (unlikely(policy)) {
+	if (policy) {
+		cpufreq_change_policy_cpus(policy, cpu, true);
 		cpufreq_cpu_put(policy);
 		return 0;
 	}
@@ -1121,45 +1104,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Check if this cpu was hot-unplugged earlier and has siblings */
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
-		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
-			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
-			up_read(&cpufreq_rwsem);
-			return ret;
-		}
-	}
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
-
-	/*
-	 * Restore the saved policy when doing light-weight init and fall back
-	 * to the full init if that fails.
-	 */
-	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
-	if (!policy) {
-		recover_policy = false;
-		policy = cpufreq_policy_alloc();
-		if (!policy)
-			goto nomem_out;
-	}
-
-	/*
-	 * In the resume path, since we restore a saved policy, the assignment
-	 * to policy->cpu is like an update of the existing policy, rather than
-	 * the creation of a brand new one. So we need to perform this update
-	 * by invoking update_policy_cpu().
-	 */
-	if (recover_policy && cpu != policy->cpu)
-		update_policy_cpu(policy, cpu);
-	else
-		policy->cpu = cpu;
+	/* If we get this far, this is the first time we are adding the
+	 * policy */
+	policy = cpufreq_policy_alloc();
+	if (!policy)
+		goto nomem_out;
+	policy->cpu = cpu;
 
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
-
 	init_completion(&policy->kobj_unregister);
 	INIT_WORK(&policy->update, handle_update);
 
@@ -1169,26 +1121,25 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	ret = cpufreq_driver->init(policy);
 	if (ret) {
 		pr_debug("initialization failed\n");
-		goto err_set_policy_cpu;
+		goto err_init;
 	}
 
 	/* related cpus should atleast have policy->cpus */
 	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
 
+	/* Weed out impossible CPUs. */
+	cpumask_and(policy->related_cpus, policy->related_cpus,
+			cpu_possible_mask);
+
 	/*
 	 * affected cpus must always be the one, which are online. We aren't
 	 * managing offline cpus here.
 	 */
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
-	if (!recover_policy) {
-		policy->user_policy.min = policy->min;
-		policy->user_policy.max = policy->max;
-	}
-
 	down_write(&policy->rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1243,13 +1194,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
 
-	if (!recover_policy) {
-		ret = cpufreq_add_dev_interface(policy, dev);
-		if (ret)
-			goto err_out_unregister;
-		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				CPUFREQ_CREATE_POLICY, policy);
-	}
+	ret = cpufreq_add_dev_interface(policy);
+	if (ret)
+		goto err_out_unregister;
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_CREATE_POLICY, policy);
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	list_add(&policy->policy_list, &cpufreq_policy_list);
@@ -1257,10 +1206,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	cpufreq_init_policy(policy);
 
-	if (!recover_policy) {
-		policy->user_policy.policy = policy->policy;
-		policy->user_policy.governor = policy->governor;
-	}
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -1273,20 +1218,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 err_out_unregister:
 err_get_freq:
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = NULL;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
+	up_write(&policy->rwsem);
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
-err_set_policy_cpu:
-	if (recover_policy) {
-		/* Do not leave stale fallback data behind. */
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
-		cpufreq_policy_put_kobj(policy);
-	}
+err_init:
 	cpufreq_policy_free(policy);
-
 nomem_out:
 	up_read(&cpufreq_rwsem);
 
@@ -1307,100 +1246,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	return __cpufreq_add_dev(dev, sif);
 }
 
-static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
-					   unsigned int old_cpu)
-{
-	struct device *cpu_dev;
-	int ret;
-
-	/* first sibling now owns the new sysfs dir */
-	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
-
-	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
-	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
-	if (ret) {
-		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
-
-		down_write(&policy->rwsem);
-		cpumask_set_cpu(old_cpu, policy->cpus);
-		up_write(&policy->rwsem);
-
-		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
-					"cpufreq");
-
-		return -EINVAL;
-	}
-
-	return cpu_dev->id;
-}
-
-static int __cpufreq_remove_dev_prepare(struct device *dev,
-					struct subsys_interface *sif)
+static int __cpufreq_remove_dev(struct device *dev,
+				struct subsys_interface *sif)
 {
-	unsigned int cpu = dev->id, cpus;
-	int new_cpu, ret;
+	unsigned int cpu = dev->id, j;
+	int ret = 0;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	/* Save the policy somewhere when doing a light-weight tear-down */
-	if (cpufreq_suspended)
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
-
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-	if (!policy) {
-		pr_debug("%s: No cpu_data found\n", __func__);
-		return -EINVAL;
-	}
-
-	if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-		if (ret) {
-			pr_err("%s: Failed to stop governor\n", __func__);
-			return ret;
-		}
-	}
-
-	if (!cpufreq_driver->setpolicy)
-		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
-			policy->governor->name, CPUFREQ_NAME_LEN);
-
-	down_read(&policy->rwsem);
-	cpus = cpumask_weight(policy->cpus);
-	up_read(&policy->rwsem);
-
-	if (cpu != policy->cpu) {
-		sysfs_remove_link(&dev->kobj, "cpufreq");
-	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
-		if (new_cpu >= 0) {
-			update_policy_cpu(policy, new_cpu);
-
-			if (!cpufreq_suspended)
-				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
-					 __func__, new_cpu, cpu);
-		}
-	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
-		cpufreq_driver->stop_cpu(policy);
-	}
-
-	return 0;
-}
-
-static int __cpufreq_remove_dev_finish(struct device *dev,
-				       struct subsys_interface *sif)
-{
-	unsigned int cpu = dev->id, cpus;
-	int ret;
-	unsigned long flags;
-	struct cpufreq_policy *policy;
-
 	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
@@ -1410,56 +1265,45 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		return -EINVAL;
 	}
 
-	down_write(&policy->rwsem);
-	cpus = cpumask_weight(policy->cpus);
-
-	if (cpus > 1)
-		cpumask_clear_cpu(cpu, policy->cpus);
-	up_write(&policy->rwsem);
-
-	/* If cpu is last user of policy, free policy */
-	if (cpus == 1) {
-		if (has_target()) {
-			ret = __cpufreq_governor(policy,
-					CPUFREQ_GOV_POLICY_EXIT);
-			if (ret) {
-				pr_err("%s: Failed to exit governor\n",
-				       __func__);
-				return ret;
-			}
-		}
-
-		if (!cpufreq_suspended)
-			cpufreq_policy_put_kobj(policy);
+#ifdef CONFIG_HOTPLUG_CPU
+	ret = cpufreq_change_policy_cpus(policy, cpu, false);
+#endif
+	if (ret)
+		return ret;
 
-		/*
-		 * Perform the ->exit() even during light-weight tear-down,
-		 * since this is a core component, and is essential for the
-		 * subsequent light-weight ->init() to succeed.
-		 */
-		if (cpufreq_driver->exit)
-			cpufreq_driver->exit(policy);
+	if (!sif)
+		return 0;
 
-		/* Remove policy from list of active policies */
-		write_lock_irqsave(&cpufreq_driver_lock, flags);
-		list_del(&policy->policy_list);
-		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	if (!cpumask_empty(policy->cpus)) {
+		return 0;
+	}
 
-		if (!cpufreq_suspended)
-			cpufreq_policy_free(policy);
-	} else if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	cpufreq_dev_symlink(policy, false);
 
+	if (has_target()) {
+		ret = __cpufreq_governor(policy,
+				CPUFREQ_GOV_POLICY_EXIT);
 		if (ret) {
-			pr_err("%s: Failed to start governor\n", __func__);
+			pr_err("%s: Failed to exit governor\n",
+			       __func__);
 			return ret;
 		}
 	}
 
-	per_cpu(cpufreq_cpu_data, cpu) = NULL;
-	return 0;
+	cpufreq_policy_put_kobj(policy);
+	if (cpufreq_driver->exit)
+		cpufreq_driver->exit(policy);
+
+	/* Remove policy from list of active policies */
+	write_lock_irqsave(&cpufreq_driver_lock, flags);
+	for_each_cpu(j, policy->related_cpus)
+		per_cpu(cpufreq_cpu_data, j) = NULL;
+	list_del(&policy->policy_list);
+	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	cpufreq_policy_free(policy);
+
+	return ret;
 }
 
 /**
@@ -1469,18 +1313,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
  */
 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
-	unsigned int cpu = dev->id;
-	int ret;
-
-	if (cpu_is_offline(cpu))
-		return 0;
-
-	ret = __cpufreq_remove_dev_prepare(dev, sif);
-
-	if (!ret)
-		ret = __cpufreq_remove_dev_finish(dev, sif);
-
-	return ret;
+	return __cpufreq_remove_dev(dev, sif);
 }
 
 static void handle_update(struct work_struct *work)
@@ -2295,19 +2128,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 	if (dev) {
 		switch (action & ~CPU_TASKS_FROZEN) {
 		case CPU_ONLINE:
+		case CPU_DOWN_FAILED:
 			__cpufreq_add_dev(dev, NULL);
 			break;
 
 		case CPU_DOWN_PREPARE:
-			__cpufreq_remove_dev_prepare(dev, NULL);
-			break;
-
-		case CPU_POST_DEAD:
-			__cpufreq_remove_dev_finish(dev, NULL);
-			break;
-
-		case CPU_DOWN_FAILED:
-			__cpufreq_add_dev(dev, NULL);
+			__cpufreq_remove_dev(dev, NULL);
 			break;
 		}
 	}
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug
  2014-07-15 22:47   ` [PATCH v3 0/2] Simplify hotplug/suspend handling Saravana Kannan
  2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
@ 2014-07-15 22:47     ` Saravana Kannan
  2014-07-16  8:48       ` Viresh Kumar
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
  2 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-15 22:47 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

Since we no longer alloc and destroy/freeze policy and sysfs nodes during
hotplug and suspend, we don't need to lock sysfs with hotplug. We can
achieve the same effect by checking if policy->cpus is empty.

Hotplug mutual exclusion was only done for sysfs writes. But reads need the
same protection too.  So, this patch adds that too.

Also, cpufreq driver (un)register can race with hotplug since CPU online
state can change between adding/removing the currently online devices and
registering/unregistering for hotplug notifiers. So, fix that by
registering for hotplug notifiers first before adding devices and
unregistering from hotplug notifiers first before removing devices.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a0a2ec2..f72b2b7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -748,17 +748,18 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct cpufreq_policy *policy = to_policy(kobj);
 	struct freq_attr *fattr = to_attr(attr);
-	ssize_t ret;
+	ssize_t ret = -EINVAL;
 
 	if (!down_read_trylock(&cpufreq_rwsem))
-		return -EINVAL;
-
+		return ret;
 	down_read(&policy->rwsem);
 
-	if (fattr->show)
-		ret = fattr->show(policy, buf);
-	else
-		ret = -EIO;
+	if (!cpumask_empty(policy->cpus)) {
+		if (fattr->show)
+			ret = fattr->show(policy, buf);
+		else
+			ret = -EIO;
+	}
 
 	up_read(&policy->rwsem);
 	up_read(&cpufreq_rwsem);
@@ -773,26 +774,19 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct freq_attr *fattr = to_attr(attr);
 	ssize_t ret = -EINVAL;
 
-	get_online_cpus();
-
-	if (!cpu_online(policy->cpu))
-		goto unlock;
-
 	if (!down_read_trylock(&cpufreq_rwsem))
-		goto unlock;
-
+		return ret;
 	down_write(&policy->rwsem);
 
-	if (fattr->store)
-		ret = fattr->store(policy, buf, count);
-	else
-		ret = -EIO;
+	if (!cpumask_empty(policy->cpus)) {
+		if (fattr->store)
+			ret = fattr->store(policy, buf, count);
+		else
+			ret = -EIO;
+	}
 
 	up_write(&policy->rwsem);
-
 	up_read(&cpufreq_rwsem);
-unlock:
-	put_online_cpus();
 
 	return ret;
 }
@@ -2270,6 +2264,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 		}
 	}
 
+	register_hotcpu_notifier(&cpufreq_cpu_notifier);
+
 	ret = subsys_interface_register(&cpufreq_interface);
 	if (ret)
 		goto err_boost_unreg;
@@ -2293,13 +2289,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 		}
 	}
 
-	register_hotcpu_notifier(&cpufreq_cpu_notifier);
 	pr_debug("driver %s up and running\n", driver_data->name);
 
 	return 0;
 err_if_unreg:
 	subsys_interface_unregister(&cpufreq_interface);
 err_boost_unreg:
+	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
 	if (cpufreq_boost_supported())
 		cpufreq_sysfs_remove_file(&boost.attr);
 err_null_driver:
@@ -2327,12 +2323,12 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 
 	pr_debug("unregistering driver %s\n", driver->name);
 
+	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
+
 	subsys_interface_unregister(&cpufreq_interface);
 	if (cpufreq_boost_supported())
 		cpufreq_sysfs_remove_file(&boost.attr);
 
-	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
-
 	down_write(&cpufreq_rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
@ 2014-07-16  0:28       ` Saravana Kannan
  2014-07-16  8:30         ` Viresh Kumar
  2014-07-16  8:24       ` Viresh Kumar
  2014-07-16 14:29       ` Dirk Brandewie
  2 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16  0:28 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat,
	linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

One preemptive comment.

On 07/15/2014 03:47 PM, Saravana Kannan wrote:
> The CPUfreq core moves the cpufreq policy ownership between CPUs when CPUs
> within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When moving
> policy ownership between CPUs, it also moves the cpufreq sysfs directory
> between CPUs and also fixes up the symlinks of the other CPUs in the
> cluster.
>
> Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
> directories are deleted, the kobject is released and the policy is freed.
> And when the first CPU in a cluster comes up, the policy is reallocated and
> initialized, kobject is acquired, the sysfs nodes are created or symlinked,
> etc.
>
> All these steps end up creating unnecessarily complicated code and locking.
> There's no real benefit to adding/removing/moving the sysfs nodes and the
> policy between CPUs. Other per CPU sysfs directories like power and cpuidle
> are left alone during hotplug. So there's some precedent for what this
> patch is trying to do.
>
> This patch simplifies a lot of the code and locking by removing the
> adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
> directory and policy in place irrespective of whether the CPUs are
> ONLINE/OFFLINE.
>
> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume
> * Faster hotplug
> * Sysfs file permissions maintained across hotplug
> * Policy settings and governor tunables maintained across hotplug
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>    queried even after CPU goes OFFLINE
>
> Tested-by: Stephen Boyd <sboyd@codeaurora.org>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>   drivers/cpufreq/cpufreq.c | 388 +++++++++++++---------------------------------
>   1 file changed, 107 insertions(+), 281 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 62259d2..a0a2ec2 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c

<SNIP>

> @@ -961,60 +967,58 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>   }
>
>   #ifdef CONFIG_HOTPLUG_CPU
> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
> -				  unsigned int cpu, struct device *dev)
> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
> +				  unsigned int cpu, bool add)
>   {
>   	int ret = 0;
> -	unsigned long flags;
> +	unsigned int cpus, pcpu;
>
> -	if (has_target()) {
> +	down_write(&policy->rwsem);
> +
> +	cpus = !cpumask_empty(policy->cpus);
> +	if (has_target() && cpus) {
>   		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>   		if (ret) {
>   			pr_err("%s: Failed to stop governor\n", __func__);
> -			return ret;
> +			goto unlock;
>   		}
>   	}
>

<SNIP>

> +	if (add)
> +		cpumask_set_cpu(cpu, policy->cpus);
> +	else
> +		cpumask_clear_cpu(cpu, policy->cpus);
>
> -	up_write(&policy->rwsem);
> +	pcpu = cpumask_first(policy->cpus);
> +	if (pcpu < nr_cpu_ids && policy->cpu != pcpu) {
> +		policy->last_cpu = policy->cpu;
> +		policy->cpu = pcpu;
> +		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +					CPUFREQ_UPDATE_POLICY_CPU, policy);
> +	}
>
> -	if (has_target()) {
> +	cpus = !cpumask_empty(policy->cpus);
> +	if (has_target() && cpus) {
>   		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>   		if (!ret)
>   			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>
>   		if (ret) {
>   			pr_err("%s: Failed to start governor\n", __func__);
> -			return ret;
> +			goto unlock;
>   		}
>   	}
>

<SNIP>

> +	if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> +		cpufreq_driver->stop_cpu(policy);
> +	}
>

Viresh, I tried your suggestion (and my initial thought too) to combine 
this as an if/else with the previous if. But the indentation got nasty 
and made it hard to read. I'm sure the compiler will optimize it. So, I 
would prefer to leave it this way.


> -	policy->governor = NULL;
> +unlock:
> +	up_write(&policy->rwsem);
>
> -	return policy;
> +	return ret;
>   }
> +#endif
>

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15  6:58                   ` Srivatsa S. Bhat
  2014-07-15 17:35                     ` skannan
@ 2014-07-16  5:44                     ` Viresh Kumar
  2014-07-16  7:49                       ` Srivatsa S. Bhat
  1 sibling, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-16  5:44 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: Saravana Kannan, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 15 July 2014 12:28, Srivatsa S. Bhat <srivatsa@mit.edu> wrote:
> Wait, allowing an offline CPU to be the policy->cpu (i.e., the CPU which is
> considered as the master of the policy/group) is just absurd.

Yeah, that was as Absurd as I am :)

> The goal of this patchset should be to just de-couple the sysfs files/ownership
> from the policy->cpu to an extent where it doesn't matter who owns those
> files, and probably make it easier to do CPU hotplug without having to
> destroy and recreate the files on every hotplug operation.

I went to that Absurd idea because we thought we can skip playing with
the sysfs nodes on suspend/hotplug.

And if policy->cpu keeps changing with hotplug, we *may* have to keep
sysfs stuff moving as well. One way to avoid that is by using something
like: policy->sysfs_cpu, but wasn't sure if that's the right path to follow.

Let's see what Saravana's new patchset has for us :)

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15 17:35                     ` skannan
@ 2014-07-16  7:44                       ` Srivatsa S. Bhat
  0 siblings, 0 replies; 76+ messages in thread
From: Srivatsa S. Bhat @ 2014-07-16  7:44 UTC (permalink / raw)
  To: skannan
  Cc: Viresh Kumar, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/15/2014 11:05 PM, skannan@codeaurora.org wrote:
> 
> Srivatsa S. Bhat wrote:
>> On 07/15/2014 11:06 AM, Saravana Kannan wrote:
>>> On 07/14/2014 09:35 PM, Viresh Kumar wrote:
>>>> On 15 July 2014 00:38, Saravana Kannan <skannan@codeaurora.org> wrote:
>>>>> Yeah, it definitely crashes if policy->cpu is an offline cpu. Because
>>>>> the
>>>>> mutex would be uninitialized if it's stopped after boot or it would
>>>>> never
>>>>> have been initialized (depending on how you fix policy->cpu at boot).
>>>>>
>>>>> Look at this snippet on the actual tree and it should be pretty
>>>>> evident.
>>>>
>>>> Yeah, I missed it. So the problem is we initialize timer_mutex's for
>>>> policy->cpus. So we need to do that just for policy->cpu and also we
>>>> don't
>>>> need a per-cpu timer_mutex anymore.
>>>>
>>>
>>> Btw, I tried to take a stab at removing any assumption in cpufreq code
>>> about policy->cpu being ONLINE.
>>
>> Wait, allowing an offline CPU to be the policy->cpu (i.e., the CPU which
>> is
>> considered as the master of the policy/group) is just absurd. If there is
>> no leader, there is no army. We should NOT sacrifice sane semantics for
>> the
>> sake of simplifying the code.
>>
>>> There are 160 instances of those, of which
>>> 23 are in cpufreq.c
>>>
>>
>> And that explains why. It is just *natural* to assume that the CPUs
>> governed
>> by a policy are online. Especially so for the CPU which is supposed to be
>> the policy leader. Let us please not change that - it will become
>> counter-intuitive if we do so. [ The other reason is that physical hotplug
>> is also possible on some systems... in that case your code might make a
>> CPU
>> which is not even present (but possible) as the policy->cpu.. and great
>> 'fun'
>> will ensue after that ;-( ]
>>
>> The goal of this patchset should be to just de-couple the sysfs
>> files/ownership
>> from the policy->cpu to an extent where it doesn't matter who owns those
>> files, and probably make it easier to do CPU hotplug without having to
>> destroy and recreate the files on every hotplug operation.
>>
>> This is exactly why the _implementation_ matters in this particular case -
>> if we can't achieve the simplification by keeping sane semantics, then we
>> shouldn't do the simplification!
>>
>> That said, I think we should keep trying - we haven't exhausted all ideas
>> yet :-)
>>
> 
> I don't think we disagree. To summarize this topic: I tried to keep the
> policy->cpu an actual online CPU so as to not break existing semantics in
> this patch. Viresh asked "why not fix it at boot?". My response was to
> keep it an online CPU and give it a shot in a separate patch if we really
> want that. It's too risky to do that in this patch and also not a
> mandatory change for this patch.
> 
> I think we can work out the details on the need for fixing policy->cpu at
> boot and whether there's even a need for policy->cpu (when we already have
> policy->cpus) in a separate thread after the dust settles on this one?
>

Sure, that sounds good!

Regards,
Srivatsa S. Bhat


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16  5:44                     ` Viresh Kumar
@ 2014-07-16  7:49                       ` Srivatsa S. Bhat
  0 siblings, 0 replies; 76+ messages in thread
From: Srivatsa S. Bhat @ 2014-07-16  7:49 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Saravana Kannan, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 11:14 AM, Viresh Kumar wrote:
> On 15 July 2014 12:28, Srivatsa S. Bhat <srivatsa@mit.edu> wrote:
>> Wait, allowing an offline CPU to be the policy->cpu (i.e., the CPU which is
>> considered as the master of the policy/group) is just absurd.
> 
> Yeah, that was as Absurd as I am :)
> 

I have had my own share of silly ideas over the years; so don't worry, we are
all in the same boat ;-)

>> The goal of this patchset should be to just de-couple the sysfs files/ownership
>> from the policy->cpu to an extent where it doesn't matter who owns those
>> files, and probably make it easier to do CPU hotplug without having to
>> destroy and recreate the files on every hotplug operation.
> 
> I went to that Absurd idea because we thought we can skip playing with
> the sysfs nodes on suspend/hotplug.
> 
> And if policy->cpu keeps changing with hotplug, we *may* have to keep
> sysfs stuff moving as well. One way to avoid that is by using something
> like: policy->sysfs_cpu, but wasn't sure if that's the right path to follow.
>

Hmm, I understand.. Even I don't have any suggestions as of now, since I
haven't spent enough time thinking of alternatives yet.

> Let's see what Saravana's new patchset has for us :)
> 

Yep :-)

Regards,
Srivatsa S. Bhat

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
  2014-07-16  0:28       ` Saravana Kannan
@ 2014-07-16  8:24       ` Viresh Kumar
  2014-07-16 11:16         ` Srivatsa S. Bhat
  2014-07-16 20:25         ` Saravana Kannan
  2014-07-16 14:29       ` Dirk Brandewie
  2 siblings, 2 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-16  8:24 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

> +/* symlink related CPUs */
> +static int cpufreq_dev_symlink(struct cpufreq_policy *policy, bool add)
>  {
> -       unsigned int j;
> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);

The CPU which came first should get the ownership by default, instead
of the first one in the mask.

Normally at boot, all CPUs come up first and then only cpufreq init starts.
But in case all other CPUs fail to come up, then policy->cpu *might* point
to a failed cpu.

And so, we should simply use policy->cpu here instead of finding the
first one in the mask.

Also, it's not the duty of this routine to find which one is the policy cpu as
that is done by __cpufreq_add_dev(). And so in case we need to make
first cpu of a mask as policy->cpu, it should be done in __cpufreq_add_dev()
and not here. This one should just follow the orders :)

@Srivatsa: What happens to the sysfs directory if a CPU fails to come up?
Is it exactly similar to how it happens in hotplug? i.e. we do have a directory
there?

>         int ret = 0;
>
> -       for_each_cpu(j, policy->cpus) {
> +       for_each_cpu(j, policy->related_cpus) {
>                 struct device *cpu_dev;
>
> -               if (j == policy->cpu)
> +               if (j == first_cpu)
>                         continue;
>
> -               pr_debug("Adding link for CPU: %u\n", j);

Keep this please, it might be useful while debugging.

>                 cpu_dev = get_cpu_device(j);
> -               ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> -                                       "cpufreq");
> +               if (add)
> +                       ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> +                                               "cpufreq");
> +               else
> +                       sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
> +
>                 if (ret)
>                         break;
>         }
>         return ret;
>  }
>
> -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
> -                                    struct device *dev)
> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>  {
>         struct freq_attr **drv_attr;
> +       struct device *dev;
>         int ret = 0;
>
> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
> +       if (!dev)
> +               return -EINVAL;

Again, deciding which cpu is policy->cpu here is wrong. Just follow
orders of __cpufreq_add_dev().

>         /* prepare interface data */
>         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
>                                    &dev->kobj, "cpufreq");
> @@ -917,7 +923,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
>                         goto err_out_kobj_put;
>         }
>
> -       ret = cpufreq_add_dev_symlink(policy);
> +       ret = cpufreq_dev_symlink(policy, true);
>         if (ret)
>                 goto err_out_kobj_put;
>
> @@ -961,60 +967,58 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>  }
>
>  #ifdef CONFIG_HOTPLUG_CPU

@Srivatsa: I will try this but you also take care of this. These
ifdefs might go wrong,
i.e. we are surely using it in the current patch without HOTPLUG as well. See
cpufreq_add_dev()..

Also, how does suspend/resume work without CONFIG_HOTPLUG_CPU ?
What's the sequence of events?

> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
> -                                 unsigned int cpu, struct device *dev)
> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
> +                                 unsigned int cpu, bool add)
>  {
>         int ret = 0;
> -       unsigned long flags;
> +       unsigned int cpus, pcpu;
>
> -       if (has_target()) {
> +       down_write(&policy->rwsem);
> +
> +       cpus = !cpumask_empty(policy->cpus);

We aren't using cpus at multiple places and so probably it would
be better to use cpumask_empty() directly.

> +       if (has_target() && cpus) {

I may get the answer later in reviews, but when will cpus be 0 here?
Probably for non-boot cluster during suspend/resume, or forceful
hotplugging off all CPUs of a cluster. Right?

>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>                 if (ret) {
>                         pr_err("%s: Failed to stop governor\n", __func__);
> -                       return ret;
> +                       goto unlock;
>                 }
>         }
>
> -       down_write(&policy->rwsem);
> -
> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -       cpumask_set_cpu(cpu, policy->cpus);
> -       per_cpu(cpufreq_cpu_data, cpu) = policy;
> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       if (add)
> +               cpumask_set_cpu(cpu, policy->cpus);
> +       else
> +               cpumask_clear_cpu(cpu, policy->cpus);
>
> -       up_write(&policy->rwsem);
> +       pcpu = cpumask_first(policy->cpus);
> +       if (pcpu < nr_cpu_ids && policy->cpu != pcpu) {

No, we don't have to consider changing policy->cpu for every change
in policy->cpus. We need to do that only when policy->cpu goes down.

Also pcpu can't be < nr_cpu_ids, right?

> +               policy->last_cpu = policy->cpu;
> +               policy->cpu = pcpu;
> +               blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +                                       CPUFREQ_UPDATE_POLICY_CPU, policy);
> +       }
>
> -       if (has_target()) {
> +       cpus = !cpumask_empty(policy->cpus);
> +       if (has_target() && cpus) {
>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>                 if (!ret)
>                         ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>
>                 if (ret) {
>                         pr_err("%s: Failed to start governor\n", __func__);
> -                       return ret;
> +                       goto unlock;
>                 }
>         }
>
> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> -}
> -#endif
> -
> -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
> -{
> -       struct cpufreq_policy *policy;
> -       unsigned long flags;
> -
> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -       policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
> -
> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {

As I commented on V1, please make it else part of above if..

> +               cpufreq_driver->stop_cpu(policy);
> +       }
>
> -       policy->governor = NULL;
> +unlock:
> +       up_write(&policy->rwsem);
>
> -       return policy;
> +       return ret;
>  }
> +#endif
>
>  static struct cpufreq_policy *cpufreq_policy_alloc(void)
>  {
> @@ -1053,10 +1057,8 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
>         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>                         CPUFREQ_REMOVE_POLICY, policy);
>
> -       down_read(&policy->rwsem);
>         kobj = &policy->kobj;
>         cmp = &policy->kobj_unregister;
> -       up_read(&policy->rwsem);

Why? And also, these are unrelated changes and must be added as separate
commits.

>         kobject_put(kobj);
>
>         /*
> @@ -1076,32 +1078,12 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>         kfree(policy);
>  }
>
> -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
> -{
> -       if (WARN_ON(cpu == policy->cpu))
> -               return;
> -
> -       down_write(&policy->rwsem);
> -
> -       policy->last_cpu = policy->cpu;
> -       policy->cpu = cpu;
> -
> -       up_write(&policy->rwsem);
> -
> -       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -                       CPUFREQ_UPDATE_POLICY_CPU, policy);
> -}
> -
>  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  {
>         unsigned int j, cpu = dev->id;
>         int ret = -ENOMEM;
>         struct cpufreq_policy *policy;
>         unsigned long flags;
> -       bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -       struct cpufreq_policy *tpolicy;
> -#endif
>
>         if (cpu_is_offline(cpu))
>                 return 0;
> @@ -1110,9 +1092,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>
>  #ifdef CONFIG_SMP
>         /* check whether a different CPU already registered this
> -        * CPU because it is in the same boat. */
> +        * CPU because it is one of the related CPUs. */
>         policy = cpufreq_cpu_get(cpu);
> -       if (unlikely(policy)) {
> +       if (policy) {
> +               cpufreq_change_policy_cpus(policy, cpu, true);

This is just a waste of time at boot as ... (see below)

>                 cpufreq_cpu_put(policy);
>                 return 0;
>         }
> @@ -1121,45 +1104,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         if (!down_read_trylock(&cpufreq_rwsem))
>                 return 0;
>
> -#ifdef CONFIG_HOTPLUG_CPU
> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -                       up_read(&cpufreq_rwsem);
> -                       return ret;
> -               }
> -       }
> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> -
> -       /*
> -        * Restore the saved policy when doing light-weight init and fall back
> -        * to the full init if that fails.
> -        */
> -       policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
> -       if (!policy) {
> -               recover_policy = false;
> -               policy = cpufreq_policy_alloc();
> -               if (!policy)
> -                       goto nomem_out;
> -       }
> -
> -       /*
> -        * In the resume path, since we restore a saved policy, the assignment
> -        * to policy->cpu is like an update of the existing policy, rather than
> -        * the creation of a brand new one. So we need to perform this update
> -        * by invoking update_policy_cpu().
> -        */
> -       if (recover_policy && cpu != policy->cpu)
> -               update_policy_cpu(policy, cpu);
> -       else
> -               policy->cpu = cpu;
> +       /* If we get this far, this is the first time we are adding the
> +        * policy */
> +       policy = cpufreq_policy_alloc();
> +       if (!policy)
> +               goto nomem_out;
> +       policy->cpu = cpu;
>
>         cpumask_copy(policy->cpus, cpumask_of(cpu));
> -
>         init_completion(&policy->kobj_unregister);
>         INIT_WORK(&policy->update, handle_update);
>
> @@ -1169,26 +1121,25 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         ret = cpufreq_driver->init(policy);
>         if (ret) {
>                 pr_debug("initialization failed\n");
> -               goto err_set_policy_cpu;
> +               goto err_init;
>         }
>
>         /* related cpus should atleast have policy->cpus */
>         cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);

policy->cpus is already updated here.

> +       /* Weed out impossible CPUs. */
> +       cpumask_and(policy->related_cpus, policy->related_cpus,
> +                       cpu_possible_mask);

This has to be in a separate commit..

>         /*
>          * affected cpus must always be the one, which are online. We aren't
>          * managing offline cpus here.
>          */
>         cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
>
> -       if (!recover_policy) {
> -               policy->user_policy.min = policy->min;
> -               policy->user_policy.max = policy->max;
> -       }
> -

Where did these go? They weren't there for fun.

>         down_write(&policy->rwsem);
>         write_lock_irqsave(&cpufreq_driver_lock, flags);
> -       for_each_cpu(j, policy->cpus)
> +       for_each_cpu(j, policy->related_cpus)
>                 per_cpu(cpufreq_cpu_data, j) = policy;
>         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>
> @@ -1243,13 +1194,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>                                      CPUFREQ_START, policy);
>
> -       if (!recover_policy) {
> -               ret = cpufreq_add_dev_interface(policy, dev);
> -               if (ret)
> -                       goto err_out_unregister;
> -               blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -                               CPUFREQ_CREATE_POLICY, policy);
> -       }
> +       ret = cpufreq_add_dev_interface(policy);
> +       if (ret)
> +               goto err_out_unregister;
> +       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +                       CPUFREQ_CREATE_POLICY, policy);
>
>         write_lock_irqsave(&cpufreq_driver_lock, flags);
>         list_add(&policy->policy_list, &cpufreq_policy_list);
> @@ -1257,10 +1206,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>
>         cpufreq_init_policy(policy);
>
> -       if (!recover_policy) {
> -               policy->user_policy.policy = policy->policy;
> -               policy->user_policy.governor = policy->governor;
> -       }

Same here.

>         up_write(&policy->rwsem);
>
>         kobject_uevent(&policy->kobj, KOBJ_ADD);
> @@ -1273,20 +1218,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  err_out_unregister:
>  err_get_freq:
>         write_lock_irqsave(&cpufreq_driver_lock, flags);
> -       for_each_cpu(j, policy->cpus)
> +       for_each_cpu(j, policy->related_cpus)
>                 per_cpu(cpufreq_cpu_data, j) = NULL;
>         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -
> +       up_write(&policy->rwsem);
>         if (cpufreq_driver->exit)
>                 cpufreq_driver->exit(policy);
> -err_set_policy_cpu:
> -       if (recover_policy) {
> -               /* Do not leave stale fallback data behind. */
> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
> -               cpufreq_policy_put_kobj(policy);
> -       }
> +err_init:
>         cpufreq_policy_free(policy);
> -
>  nomem_out:
>         up_read(&cpufreq_rwsem);
>

Just to mention, I am not looking at the validity of error fallback paths
in this version. Just make sure they are all good :)

> @@ -1307,100 +1246,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         return __cpufreq_add_dev(dev, sif);
>  }
>
> -static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
> -                                          unsigned int old_cpu)
> -{
> -       struct device *cpu_dev;
> -       int ret;
> -
> -       /* first sibling now owns the new sysfs dir */
> -       cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
> -
> -       sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
> -       ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
> -       if (ret) {
> -               pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
> -
> -               down_write(&policy->rwsem);
> -               cpumask_set_cpu(old_cpu, policy->cpus);
> -               up_write(&policy->rwsem);
> -
> -               ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> -                                       "cpufreq");
> -
> -               return -EINVAL;
> -       }
> -
> -       return cpu_dev->id;
> -}
> -
> -static int __cpufreq_remove_dev_prepare(struct device *dev,
> -                                       struct subsys_interface *sif)
> +static int __cpufreq_remove_dev(struct device *dev,
> +                               struct subsys_interface *sif)
>  {
> -       unsigned int cpu = dev->id, cpus;
> -       int new_cpu, ret;
> +       unsigned int cpu = dev->id, j;
> +       int ret = 0;
>         unsigned long flags;
>         struct cpufreq_policy *policy;
>
>         pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>
> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -       policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -       /* Save the policy somewhere when doing a light-weight tear-down */
> -       if (cpufreq_suspended)
> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -
> -       if (!policy) {
> -               pr_debug("%s: No cpu_data found\n", __func__);
> -               return -EINVAL;
> -       }
> -
> -       if (has_target()) {
> -               ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
> -               if (ret) {
> -                       pr_err("%s: Failed to stop governor\n", __func__);
> -                       return ret;
> -               }
> -       }
> -
> -       if (!cpufreq_driver->setpolicy)
> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -                       policy->governor->name, CPUFREQ_NAME_LEN);

Where is this gone? There are several instances of code just being
removed, this is the third one. It's really, really tough to catch these
in this big of a patch. Believe me.

You have to break this patch into multiple ones, see this on how to
break even simplest of the changes into multiple patches:
https://lkml.org/lkml/2013/9/6/400

It's just impossible to catch bugs that you might have introduced here due
to the size of this patch. And it's taking a LOT of time for me to review this.
As I have to keep diff in one tab, new cpufreq.c in one and the old cpufreq.c
in one and then compare..

> -       down_read(&policy->rwsem);
> -       cpus = cpumask_weight(policy->cpus);
> -       up_read(&policy->rwsem);
> -
> -       if (cpu != policy->cpu) {
> -               sysfs_remove_link(&dev->kobj, "cpufreq");
> -       } else if (cpus > 1) {
> -               new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -               if (new_cpu >= 0) {
> -                       update_policy_cpu(policy, new_cpu);
> -
> -                       if (!cpufreq_suspended)
> -                               pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -                                        __func__, new_cpu, cpu);
> -               }
> -       } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> -               cpufreq_driver->stop_cpu(policy);
> -       }
> -
> -       return 0;
> -}
> -
> -static int __cpufreq_remove_dev_finish(struct device *dev,
> -                                      struct subsys_interface *sif)
> -{
> -       unsigned int cpu = dev->id, cpus;
> -       int ret;
> -       unsigned long flags;
> -       struct cpufreq_policy *policy;
> -
>         read_lock_irqsave(&cpufreq_driver_lock, flags);
>         policy = per_cpu(cpufreq_cpu_data, cpu);
>         read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> @@ -1410,56 +1265,45 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>                 return -EINVAL;
>         }
>
> -       down_write(&policy->rwsem);
> -       cpus = cpumask_weight(policy->cpus);
> -
> -       if (cpus > 1)
> -               cpumask_clear_cpu(cpu, policy->cpus);
> -       up_write(&policy->rwsem);
> -
> -       /* If cpu is last user of policy, free policy */
> -       if (cpus == 1) {
> -               if (has_target()) {
> -                       ret = __cpufreq_governor(policy,
> -                                       CPUFREQ_GOV_POLICY_EXIT);
> -                       if (ret) {
> -                               pr_err("%s: Failed to exit governor\n",
> -                                      __func__);
> -                               return ret;
> -                       }
> -               }
> -
> -               if (!cpufreq_suspended)
> -                       cpufreq_policy_put_kobj(policy);
> +#ifdef CONFIG_HOTPLUG_CPU
> +       ret = cpufreq_change_policy_cpus(policy, cpu, false);
> +#endif
> +       if (ret)
> +               return ret;

Why is the if block kept outside of #ifdef? And should we really call
change_*() from inside a #ifdef here?

>
> -               /*
> -                * Perform the ->exit() even during light-weight tear-down,
> -                * since this is a core component, and is essential for the
> -                * subsequent light-weight ->init() to succeed.
> -                */
> -               if (cpufreq_driver->exit)
> -                       cpufreq_driver->exit(policy);
> +       if (!sif)
> +               return 0;

Why? I know that, but we should have comments to describe this ...

>
> -               /* Remove policy from list of active policies */
> -               write_lock_irqsave(&cpufreq_driver_lock, flags);
> -               list_del(&policy->policy_list);
> -               write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       if (!cpumask_empty(policy->cpus)) {
> +               return 0;
> +       }

You might still call this attempt a showcase of idea, but I am reviewing it
at my full capacity. And these small things just break my flow.

- Don't add {} for single liner blocks
- Add comments with proper comment style
- Run checkpatch --strict before sending patches.

>
> -               if (!cpufreq_suspended)
> -                       cpufreq_policy_free(policy);
> -       } else if (has_target()) {
> -               ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
> -               if (!ret)
> -                       ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> +       cpufreq_dev_symlink(policy, false);
>
> +       if (has_target()) {
> +               ret = __cpufreq_governor(policy,
> +                               CPUFREQ_GOV_POLICY_EXIT);

Can come in single line

>                 if (ret) {
> -                       pr_err("%s: Failed to start governor\n", __func__);
> +                       pr_err("%s: Failed to exit governor\n",
> +                              __func__);

This too..

>                         return ret;
>                 }
>         }
>
> -       per_cpu(cpufreq_cpu_data, cpu) = NULL;
> -       return 0;
> +       cpufreq_policy_put_kobj(policy);
> +       if (cpufreq_driver->exit)
> +               cpufreq_driver->exit(policy);
> +
> +       /* Remove policy from list of active policies */
> +       write_lock_irqsave(&cpufreq_driver_lock, flags);
> +       for_each_cpu(j, policy->related_cpus)
> +               per_cpu(cpufreq_cpu_data, j) = NULL;
> +       list_del(&policy->policy_list);
> +       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +       cpufreq_policy_free(policy);
> +
> +       return ret;
>  }
>
>  /**
> @@ -1469,18 +1313,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>   */
>  static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>  {
> -       unsigned int cpu = dev->id;
> -       int ret;
> -
> -       if (cpu_is_offline(cpu))
> -               return 0;

Why is it part of this commit?

> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
> -
> -       if (!ret)
> -               ret = __cpufreq_remove_dev_finish(dev, sif);
> -
> -       return ret;
> +       return __cpufreq_remove_dev(dev, sif);
>  }
>
>  static void handle_update(struct work_struct *work)
> @@ -2295,19 +2128,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>         if (dev) {
>                 switch (action & ~CPU_TASKS_FROZEN) {
>                 case CPU_ONLINE:
> +               case CPU_DOWN_FAILED:

For example. This change doesn't have anything to do with this patch
and would have been so easy to review it, if it was kept separate.

Also, this wouldn't even require waiting for this complete series to make
sense and can be merged very early.

>                         __cpufreq_add_dev(dev, NULL);
>                         break;
>
>                 case CPU_DOWN_PREPARE:
> -                       __cpufreq_remove_dev_prepare(dev, NULL);
> -                       break;
> -
> -               case CPU_POST_DEAD:
> -                       __cpufreq_remove_dev_finish(dev, NULL);
> -                       break;
> -
> -               case CPU_DOWN_FAILED:
> -                       __cpufreq_add_dev(dev, NULL);
> +                       __cpufreq_remove_dev(dev, NULL);

@Srivatsa: You might want to have a look at this, remove sequence was
separated for some purpose and I am just not able to concentrate enough
to think of that, just too many cases running in my mind :)

>                         break;
>                 }
>         }

I am still not sure if everything will work as expected as I seriously doubt
my reviewing capabilities. There might be corner cases which I am still
missing.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16  0:28       ` Saravana Kannan
@ 2014-07-16  8:30         ` Viresh Kumar
  2014-07-16 19:19           ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-16  8:30 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 16 July 2014 05:58, Saravana Kannan <skannan@codeaurora.org> wrote:
>> +       if (!cpus && cpufreq_driver->stop_cpu &&
>> cpufreq_driver->setpolicy) {
>> +               cpufreq_driver->stop_cpu(policy);
>> +       }
>>
>
> Viresh, I tried your suggestion (and my initial thought too) to combine this
> as an if/else with the previous if. But the indentation got nasty and made
> it hard to read. I'm sure the compiler will optimize it. So, I would prefer
> to leave it this way.

Okay, I gave that comment again :)

Try this:

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index f72b2b7..092a0ba 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -991,8 +991,10 @@ static int cpufreq_change_policy_cpus(struct
cpufreq_policy *policy,
                                        CPUFREQ_UPDATE_POLICY_CPU, policy);
        }

-       cpus = !cpumask_empty(policy->cpus);
-       if (has_target() && cpus) {
+       if (!cpumask_empty(policy->cpus)) {
+               if (!has_target())
+                       goto unlock;
+
                ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
                if (!ret)
                        ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
@@ -1001,9 +1003,7 @@ static int cpufreq_change_policy_cpus(struct
cpufreq_policy *policy,
                        pr_err("%s: Failed to start governor\n", __func__);
                        goto unlock;
                }
-       }
-
-       if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
+       } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
                cpufreq_driver->stop_cpu(policy);
        }

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug
  2014-07-15 22:47     ` [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug Saravana Kannan
@ 2014-07-16  8:48       ` Viresh Kumar
  2014-07-16 19:34         ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-16  8:48 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:

Again, there are just too many things in a single patch. That's not acceptable.
A few of these might be bug fixes, which must go in before any other updates,
and so they should have been added as the first patch.

Even the other stuff you are trying to fix (by checking policy->cpus) should go
before 1/2, otherwise 1/2 will actually break things in between, i.e. it will
show values even when no CPUs of a cluster are online.

> Since we no longer alloc and destroy/freeze policy and sysfs nodes during
> hotplug and suspend, we don't need to lock sysfs with hotplug. We can
> achieve the same effect by checking if policy->cpus is empty.

Are you talking about the changes in store()?

> Hotplug mutual exclusion was only done for sysfs writes. But reads need the
> same protection too.  So, this patch adds that too.

How? How is checking for policy->cpus enough?

> Also, cpufreq driver (un)register can race with hotplug since CPU online
> state can change between adding/removing the currently online devices and
> registering/unregistering for hotplug notifiers. So, fix that by
> registering for hotplug notifiers first before adding devices and
> unregistering from hotplug notifiers first before removing devices.

I couldn't follow this. Please give us an example race and what goes wrong
because of it. Also, this should have been a separate patch of its own.

> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 44 ++++++++++++++++++++------------------------
>  1 file changed, 20 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index a0a2ec2..f72b2b7 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -748,17 +748,18 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
>  {
>         struct cpufreq_policy *policy = to_policy(kobj);
>         struct freq_attr *fattr = to_attr(attr);
> -       ssize_t ret;
> +       ssize_t ret = -EINVAL;
>
>         if (!down_read_trylock(&cpufreq_rwsem))
> -               return -EINVAL;
> -
> +               return ret;
>         down_read(&policy->rwsem);
>
> -       if (fattr->show)
> -               ret = fattr->show(policy, buf);
> -       else
> -               ret = -EIO;
> +       if (!cpumask_empty(policy->cpus)) {
> +               if (fattr->show)
> +                       ret = fattr->show(policy, buf);
> +               else
> +                       ret = -EIO;
> +       }

Makes sense up to this point.

>         up_read(&policy->rwsem);
>         up_read(&cpufreq_rwsem);
> @@ -773,26 +774,19 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>         struct freq_attr *fattr = to_attr(attr);
>         ssize_t ret = -EINVAL;
>
> -       get_online_cpus();
> -
> -       if (!cpu_online(policy->cpu))
> -               goto unlock;
> -

@Srivatsa: what do you say?

>         if (!down_read_trylock(&cpufreq_rwsem))
> -               goto unlock;
> -
> +               return ret;
>         down_write(&policy->rwsem);
>
> -       if (fattr->store)
> -               ret = fattr->store(policy, buf, count);
> -       else
> -               ret = -EIO;
> +       if (!cpumask_empty(policy->cpus)) {
> +               if (fattr->store)
> +                       ret = fattr->store(policy, buf, count);
> +               else
> +                       ret = -EIO;
> +       }
>
>         up_write(&policy->rwsem);
> -
>         up_read(&cpufreq_rwsem);
> -unlock:
> -       put_online_cpus();
>
>         return ret;
>  }
> @@ -2270,6 +2264,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
>                 }
>         }
>
> +       register_hotcpu_notifier(&cpufreq_cpu_notifier);
> +
>         ret = subsys_interface_register(&cpufreq_interface);
>         if (ret)
>                 goto err_boost_unreg;
> @@ -2293,13 +2289,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
>                 }
>         }
>
> -       register_hotcpu_notifier(&cpufreq_cpu_notifier);
>         pr_debug("driver %s up and running\n", driver_data->name);
>
>         return 0;
>  err_if_unreg:
>         subsys_interface_unregister(&cpufreq_interface);
>  err_boost_unreg:
> +       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
>         if (cpufreq_boost_supported())
>                 cpufreq_sysfs_remove_file(&boost.attr);
>  err_null_driver:
> @@ -2327,12 +2323,12 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
>
>         pr_debug("unregistering driver %s\n", driver->name);
>
> +       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
> +
>         subsys_interface_unregister(&cpufreq_interface);
>         if (cpufreq_boost_supported())
>                 cpufreq_sysfs_remove_file(&boost.attr);
>
> -       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
> -
>         down_write(&cpufreq_rwsem);
>         write_lock_irqsave(&cpufreq_driver_lock, flags);

Normally the unregister order should be exactly the reverse of the register
order. Isn't that true here? Yeah, it was broken earlier as well...

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16  8:24       ` Viresh Kumar
@ 2014-07-16 11:16         ` Srivatsa S. Bhat
  2014-07-16 13:13           ` Viresh Kumar
  2014-07-16 19:56           ` Saravana Kannan
  2014-07-16 20:25         ` Saravana Kannan
  1 sibling, 2 replies; 76+ messages in thread
From: Srivatsa S. Bhat @ 2014-07-16 11:16 UTC (permalink / raw)
  To: Viresh Kumar, Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 01:54 PM, Viresh Kumar wrote:
> On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> 
>> +/* symlink related CPUs */
>> +static int cpufreq_dev_symlink(struct cpufreq_policy *policy, bool add)
>>  {
>> -       unsigned int j;
>> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
> 
> The CPU which came first should get the ownership by default, instead
> of the first one in the mask.
> 
> Normally at boot, all CPUs come up first and then only cpufreq init starts.
> But in case all other CPUs fail to come up, then policy->cpu *might* point
> to a failed cpu.
> 
> And so, we should simply use policy->cpu here instead of finding the
> first one in the mask.
> 
> Also, its not the duty of this routine to find which one is the policy cpu as
> that is done by __cpufreq_add_dev(). And so in case we need to make
> first cpu of a mask as policy->cpu, it should be done in __cpufreq_add_dev()
> and not here. This one should just follow the orders :)
> 
> @Srivatsa: What happens to the sysfs directory if a CPU fails to come up?
> Is it exactly similar to how it happens in hotplug? i.e. we do have a directory
> there?
> 

Short answer: If the sysfs directory has already been created by cpufreq,
then yes, it will remain as it is. However, if the online operation failed
before that, then cpufreq won't know about that CPU at all, and no file will
be created.

Long answer:
The existing cpufreq code does all its work (including creating the sysfs
directories etc) at the CPU_ONLINE stage. This stage is not expected to fail
(in fact even the core CPU hotplug code in kernel/cpu.c doesn't care for
error returns at this point). So if a CPU fails to come up in earlier stages
itself (such as CPU_UP_PREPARE), then cpufreq won't even hear about that CPU,
and hence no sysfs files will be created/linked. However, if the CPU bringup
operation fails during the CPU_ONLINE stage after the cpufreq's notifier has
been invoked, then we do nothing about it and the cpufreq sysfs files will
remain.

>>         int ret = 0;
>>
>> -       for_each_cpu(j, policy->cpus) {
>> +       for_each_cpu(j, policy->related_cpus) {
>>                 struct device *cpu_dev;
>>

[...]

>> @@ -961,60 +967,58 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>>  }
>>
>>  #ifdef CONFIG_HOTPLUG_CPU
> 
> @Srivatsa: I will try this but you also take care of this. These
> ifdefs might go wrong,
> i.e. we are surely using it in the current patch without HOTPLUG as well. See
> cpufreq_add_dev()..
> 

Yeah, looks suspicious.

> Also, how does suspend/resume work without CONFIG_HOTPLUG_CPU ?
> What's the sequence of events?
> 

Well, CONFIG_SUSPEND doesn't have an explicit dependency on HOTPLUG_CPU, but
SMP systems usually use CONFIG_PM_SLEEP_SMP, which sets CONFIG_HOTPLUG_CPU.
(I guess the reason why CONFIG_SUSPEND doesn't depend on HOTPLUG_CPU is
because suspend is possible even on uniprocessor systems and hence the
Kconfig dependency wasn't really justified).

>> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>> -                                 unsigned int cpu, struct device *dev)
>> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
>> +                                 unsigned int cpu, bool add)

[...]

>> -
>> -       if (!cpufreq_driver->setpolicy)
>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
> 
> Where is this gone? There are several instances of code just being
> removed, this is the third one. Its really really tough to catch these
> in this big of a patch. Believe me.
> 
> You have to break this patch into multiple ones, see this on how to
> break even simplest of the changes into multiple patches:
> https://lkml.org/lkml/2013/9/6/400
> 
> Its just impossible to catch bugs that you might have introduced here due
> to the size of this patch. And its taking a LOT of time for me to review this.
> As I have to keep diff in one tab, new cpufreq.c in one and the old cpufreq.c
> in one and then compare..
> 

True, this is still a pretty huge chunk. Saravana, at this stage, don't worry
about making cpufreq work properly in each and every patch. Just ensure that
every patch builds fine; that should be good enough. I hope this will help you
in splitting up the patches further.

One other thing: your changelog contains what we usually write in a cover-
letter - *very* high-level goals of the patch. Ideally, you should explain
the subtle details and the non-obvious decisions or trade-offs that you have
made at various places in the code. Otherwise it becomes very hard to follow
your thought-flow just by looking at the patch. So please split up the patch
further and also make the changelogs useful to review the patch :-)

The link that Viresh gave above also did a lot of code reorganization in
cpufreq, so it should give you a good example of how to proceed.

[...]

>>                         __cpufreq_add_dev(dev, NULL);
>>                         break;
>>
>>                 case CPU_DOWN_PREPARE:
>> -                       __cpufreq_remove_dev_prepare(dev, NULL);
>> -                       break;
>> -
>> -               case CPU_POST_DEAD:
>> -                       __cpufreq_remove_dev_finish(dev, NULL);
>> -                       break;
>> -
>> -               case CPU_DOWN_FAILED:
>> -                       __cpufreq_add_dev(dev, NULL);
>> +                       __cpufreq_remove_dev(dev, NULL);
> 
> @Srivatsa: You might want to have a look at this, remove sequence was
> separated for some purpose and I am just not able to concentrate enough
> to think of that, just too many cases running in my mind :)
> 

Yeah, we had split it into _remove_dev_prepare() and _remove_dev_finish()
to avoid a few potential deadlocks. We wanted to call _remove_dev_prepare()
in the DOWN_PREPARE stage and then call _remove_dev_finish() (which waits
for the kobject refcount to drop) in the POST_DEAD stage. That is, we wanted
to do the kobject cleanup after releasing the hotplug lock, and POST_DEAD stage
was well-suited for that.

Commit 1aee40ac9c8 (cpufreq: Invoke __cpufreq_remove_dev_finish() after
releasing cpu_hotplug.lock) explains this in detail. Saravana, please take a
look at that reasoning and ensure that your patch doesn't re-introduce those
deadlock possibilities!

>>                         break;
>>                 }
>>         }
> 
> I am still not sure if everything will work as expected as I seriously doubt
> my reviewing capabilities. There might be corner cases which I am still
> missing.
> 

Regards,
Srivatsa S. Bhat

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 11:16         ` Srivatsa S. Bhat
@ 2014-07-16 13:13           ` Viresh Kumar
  2014-07-16 18:04             ` Srivatsa S. Bhat
  2014-07-16 19:56             ` Saravana Kannan
  2014-07-16 19:56           ` Saravana Kannan
  1 sibling, 2 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-16 13:13 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: Saravana Kannan, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 16 July 2014 16:46, Srivatsa S. Bhat <srivatsa@mit.edu> wrote:
> Short answer: If the sysfs directory has already been created by cpufreq,
> then yes, it will remain as it is. However, if the online operation failed
> before that, then cpufreq won't know about that CPU at all, and no file will
> be created.
>
> Long answer:
> The existing cpufreq code does all its work (including creating the sysfs
> directories etc) at the CPU_ONLINE stage. This stage is not expected to fail
> (in fact even the core CPU hotplug code in kernel/cpu.c doesn't care for
> error returns at this point). So if a CPU fails to come up in earlier stages
> itself (such as CPU_UP_PREPARE), then cpufreq won't even hear about that CPU,
> and hence no sysfs files will be created/linked. However, if the CPU bringup
> operation fails during the CPU_ONLINE stage after the cpufreq's notifier has
> been invoked, then we do nothing about it and the cpufreq sysfs files will
> remain.

In short, the problem I mentioned before this para is genuine. And setting
policy->cpu to the first cpu of a mask is indeed a bad idea.

>> Also, how does suspend/resume work without CONFIG_HOTPLUG_CPU ?
>> What's the sequence of events?
>>
>
> Well, CONFIG_SUSPEND doesn't have an explicit dependency on HOTPLUG_CPU, but
> SMP systems usually use CONFIG_PM_SLEEP_SMP, which sets CONFIG_HOTPLUG_CPU.

I read "usually" as *optional*.

> (I guess the reason why CONFIG_SUSPEND doesn't depend on HOTPLUG_CPU is
> because suspend is possible even on uniprocessor systems and hence the
> Kconfig dependency wasn't really justified).

Again the same question, how do we suspend when HOTPLUG is disabled?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
  2014-07-16  0:28       ` Saravana Kannan
  2014-07-16  8:24       ` Viresh Kumar
@ 2014-07-16 14:29       ` Dirk Brandewie
  2014-07-16 15:28         ` Viresh Kumar
  2 siblings, 1 reply; 76+ messages in thread
From: Dirk Brandewie @ 2014-07-16 14:29 UTC (permalink / raw)
  To: Saravana Kannan, Rafael J . Wysocki, Viresh Kumar, Todd Poynor,
	Srivatsa S . Bhat
  Cc: dirk.brandewie, linux-pm, linux-kernel, linux-arm-msm,
	linux-arm-kernel, Stephen Boyd

On 07/15/2014 03:47 PM, Saravana Kannan wrote:
> The CPUfreq core moves the cpufreq policy ownership between CPUs when CPUs
> within a cluster (CPUs sharing same policy) go ONLINE/OFFLINE. When moving
> policy ownership between CPUs, it also moves the cpufreq sysfs directory
> between CPUs and also fixes up the symlinks of the other CPUs in the
> cluster.
>
> Also, when all the CPUs in a cluster go OFFLINE, all the sysfs nodes and
> directories are deleted, the kobject is released and the policy is freed.
> And when the first CPU in a cluster comes up, the policy is reallocated and
> initialized, kobject is acquired, the sysfs nodes are created or symlinked,
> etc.
>
> All these steps end up creating unnecessarily complicated code and locking.
> There's no real benefit to adding/removing/moving the sysfs nodes and the
> policy between CPUs. Other per CPU sysfs directories like power and cpuidle
> are left alone during hotplug. So there's some precedence to what this
> patch is trying to do.
>
> This patch simplifies a lot of the code and locking by removing the
> adding/removing/moving of policy/sysfs/kobj and just leaves the cpufreq
> directory and policy in place irrespective of whether the CPUs are
> ONLINE/OFFLINE.
>
> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume
> * Faster hotplug
> * Sysfs file permissions maintained across hotplug
> * Policy settings and governor tunables maintained across hotplug
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>    queried even after CPU goes OFFLINE
>
> Tested-by: Stephen Boyd <sboyd@codeaurora.org>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>   drivers/cpufreq/cpufreq.c | 388 +++++++++++++---------------------------------
>   1 file changed, 107 insertions(+), 281 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 62259d2..a0a2ec2 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -37,7 +37,6 @@
>    */
>   static struct cpufreq_driver *cpufreq_driver;
>   static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
> -static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
>   static DEFINE_RWLOCK(cpufreq_driver_lock);
>   DEFINE_MUTEX(cpufreq_governor_lock);
>   static LIST_HEAD(cpufreq_policy_list);
> @@ -859,34 +858,41 @@ void cpufreq_sysfs_remove_file(const struct attribute *attr)
>   }
>   EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
>
> -/* symlink affected CPUs */
> -static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
> +/* symlink related CPUs */
> +static int cpufreq_dev_symlink(struct cpufreq_policy *policy, bool add)
>   {
> -	unsigned int j;
> +	unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>   	int ret = 0;
>
> -	for_each_cpu(j, policy->cpus) {
> +	for_each_cpu(j, policy->related_cpus) {
>   		struct device *cpu_dev;
>
> -		if (j == policy->cpu)
> +		if (j == first_cpu)
>   			continue;
>
> -		pr_debug("Adding link for CPU: %u\n", j);
>   		cpu_dev = get_cpu_device(j);
> -		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> -					"cpufreq");
> +		if (add)
> +			ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> +						"cpufreq");
> +		else
> +			sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
> +
>   		if (ret)
>   			break;
>   	}
>   	return ret;
>   }
>
> -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
> -				     struct device *dev)
> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>   {
>   	struct freq_attr **drv_attr;
> +	struct device *dev;
>   	int ret = 0;
>
> +	dev = get_cpu_device(cpumask_first(policy->related_cpus));
> +	if (!dev)
> +		return -EINVAL;
> +
>   	/* prepare interface data */
>   	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
>   				   &dev->kobj, "cpufreq");
> @@ -917,7 +923,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
>   			goto err_out_kobj_put;
>   	}
>
> -	ret = cpufreq_add_dev_symlink(policy);
> +	ret = cpufreq_dev_symlink(policy, true);
>   	if (ret)
>   		goto err_out_kobj_put;
>
> @@ -961,60 +967,58 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>   }
>
>   #ifdef CONFIG_HOTPLUG_CPU
> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
> -				  unsigned int cpu, struct device *dev)
> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
> +				  unsigned int cpu, bool add)
>   {
>   	int ret = 0;
> -	unsigned long flags;
> +	unsigned int cpus, pcpu;
>
> -	if (has_target()) {
> +	down_write(&policy->rwsem);
> +
> +	cpus = !cpumask_empty(policy->cpus);
> +	if (has_target() && cpus) {
>   		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>   		if (ret) {
>   			pr_err("%s: Failed to stop governor\n", __func__);
> -			return ret;
> +			goto unlock;
>   		}
>   	}
>
> -	down_write(&policy->rwsem);
> -
> -	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -	cpumask_set_cpu(cpu, policy->cpus);
> -	per_cpu(cpufreq_cpu_data, cpu) = policy;
> -	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	if (add)
> +		cpumask_set_cpu(cpu, policy->cpus);
> +	else
> +		cpumask_clear_cpu(cpu, policy->cpus);
>
> -	up_write(&policy->rwsem);
> +	pcpu = cpumask_first(policy->cpus);
> +	if (pcpu < nr_cpu_ids && policy->cpu != pcpu) {
> +		policy->last_cpu = policy->cpu;
> +		policy->cpu = pcpu;
> +		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +					CPUFREQ_UPDATE_POLICY_CPU, policy);
> +	}
>
> -	if (has_target()) {
> +	cpus = !cpumask_empty(policy->cpus);
> +	if (has_target() && cpus) {
>   		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>   		if (!ret)
>   			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>
>   		if (ret) {
>   			pr_err("%s: Failed to start governor\n", __func__);
> -			return ret;
> +			goto unlock;
>   		}
>   	}
>
> -	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> -}
> -#endif
> -
> -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
> -{
> -	struct cpufreq_policy *policy;
> -	unsigned long flags;
> -
> -	read_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
> -
> -	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> +		cpufreq_driver->stop_cpu(policy);
> +	}

stop_cpu() only needs to be called during __cpufreq_remove_dev_prepare() and
nowhere else.

>
> -	policy->governor = NULL;
> +unlock:
> +	up_write(&policy->rwsem);
>
> -	return policy;
> +	return ret;
>   }
> +#endif
>
>   static struct cpufreq_policy *cpufreq_policy_alloc(void)
>   {
> @@ -1053,10 +1057,8 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
>   	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>   			CPUFREQ_REMOVE_POLICY, policy);
>
> -	down_read(&policy->rwsem);
>   	kobj = &policy->kobj;
>   	cmp = &policy->kobj_unregister;
> -	up_read(&policy->rwsem);
>   	kobject_put(kobj);
>
>   	/*
> @@ -1076,32 +1078,12 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>   	kfree(policy);
>   }
>
> -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
> -{
> -	if (WARN_ON(cpu == policy->cpu))
> -		return;
> -
> -	down_write(&policy->rwsem);
> -
> -	policy->last_cpu = policy->cpu;
> -	policy->cpu = cpu;
> -
> -	up_write(&policy->rwsem);
> -
> -	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -			CPUFREQ_UPDATE_POLICY_CPU, policy);
> -}
> -
>   static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>   {
>   	unsigned int j, cpu = dev->id;
>   	int ret = -ENOMEM;
>   	struct cpufreq_policy *policy;
>   	unsigned long flags;
> -	bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -	struct cpufreq_policy *tpolicy;
> -#endif
>
>   	if (cpu_is_offline(cpu))
>   		return 0;
> @@ -1110,9 +1092,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>
>   #ifdef CONFIG_SMP
>   	/* check whether a different CPU already registered this
> -	 * CPU because it is in the same boat. */
> +	 * CPU because it is one of the related CPUs. */
>   	policy = cpufreq_cpu_get(cpu);
> -	if (unlikely(policy)) {
> +	if (policy) {
> +		cpufreq_change_policy_cpus(policy, cpu, true);
>   		cpufreq_cpu_put(policy);
>   		return 0;
>   	}
> @@ -1121,45 +1104,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>   	if (!down_read_trylock(&cpufreq_rwsem))
>   		return 0;
>
> -#ifdef CONFIG_HOTPLUG_CPU
> -	/* Check if this cpu was hot-unplugged earlier and has siblings */
> -	read_lock_irqsave(&cpufreq_driver_lock, flags);
> -	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -			up_read(&cpufreq_rwsem);
> -			return ret;
> -		}
> -	}
> -	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> -
> -	/*
> -	 * Restore the saved policy when doing light-weight init and fall back
> -	 * to the full init if that fails.
> -	 */
> -	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
> -	if (!policy) {
> -		recover_policy = false;
> -		policy = cpufreq_policy_alloc();
> -		if (!policy)
> -			goto nomem_out;
> -	}
> -
> -	/*
> -	 * In the resume path, since we restore a saved policy, the assignment
> -	 * to policy->cpu is like an update of the existing policy, rather than
> -	 * the creation of a brand new one. So we need to perform this update
> -	 * by invoking update_policy_cpu().
> -	 */
> -	if (recover_policy && cpu != policy->cpu)
> -		update_policy_cpu(policy, cpu);
> -	else
> -		policy->cpu = cpu;
> +	/* If we get this far, this is the first time we are adding the
> +	 * policy */
> +	policy = cpufreq_policy_alloc();
> +	if (!policy)
> +		goto nomem_out;
> +	policy->cpu = cpu;
>
>   	cpumask_copy(policy->cpus, cpumask_of(cpu));
> -
>   	init_completion(&policy->kobj_unregister);
>   	INIT_WORK(&policy->update, handle_update);
>
> @@ -1169,26 +1121,25 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>   	ret = cpufreq_driver->init(policy);
>   	if (ret) {
>   		pr_debug("initialization failed\n");
> -		goto err_set_policy_cpu;
> +		goto err_init;
>   	}
>
>   	/* related cpus should atleast have policy->cpus */
>   	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
>
> +	/* Weed out impossible CPUs. */
> +	cpumask_and(policy->related_cpus, policy->related_cpus,
> +			cpu_possible_mask);
> +
>   	/*
>   	 * affected cpus must always be the one, which are online. We aren't
>   	 * managing offline cpus here.
>   	 */
>   	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
>
> -	if (!recover_policy) {
> -		policy->user_policy.min = policy->min;
> -		policy->user_policy.max = policy->max;
> -	}
> -
>   	down_write(&policy->rwsem);
>   	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>   		per_cpu(cpufreq_cpu_data, j) = policy;
>   	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>
> @@ -1243,13 +1194,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>   	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>   				     CPUFREQ_START, policy);
>
> -	if (!recover_policy) {
> -		ret = cpufreq_add_dev_interface(policy, dev);
> -		if (ret)
> -			goto err_out_unregister;
> -		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -				CPUFREQ_CREATE_POLICY, policy);
> -	}
> +	ret = cpufreq_add_dev_interface(policy);
> +	if (ret)
> +		goto err_out_unregister;
> +	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +			CPUFREQ_CREATE_POLICY, policy);
>
>   	write_lock_irqsave(&cpufreq_driver_lock, flags);
>   	list_add(&policy->policy_list, &cpufreq_policy_list);
> @@ -1257,10 +1206,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>
>   	cpufreq_init_policy(policy);
>
> -	if (!recover_policy) {
> -		policy->user_policy.policy = policy->policy;
> -		policy->user_policy.governor = policy->governor;
> -	}
>   	up_write(&policy->rwsem);
>
>   	kobject_uevent(&policy->kobj, KOBJ_ADD);
> @@ -1273,20 +1218,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>   err_out_unregister:
>   err_get_freq:
>   	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>   		per_cpu(cpufreq_cpu_data, j) = NULL;
>   	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -
> +	up_write(&policy->rwsem);
>   	if (cpufreq_driver->exit)
>   		cpufreq_driver->exit(policy);
> -err_set_policy_cpu:
> -	if (recover_policy) {
> -		/* Do not leave stale fallback data behind. */
> -		per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
> -		cpufreq_policy_put_kobj(policy);
> -	}
> +err_init:
>   	cpufreq_policy_free(policy);
> -
>   nomem_out:
>   	up_read(&cpufreq_rwsem);
>
> @@ -1307,100 +1246,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>   	return __cpufreq_add_dev(dev, sif);
>   }
>
> -static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
> -					   unsigned int old_cpu)
> -{
> -	struct device *cpu_dev;
> -	int ret;
> -
> -	/* first sibling now owns the new sysfs dir */
> -	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
> -
> -	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
> -	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
> -	if (ret) {
> -		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
> -
> -		down_write(&policy->rwsem);
> -		cpumask_set_cpu(old_cpu, policy->cpus);
> -		up_write(&policy->rwsem);
> -
> -		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
> -					"cpufreq");
> -
> -		return -EINVAL;
> -	}
> -
> -	return cpu_dev->id;
> -}
> -
> -static int __cpufreq_remove_dev_prepare(struct device *dev,
> -					struct subsys_interface *sif)
> +static int __cpufreq_remove_dev(struct device *dev,
> +				struct subsys_interface *sif)
>   {
> -	unsigned int cpu = dev->id, cpus;
> -	int new_cpu, ret;
> +	unsigned int cpu = dev->id, j;
> +	int ret = 0;
>   	unsigned long flags;
>   	struct cpufreq_policy *policy;
>
>   	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>
> -	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> -	policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -	/* Save the policy somewhere when doing a light-weight tear-down */
> -	if (cpufreq_suspended)
> -		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -
> -	if (!policy) {
> -		pr_debug("%s: No cpu_data found\n", __func__);
> -		return -EINVAL;
> -	}
> -
> -	if (has_target()) {
> -		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
> -		if (ret) {
> -			pr_err("%s: Failed to stop governor\n", __func__);
> -			return ret;
> -		}
> -	}
> -
> -	if (!cpufreq_driver->setpolicy)
> -		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -			policy->governor->name, CPUFREQ_NAME_LEN);
> -
> -	down_read(&policy->rwsem);
> -	cpus = cpumask_weight(policy->cpus);
> -	up_read(&policy->rwsem);
> -
> -	if (cpu != policy->cpu) {
> -		sysfs_remove_link(&dev->kobj, "cpufreq");
> -	} else if (cpus > 1) {
> -		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -		if (new_cpu >= 0) {
> -			update_policy_cpu(policy, new_cpu);
> -
> -			if (!cpufreq_suspended)
> -				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -					 __func__, new_cpu, cpu);
> -		}
> -	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> -		cpufreq_driver->stop_cpu(policy);
> -	}
> -
> -	return 0;
> -}
> -
> -static int __cpufreq_remove_dev_finish(struct device *dev,
> -				       struct subsys_interface *sif)
> -{
> -	unsigned int cpu = dev->id, cpus;
> -	int ret;
> -	unsigned long flags;
> -	struct cpufreq_policy *policy;
> -
>   	read_lock_irqsave(&cpufreq_driver_lock, flags);
>   	policy = per_cpu(cpufreq_cpu_data, cpu);
>   	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> @@ -1410,56 +1265,45 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>   		return -EINVAL;
>   	}
>
> -	down_write(&policy->rwsem);
> -	cpus = cpumask_weight(policy->cpus);
> -
> -	if (cpus > 1)
> -		cpumask_clear_cpu(cpu, policy->cpus);
> -	up_write(&policy->rwsem);
> -
> -	/* If cpu is last user of policy, free policy */
> -	if (cpus == 1) {
> -		if (has_target()) {
> -			ret = __cpufreq_governor(policy,
> -					CPUFREQ_GOV_POLICY_EXIT);
> -			if (ret) {
> -				pr_err("%s: Failed to exit governor\n",
> -				       __func__);
> -				return ret;
> -			}
> -		}
> -
> -		if (!cpufreq_suspended)
> -			cpufreq_policy_put_kobj(policy);
> +#ifdef CONFIG_HOTPLUG_CPU
> +	ret = cpufreq_change_policy_cpus(policy, cpu, false);
> +#endif
> +	if (ret)
> +		return ret;
>
> -		/*
> -		 * Perform the ->exit() even during light-weight tear-down,
> -		 * since this is a core component, and is essential for the
> -		 * subsequent light-weight ->init() to succeed.
> -		 */
> -		if (cpufreq_driver->exit)
> -			cpufreq_driver->exit(policy);
> +	if (!sif)
> +		return 0;
>
> -		/* Remove policy from list of active policies */
> -		write_lock_irqsave(&cpufreq_driver_lock, flags);
> -		list_del(&policy->policy_list);
> -		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	if (!cpumask_empty(policy->cpus)) {
> +		return 0;
> +	}
>
> -		if (!cpufreq_suspended)
> -			cpufreq_policy_free(policy);
> -	} else if (has_target()) {
> -		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
> -		if (!ret)
> -			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> +	cpufreq_dev_symlink(policy, false);
>
> +	if (has_target()) {
> +		ret = __cpufreq_governor(policy,
> +				CPUFREQ_GOV_POLICY_EXIT);
>   		if (ret) {
> -			pr_err("%s: Failed to start governor\n", __func__);
> +			pr_err("%s: Failed to exit governor\n",
> +			       __func__);
>   			return ret;
>   		}
>   	}
>
> -	per_cpu(cpufreq_cpu_data, cpu) = NULL;
> -	return 0;
> +	cpufreq_policy_put_kobj(policy);
> +	if (cpufreq_driver->exit)
> +		cpufreq_driver->exit(policy);
> +
> +	/* Remove policy from list of active policies */
> +	write_lock_irqsave(&cpufreq_driver_lock, flags);
> +	for_each_cpu(j, policy->related_cpus)
> +		per_cpu(cpufreq_cpu_data, j) = NULL;
> +	list_del(&policy->policy_list);
> +	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +
> +	cpufreq_policy_free(policy);
> +
> +	return ret;
>   }
>
>   /**
> @@ -1469,18 +1313,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>    */
>   static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>   {
> -	unsigned int cpu = dev->id;
> -	int ret;
> -
> -	if (cpu_is_offline(cpu))
> -		return 0;
> -
> -	ret = __cpufreq_remove_dev_prepare(dev, sif);
> -
> -	if (!ret)
> -		ret = __cpufreq_remove_dev_finish(dev, sif);
> -
> -	return ret;
> +	return __cpufreq_remove_dev(dev, sif);
>   }
>
>   static void handle_update(struct work_struct *work)
> @@ -2295,19 +2128,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>   	if (dev) {
>   		switch (action & ~CPU_TASKS_FROZEN) {
>   		case CPU_ONLINE:
> +		case CPU_DOWN_FAILED:
>   			__cpufreq_add_dev(dev, NULL);
>   			break;
>
>   		case CPU_DOWN_PREPARE:
> -			__cpufreq_remove_dev_prepare(dev, NULL);
> -			break;
> -
> -		case CPU_POST_DEAD:
> -			__cpufreq_remove_dev_finish(dev, NULL);
> -			break;
> -
> -		case CPU_DOWN_FAILED:
> -			__cpufreq_add_dev(dev, NULL);
> +			__cpufreq_remove_dev(dev, NULL);
>   			break;
>   		}
>   	}
>


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 14:29       ` Dirk Brandewie
@ 2014-07-16 15:28         ` Viresh Kumar
  2014-07-16 19:42           ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-16 15:28 UTC (permalink / raw)
  To: Dirk Brandewie
  Cc: Saravana Kannan, Rafael J . Wysocki, Todd Poynor,
	Srivatsa S . Bhat, linux-pm, Linux Kernel Mailing List,
	linux-arm-msm, linux-arm-kernel, Stephen Boyd

On 16 July 2014 19:59, Dirk Brandewie <dirk.brandewie@gmail.com> wrote:
> stop_cpu() only needs to be called during __cpufreq_remove_dev_prepare() no
> where else.

Oh, thanks for reminding us..

Look at this Saravana:
367dc4a cpufreq: Add stop CPU callback to cpufreq_driver interface

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 13:13           ` Viresh Kumar
@ 2014-07-16 18:04             ` Srivatsa S. Bhat
  2014-07-16 19:56             ` Saravana Kannan
  1 sibling, 0 replies; 76+ messages in thread
From: Srivatsa S. Bhat @ 2014-07-16 18:04 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Saravana Kannan, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 06:43 PM, Viresh Kumar wrote:
> On 16 July 2014 16:46, Srivatsa S. Bhat <srivatsa@mit.edu> wrote:
>> Short answer: If the sysfs directory has already been created by cpufreq,
>> then yes, it will remain as it is. However, if the online operation failed
>> before that, then cpufreq won't know about that CPU at all, and no file will
>> be created.
>>
>> Long answer:
>> The existing cpufreq code does all its work (including creating the sysfs
>> directories etc) at the CPU_ONLINE stage. This stage is not expected to fail
>> (in fact even the core CPU hotplug code in kernel/cpu.c doesn't care for
>> error returns at this point). So if a CPU fails to come up in earlier stages
>> itself (such as CPU_UP_PREPARE), then cpufreq won't even hear about that CPU,
>> and hence no sysfs files will be created/linked. However, if the CPU bringup
>> operation fails during the CPU_ONLINE stage after the cpufreq's notifier has
>> been invoked, then we do nothing about it and the cpufreq sysfs files will
>> remain.
> 
> In short, the problem I mentioned before this para is genuine. And setting
> policy->cpu to the first cpu of a mask is indeed a bad idea.
> 
>>> Also, how does suspend/resume work without CONFIG_HOTPLUG_CPU ?
>>> What's the sequence of events?
>>>
>>
>> Well, CONFIG_SUSPEND doesn't have an explicit dependency on HOTPLUG_CPU, but
>> SMP systems usually use CONFIG_PM_SLEEP_SMP, which sets CONFIG_HOTPLUG_CPU.
> 
> I read usually as *optional*
> 
>> (I guess the reason why CONFIG_SUSPEND doesn't depend on HOTPLUG_CPU is
>> because suspend is possible even on uniprocessor systems and hence the
>> Kconfig dependency wasn't really justified).
> 
> Again the same question, how do we suspend when HOTPLUG is disabled?
> 

From what I understand, if you disable HOTPLUG_CPU and enable CONFIG_SUSPEND
and try suspend/resume on an SMP system, the disable_nonboot_cpus() call will
return silently without doing anything. Thus, suspend will fail silently and
the system might have trouble resuming.

But surprisingly we have never had such bug reports so far! Most probably this
is because PM_SLEEP_SMP has a default of y (which in turn selects HOTPLUG_CPU):

config PM_SLEEP_SMP
        def_bool y
        depends on SMP
        depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
        depends on PM_SLEEP
        select HOTPLUG_CPU

So I guess nobody really tried turning this off on SMP systems and then trying
suspend. Then I started looking at the git history and wondered where this
Kconfig dependency between SUSPEND and SMP<->HOTPLUG_CPU got messed up. But
instead I found that the initial commit itself didn't get the dependency right.

Commit 296699de6bdc (Introduce CONFIG_SUSPEND for suspend-to-Ram and standby)
introduced all the Kconfig options, and it indeed mentions this in the
changelog: "Make HOTPLUG_CPU be selected automatically if SUSPEND or
HIBERNATION has been chosen and the kernel is intended for SMP systems". But
unfortunately, the code didn't get it right because it made CONFIG_SUSPEND
depend on SUSPEND_SMP_POSSIBLE instead of SUSPEND_SMP.

In other words, we have had this incorrect dependency all the time!

Regards,
Srivatsa S. Bhat

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16  8:30         ` Viresh Kumar
@ 2014-07-16 19:19           ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 19:19 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 01:30 AM, Viresh Kumar wrote:
> On 16 July 2014 05:58, Saravana Kannan <skannan@codeaurora.org> wrote:
>>> +       if (!cpus && cpufreq_driver->stop_cpu &&
>>> cpufreq_driver->setpolicy) {
>>> +               cpufreq_driver->stop_cpu(policy);
>>> +       }
>>>
>>
>> Viresh, I tried your suggestion (and my initial thought too) to combine this
>> as an if/else with the previous if. But the indentation got nasty and made
>> it hard to read. I'm sure the compiler will optimize it. So, I would prefer
>> to leave it this way.
>
> Okay, I gave that comment again :)
>
> Try this:
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index f72b2b7..092a0ba 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -991,8 +991,10 @@ static int cpufreq_change_policy_cpus(struct
> cpufreq_policy *policy,
>                                          CPUFREQ_UPDATE_POLICY_CPU, policy);
>          }
>
> -       cpus = !cpumask_empty(policy->cpus);
> -       if (has_target() && cpus) {
> +       if (!cpumask_empty(policy->cpus)) {
> +               if (!has_target())
> +                       goto unlock;
> +
>                  ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>                  if (!ret)
>                          ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> @@ -1001,9 +1003,7 @@ static int cpufreq_change_policy_cpus(struct
> cpufreq_policy *policy,
>                          pr_err("%s: Failed to start governor\n", __func__);
>                          goto unlock;
>                  }
> -       }
> -
> -       if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
> +       } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>                  cpufreq_driver->stop_cpu(policy);
>          }
>

Not bad :) I'll take it.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug
  2014-07-16  8:48       ` Viresh Kumar
@ 2014-07-16 19:34         ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 19:34 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 01:48 AM, Viresh Kumar wrote:
> On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:
>
> Again, just too many things in a single patch. That's not acceptable.
> Few of these might be bug fixes, which must go in before any other updates.
> And so it must have been added as first patch.
>
> Even the other stuff you are trying to fix (by checking policy->cpus) should go
> before 1/2, otherwise 1/2 will actually break things inbetween, i.e. show values
> even when no CPUs of a cluster are online.

Well, it's no worse than what it does today. The existing code actually
causes a crash when you try to show while hotplugging a CPU. I'm keeping
the 1/2 as small as possible. You clearly want it to be smaller, so I don't
want to add this to that.

Also, the current add/remove path is complicated with many cases. So, 
I'm not comfortable saying I'm sure policy->cpus check would be 
sufficient. I'm willing to throw out this change if you think this is 
still wrong when it comes after 1/2.

>> Since we no longer alloc and destroy/freeze policy and sysfs nodes during
>> hotplug and suspend, we don't need to lock sysfs with hotplug. We can
>> achieve the same effect by checking if policy->cpus is empty.
>
> Are you talking about the changes in store()?

Yes.

>
>> Hotplug mutual exclusion was only done for sysfs writes. But reads need the
>> same protection too.  So, this patch adds that too.
>
> How? How is checking for policy->cpus enough?

Because when all the CPUs in a policy are hotplugged off, the
policy->cpus would be empty? So, it's functionally the same without
having to get the hotplug lock. This way, CPUs of other policies could be
hotplugged while you are doing a show/store on one policy.

But I'm sure you already understood this. So, not sure what you are 
really asking.

>
>> Also, cpufreq driver (un)register can race with hotplug since CPU online
>> state can change between adding/removing the currently online devices and
>> registering/unregistering for hotplug notifiers. So, fix that by
>> registering for hotplug notifiers first before adding devices and
>> unregistering from hotplug notifiers first before removing devices.
>
> Couldn't get it, tell us an example race and what will go wrong due to it.
> Also this should have had a separate patch for itself.

I assumed we do a lot of down_write()s and that would cause a
down_read_trylock() to fail. But we really do that only for cpufreq
driver register/unregister. So, my previous statement is not really
very useful/common.

But I do hate that we do "trylock". It always makes one wonder if it 
will silently fail (since we return NULL, which is same as policy with 
"offline" policy). Technically, we could do down_read(), but lockdep is 
throwing warnings when it's really not an issue (doing down read twice). 
So, I'm guessing all these trylocks are just to keep lockdep happy?


>
>> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
>> ---
>>   drivers/cpufreq/cpufreq.c | 44 ++++++++++++++++++++------------------------
>>   1 file changed, 20 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>> index a0a2ec2..f72b2b7 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -748,17 +748,18 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
>>   {
>>          struct cpufreq_policy *policy = to_policy(kobj);
>>          struct freq_attr *fattr = to_attr(attr);
>> -       ssize_t ret;
>> +       ssize_t ret = -EINVAL;
>>
>>          if (!down_read_trylock(&cpufreq_rwsem))
>> -               return -EINVAL;
>> -
>> +               return ret;
>>          down_read(&policy->rwsem);
>>
>> -       if (fattr->show)
>> -               ret = fattr->show(policy, buf);
>> -       else
>> -               ret = -EIO;
>> +       if (!cpumask_empty(policy->cpus)) {
>> +               if (fattr->show)
>> +                       ret = fattr->show(policy, buf);
>> +               else
>> +                       ret = -EIO;
>> +       }
>
> Makes sense upto this point.
>
>>          up_read(&policy->rwsem);
>>          up_read(&cpufreq_rwsem);
>> @@ -773,26 +774,19 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>>          struct freq_attr *fattr = to_attr(attr);
>>          ssize_t ret = -EINVAL;
>>
>> -       get_online_cpus();
>> -
>> -       if (!cpu_online(policy->cpu))
>> -               goto unlock;
>> -
>
> @Srivatsa: what do you say?
>
>>          if (!down_read_trylock(&cpufreq_rwsem))
>> -               goto unlock;
>> -
>> +               return ret;
>>          down_write(&policy->rwsem);
>>
>> -       if (fattr->store)
>> -               ret = fattr->store(policy, buf, count);
>> -       else
>> -               ret = -EIO;
>> +       if (!cpumask_empty(policy->cpus)) {
>> +               if (fattr->store)
>> +                       ret = fattr->store(policy, buf, count);
>> +               else
>> +                       ret = -EIO;
>> +       }
>>
>>          up_write(&policy->rwsem);
>> -
>>          up_read(&cpufreq_rwsem);
>> -unlock:
>> -       put_online_cpus();
>>
>>          return ret;
>>   }
>> @@ -2270,6 +2264,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
>>                  }
>>          }
>>
>> +       register_hotcpu_notifier(&cpufreq_cpu_notifier);
>> +
>>          ret = subsys_interface_register(&cpufreq_interface);
>>          if (ret)
>>                  goto err_boost_unreg;
>> @@ -2293,13 +2289,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
>>                  }
>>          }
>>
>> -       register_hotcpu_notifier(&cpufreq_cpu_notifier);
>>          pr_debug("driver %s up and running\n", driver_data->name);
>>
>>          return 0;
>>   err_if_unreg:
>>          subsys_interface_unregister(&cpufreq_interface);
>>   err_boost_unreg:
>> +       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
>>          if (cpufreq_boost_supported())
>>                  cpufreq_sysfs_remove_file(&boost.attr);
>>   err_null_driver:
>> @@ -2327,12 +2323,12 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
>>
>>          pr_debug("unregistering driver %s\n", driver->name);
>>
>> +       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
>> +
>>          subsys_interface_unregister(&cpufreq_interface);
>>          if (cpufreq_boost_supported())
>>                  cpufreq_sysfs_remove_file(&boost.attr);
>>
>> -       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
>> -
>>          down_write(&cpufreq_rwsem);
>>          write_lock_irqsave(&cpufreq_driver_lock, flags);
>
> Normally the order of register/unregister should be just opposite.
> Isn't that true here? Yeah, it was broken earlier as well...

Generally agreed, but as explained in the commit text, we need to keep 
it this way to avoid races with hotplug/unregister.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 15:28         ` Viresh Kumar
@ 2014-07-16 19:42           ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 19:42 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Dirk Brandewie, Rafael J . Wysocki, Todd Poynor,
	Srivatsa S . Bhat, linux-pm, Linux Kernel Mailing List,
	linux-arm-msm, linux-arm-kernel, Stephen Boyd

On 07/16/2014 08:28 AM, Viresh Kumar wrote:
> On 16 July 2014 19:59, Dirk Brandewie <dirk.brandewie@gmail.com> wrote:
>> stop_cpu() only needs to be called during __cpufreq_remove_dev_prepare() no
>> where else.
>
> Oh, thanks for reminding us..
>
> Look at this Saravana:
> 367dc4a cpufreq: Add stop CPU callback to cpufreq_driver interface
>

It'll only get called at the same time as it is called today.
__cpufreq_remove_dev_prepare is now renamed to __cpufreq_remove_dev. And 
this function is called from there.

The only time stop does get called is when __cpufreq_remove_dev is 
called on the last CPU of a policy. So, functionally it's identical.

Btw, I already added logs to all cpufreq driver ops and checked the 
calls come in the same order with and without my changes.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 11:16         ` Srivatsa S. Bhat
  2014-07-16 13:13           ` Viresh Kumar
@ 2014-07-16 19:56           ` Saravana Kannan
  2014-07-17  5:35             ` Viresh Kumar
  1 sibling, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 19:56 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: Viresh Kumar, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 04:16 AM, Srivatsa S. Bhat wrote:
> On 07/16/2014 01:54 PM, Viresh Kumar wrote:
>> On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:
>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>

<SNIP>

>>> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>>> -                                 unsigned int cpu, struct device *dev)
>>> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
>>> +                                 unsigned int cpu, bool add)
>
> [...]
>
>>> -
>>> -       if (!cpufreq_driver->setpolicy)
>>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
>>
>> Where is this gone? There are several instances of code just being
>> removed, this is the third one. Its really really tough to catch these
>> in this big of a patch. Believe me.
>>
>> You have to break this patch into multiple ones, see this on how to
>> break even simplest of the changes into multiple patches:
>> https://lkml.org/lkml/2013/9/6/400
>>
>> Its just impossible to catch bugs that you might have introduced here due
>> to the size of this patch. And its taking a LOT of time for me to review this.
>> As I have to keep diff in one tab, new cpufreq.c in one and the old cpufreq.c
>> in one and then compare..
>>
>
> True, this is still a pretty huge chunk. Saravana, at this stage, don't worry
> about making cpufreq work properly in each and every patch. Just ensure that
> every patch builds fine; that should be good enough. I hope this will help you
> in splitting up the patches further.

Thanks Srivatsa. This will definitely help split them up into smaller 
chunks.

> One other thing: your changelog contains what we usually write in a cover-
> letter - *very* high-level goals of the patch. Ideally, you should explain
> the subtle details and the non-obvious decisions or trade-offs that you have
> made at various places in the code. Otherwise it becomes very hard to follow
> your thought-flow just by looking at the patch. So please split up the patch
> further and also make the changelogs useful to review the patch :-)

Thanks. Will do.

> The link that Viresh gave above also did a lot of code reorganization in
> cpufreq, so it should give you a good example of how to proceed.
>
> [...]
>
>>>                          __cpufreq_add_dev(dev, NULL);
>>>                          break;
>>>
>>>                  case CPU_DOWN_PREPARE:
>>> -                       __cpufreq_remove_dev_prepare(dev, NULL);
>>> -                       break;
>>> -
>>> -               case CPU_POST_DEAD:
>>> -                       __cpufreq_remove_dev_finish(dev, NULL);
>>> -                       break;
>>> -
>>> -               case CPU_DOWN_FAILED:
>>> -                       __cpufreq_add_dev(dev, NULL);
>>> +                       __cpufreq_remove_dev(dev, NULL);
>>
>> @Srivatsa: You might want to have a look at this, remove sequence was
>> separated for some purpose and I am just not able to concentrate enough
>> to think of that, just too many cases running in my mind :)
>>
>
> Yeah, we had split it into _remove_dev_prepare() and _remove_dev_finish()
> to avoid a few potential deadlocks. We wanted to call _remove_dev_prepare()
> in the DOWN_PREPARE stage and then call _remove_dev_finish() (which waits
> for the kobject refcount to drop) in the POST_DEAD stage. That is, we wanted
> to do the kobject cleanup after releasing the hotplug lock, and POST_DEAD stage
> was well-suited for that.
>
> Commit 1aee40ac9c8 (cpufreq: Invoke __cpufreq_remove_dev_finish() after
> releasing cpu_hotplug.lock) explains this in detail. Saravana, please take a
> look at that reasoning and ensure that your patch doesn't re-introduce those
> deadlock possibilities!

But all of that was needed _because_ we were creating and destroying 
policies and kobjs all the time. We don't do that anymore. So, I don't 
think any of that applies. We only destroy when the cpufreq driver is 
unregistered. That's kind of the point of this patchset.

Thoughts?

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 13:13           ` Viresh Kumar
  2014-07-16 18:04             ` Srivatsa S. Bhat
@ 2014-07-16 19:56             ` Saravana Kannan
  2014-07-17  5:51               ` Viresh Kumar
  1 sibling, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 19:56 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Srivatsa S. Bhat, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 06:13 AM, Viresh Kumar wrote:
> On 16 July 2014 16:46, Srivatsa S. Bhat <srivatsa@mit.edu> wrote:
>> Short answer: If the sysfs directory has already been created by cpufreq,
>> then yes, it will remain as it is. However, if the online operation failed
>> before that, then cpufreq won't know about that CPU at all, and no file will
>> be created.
>>
>> Long answer:
>> The existing cpufreq code does all its work (including creating the sysfs
>> directories etc) at the CPU_ONLINE stage. This stage is not expected to fail
>> (in fact even the core CPU hotplug code in kernel/cpu.c doesn't care for
>> error returns at this point). So if a CPU fails to come up in earlier stages
>> itself (such as CPU_UP_PREPARE), then cpufreq won't even hear about that CPU,
>> and hence no sysfs files will be created/linked. However, if the CPU bringup
>> operation fails during the CPU_ONLINE stage after the cpufreq's notifier has
>> been invoked, then we do nothing about it and the cpufreq sysfs files will
>> remain.
>
> In short, the problem I mentioned before this para is genuine. And setting
> policy->cpu to the first cpu of a mask is indeed a bad idea.

No it's not. All the cpu*/ directories for all possible CPUs will be 
there whether a CPU is online/offline. Which is why I also weed out 
impossible CPUs, but you said the driver shouldn't be passing impossible 
CPUs anyway. I'm just picking one directory to put the real cpufreq 
directory under. So, the code as-is is definitely not broken.

Sure, I can pick the first cpu that comes online to decide where to put 
the real sysfs cpufreq directory, but then I have to keep track of that 
in a separate field when it's time to remove it when the cpufreq driver 
is unregistered. It's yet another pointless thing to keep track of. And no,
we shouldn't be moving sysfs directory to stay with only an online 
directory. That's the thing this patch is trying to simplify.

So, I think using the first cpu in related CPUs will always work. If 
there's any disagreement, I think it's purely a personal preference over 
adding another field vs calling cpumask_first()

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16  8:24       ` Viresh Kumar
  2014-07-16 11:16         ` Srivatsa S. Bhat
@ 2014-07-16 20:25         ` Saravana Kannan
  2014-07-16 21:45           ` Saravana Kannan
  2014-07-17  6:24           ` Viresh Kumar
  1 sibling, 2 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 20:25 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 01:24 AM, Viresh Kumar wrote:
> On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>
>> +/* symlink related CPUs */
>> +static int cpufreq_dev_symlink(struct cpufreq_policy *policy, bool add)
>>   {
>> -       unsigned int j;
>> +       unsigned int j, first_cpu = cpumask_first(policy->related_cpus);
>
> The CPU which came first should get the ownership by default, instead
> of the first one in the mask.
>
> Normally at boot, all CPUs come up first and then only cpufreq init starts.
> But in case all other CPUs fail to come up, then policy->cpu *might* point
> to a failed cpu.
>
> And so, we should simply use policy->cpu here instead of finding the
> first one in the mask.
>

Replied to this in a different email.

> Also, its not the duty of this routine to find which one is the policy cpu as
> that is done by __cpufreq_add_dev(). And so in case we need to make
> first cpu of a mask as policy->cpu, it should be done in __cpufreq_add_dev()
> and not here. This one should just follow the orders :)

This is a new function. And that split up might have made sense earlier. 
But not so much anymore since I'm sharing a lot of code between 
__cpufreq_add_dev() and __cpufreq_remove_dev(). There's no reason to
stick with the previous split-up of work if it doesn't apply well anymore.

Please give this a second thought. Maybe it'll make more sense after I 
split this up into smaller patches.

>
> @Srivatsa: What happens to the sysfs directory if a CPU fails to come up?
> Is it exactly similar to how it happens in hotplug? i.e. we do have a directory
> there?
>
>>          int ret = 0;
>>
>> -       for_each_cpu(j, policy->cpus) {
>> +       for_each_cpu(j, policy->related_cpus) {
>>                  struct device *cpu_dev;
>>
>> -               if (j == policy->cpu)
>> +               if (j == first_cpu)
>>                          continue;
>>
>> -               pr_debug("Adding link for CPU: %u\n", j);
>
> Keep this please, it might be useful while debugging.

Reluctant ok. We don't add/remove these files anymore in a common 
scenario. So, it's not going to be very helpful. I'll also have to do a 
add ? Add : Remove blurb for the print.

>
>>                  cpu_dev = get_cpu_device(j);
>> -               ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
>> -                                       "cpufreq");
>> +               if (add)
>> +                       ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
>> +                                               "cpufreq");
>> +               else
>> +                       sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
>> +
>>                  if (ret)
>>                          break;
>>          }
>>          return ret;
>>   }
>>
>> -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
>> -                                    struct device *dev)
>> +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
>>   {
>>          struct freq_attr **drv_attr;
>> +       struct device *dev;
>>          int ret = 0;
>>
>> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
>> +       if (!dev)
>> +               return -EINVAL;
>
> Again, deciding which cpu is policy->cpu here is wrong. Just follow
> orders of __cpufreq_add_dev().

But that's not what I'm doing here?

>>          /* prepare interface data */
>>          ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
>>                                     &dev->kobj, "cpufreq");
>> @@ -917,7 +923,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
>>                          goto err_out_kobj_put;
>>          }
>>
>> -       ret = cpufreq_add_dev_symlink(policy);
>> +       ret = cpufreq_dev_symlink(policy, true);
>>          if (ret)
>>                  goto err_out_kobj_put;
>>
>> @@ -961,60 +967,58 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy)
>>   }
>>
>>   #ifdef CONFIG_HOTPLUG_CPU
>
> @Srivatsa: I will try this but you also take care of this. These
> ifdefs might go wrong,
> i.e. we are surely using it in the current patch without HOTPLUG as well. See
> cpufreq_add_dev()..
>
> Also, how does suspend/resume work without CONFIG_HOTPLUG_CPU ?
> What's the sequence of events?
>
>> -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>> -                                 unsigned int cpu, struct device *dev)
>> +static int cpufreq_change_policy_cpus(struct cpufreq_policy *policy,
>> +                                 unsigned int cpu, bool add)
>>   {
>>          int ret = 0;
>> -       unsigned long flags;
>> +       unsigned int cpus, pcpu;
>>
>> -       if (has_target()) {
>> +       down_write(&policy->rwsem);
>> +
>> +       cpus = !cpumask_empty(policy->cpus);
>
> We aren't using cpus at multiple places and so probably it would
> be better to using cpumask_empty() directly.
>
>> +       if (has_target() && cpus) {
>
> I may get the answer later in reviews, but when will cpus be 0 here?
> Probably for non-boot cluster during suspend/resume, or forceful
> hotplugging off all CPUs of a cluster. Right?

Yup!

>
>>                  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>>                  if (ret) {
>>                          pr_err("%s: Failed to stop governor\n", __func__);
>> -                       return ret;
>> +                       goto unlock;
>>                  }
>>          }
>>
>> -       down_write(&policy->rwsem);
>> -
>> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> -       cpumask_set_cpu(cpu, policy->cpus);
>> -       per_cpu(cpufreq_cpu_data, cpu) = policy;
>> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       if (add)
>> +               cpumask_set_cpu(cpu, policy->cpus);
>> +       else
>> +               cpumask_clear_cpu(cpu, policy->cpus);
>>
>> -       up_write(&policy->rwsem);
>> +       pcpu = cpumask_first(policy->cpus);
>> +       if (pcpu < nr_cpu_ids && policy->cpu != pcpu) {
>
> No, we don't have to consider changing policy->cpu for every change
> in policy->cpus. We need to do that only when policy->cpu goes down.

Ok, I agree I could improve the check to reduce the unnecessary 
notification even more.

> Also pcpu can't be < nr_cpu_ids, right?

This is for the case when all CPUs in a cluster have been taken down. We 
don't want to send the notifier at that point. When the mask is empty, 
the function returns a value >= nr_cpu_ids to indicate an error.

>
>> +               policy->last_cpu = policy->cpu;
>> +               policy->cpu = pcpu;
>> +               blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>> +                                       CPUFREQ_UPDATE_POLICY_CPU, policy);
>> +       }
>>
>> -       if (has_target()) {
>> +       cpus = !cpumask_empty(policy->cpus);
>> +       if (has_target() && cpus) {
>>                  ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>>                  if (!ret)
>>                          ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>>
>>                  if (ret) {
>>                          pr_err("%s: Failed to start governor\n", __func__);
>> -                       return ret;
>> +                       goto unlock;
>>                  }
>>          }
>>
>> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
>> -}
>> -#endif
>> -
>> -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
>> -{
>> -       struct cpufreq_policy *policy;
>> -       unsigned long flags;
>> -
>> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> -       policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
>> -
>> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       if (!cpus && cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>
> As I commented on V1, please make it else part of above if..
>
>> +               cpufreq_driver->stop_cpu(policy);
>> +       }
>>
>> -       policy->governor = NULL;
>> +unlock:
>> +       up_write(&policy->rwsem);
>>
>> -       return policy;
>> +       return ret;
>>   }
>> +#endif
>>
>>   static struct cpufreq_policy *cpufreq_policy_alloc(void)
>>   {
>> @@ -1053,10 +1057,8 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
>>          blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>>                          CPUFREQ_REMOVE_POLICY, policy);
>>
>> -       down_read(&policy->rwsem);
>>          kobj = &policy->kobj;
>>          cmp = &policy->kobj_unregister;
>> -       up_read(&policy->rwsem);
>
> Why? And also, these are unrelated changes and must be added as separate
> commits.

This is because we call this with policy rwsem read lock held and 
lockdep throws a warning. So, it's related to this patch.

>
>>          kobject_put(kobj);
>>
>>          /*
>> @@ -1076,32 +1078,12 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>>          kfree(policy);
>>   }
>>
>> -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
>> -{
>> -       if (WARN_ON(cpu == policy->cpu))
>> -               return;
>> -
>> -       down_write(&policy->rwsem);
>> -
>> -       policy->last_cpu = policy->cpu;
>> -       policy->cpu = cpu;
>> -
>> -       up_write(&policy->rwsem);
>> -
>> -       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>> -                       CPUFREQ_UPDATE_POLICY_CPU, policy);
>> -}
>> -
>>   static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>   {
>>          unsigned int j, cpu = dev->id;
>>          int ret = -ENOMEM;
>>          struct cpufreq_policy *policy;
>>          unsigned long flags;
>> -       bool recover_policy = cpufreq_suspended;
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       struct cpufreq_policy *tpolicy;
>> -#endif
>>
>>          if (cpu_is_offline(cpu))
>>                  return 0;
>> @@ -1110,9 +1092,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>
>>   #ifdef CONFIG_SMP
>>          /* check whether a different CPU already registered this
>> -        * CPU because it is in the same boat. */
>> +        * CPU because it is one of the related CPUs. */
>>          policy = cpufreq_cpu_get(cpu);
>> -       if (unlikely(policy)) {
>> +       if (policy) {
>> +               cpufreq_change_policy_cpus(policy, cpu, true);
>
> This is just a waste of time at boot as ... (see below)

Why? Please explain.

>
>>                  cpufreq_cpu_put(policy);
>>                  return 0;
>>          }
>> @@ -1121,45 +1104,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          if (!down_read_trylock(&cpufreq_rwsem))
>>                  return 0;
>>
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
>> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
>> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
>> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
>> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
>> -                       up_read(&cpufreq_rwsem);
>> -                       return ret;
>> -               }
>> -       }
>> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -#endif
>> -
>> -       /*
>> -        * Restore the saved policy when doing light-weight init and fall back
>> -        * to the full init if that fails.
>> -        */
>> -       policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
>> -       if (!policy) {
>> -               recover_policy = false;
>> -               policy = cpufreq_policy_alloc();
>> -               if (!policy)
>> -                       goto nomem_out;
>> -       }
>> -
>> -       /*
>> -        * In the resume path, since we restore a saved policy, the assignment
>> -        * to policy->cpu is like an update of the existing policy, rather than
>> -        * the creation of a brand new one. So we need to perform this update
>> -        * by invoking update_policy_cpu().
>> -        */
>> -       if (recover_policy && cpu != policy->cpu)
>> -               update_policy_cpu(policy, cpu);
>> -       else
>> -               policy->cpu = cpu;
>> +       /* If we get this far, this is the first time we are adding the
>> +        * policy */
>> +       policy = cpufreq_policy_alloc();
>> +       if (!policy)
>> +               goto nomem_out;
>> +       policy->cpu = cpu;
>>
>>          cpumask_copy(policy->cpus, cpumask_of(cpu));
>> -
>>          init_completion(&policy->kobj_unregister);
>>          INIT_WORK(&policy->update, handle_update);
>>
>> @@ -1169,26 +1121,25 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          ret = cpufreq_driver->init(policy);
>>          if (ret) {
>>                  pr_debug("initialization failed\n");
>> -               goto err_set_policy_cpu;
>> +               goto err_init;
>>          }
>>
>>          /* related cpus should atleast have policy->cpus */
>>          cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
>
> policy->cpus is already updated here.
>
>> +       /* Weed out impossible CPUs. */
>> +       cpumask_and(policy->related_cpus, policy->related_cpus,
>> +                       cpu_possible_mask);
>
> This has to be in a separate commit..

I meant to remove this based on your previous comment that it's the 
responsibility of the driver to pass only possible CPUs. Forgot. Will do.

>
>>          /*
>>           * affected cpus must always be the one, which are online. We aren't
>>           * managing offline cpus here.
>>           */
>>          cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
>>
>> -       if (!recover_policy) {
>> -               policy->user_policy.min = policy->min;
>> -               policy->user_policy.max = policy->max;
>> -       }
>> -
>
> Where did these go? There weren't there for fun.

We are keeping the policy intact. So, why would this be needed anymore?

>
>>          down_write(&policy->rwsem);
>>          write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -       for_each_cpu(j, policy->cpus)
>> +       for_each_cpu(j, policy->related_cpus)
>>                  per_cpu(cpufreq_cpu_data, j) = policy;
>>          write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>>
>> @@ -1243,13 +1194,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>>                                       CPUFREQ_START, policy);
>>
>> -       if (!recover_policy) {
>> -               ret = cpufreq_add_dev_interface(policy, dev);
>> -               if (ret)
>> -                       goto err_out_unregister;
>> -               blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>> -                               CPUFREQ_CREATE_POLICY, policy);
>> -       }
>> +       ret = cpufreq_add_dev_interface(policy);
>> +       if (ret)
>> +               goto err_out_unregister;
>> +       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>> +                       CPUFREQ_CREATE_POLICY, policy);
>>
>>          write_lock_irqsave(&cpufreq_driver_lock, flags);
>>          list_add(&policy->policy_list, &cpufreq_policy_list);
>> @@ -1257,10 +1206,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>
>>          cpufreq_init_policy(policy);
>>
>> -       if (!recover_policy) {
>> -               policy->user_policy.policy = policy->policy;
>> -               policy->user_policy.governor = policy->governor;
>> -       }
>
> Same here.

Same here. We are keeping the policy intact. So, not needed?

>
>>          up_write(&policy->rwsem);
>>
>>          kobject_uevent(&policy->kobj, KOBJ_ADD);
>> @@ -1273,20 +1218,14 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>   err_out_unregister:
>>   err_get_freq:
>>          write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -       for_each_cpu(j, policy->cpus)
>> +       for_each_cpu(j, policy->related_cpus)
>>                  per_cpu(cpufreq_cpu_data, j) = NULL;
>>          write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -
>> +       up_write(&policy->rwsem);
>>          if (cpufreq_driver->exit)
>>                  cpufreq_driver->exit(policy);
>> -err_set_policy_cpu:
>> -       if (recover_policy) {
>> -               /* Do not leave stale fallback data behind. */
>> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
>> -               cpufreq_policy_put_kobj(policy);
>> -       }
>> +err_init:
>>          cpufreq_policy_free(policy);
>> -
>>   nomem_out:
>>          up_read(&cpufreq_rwsem);
>>
>
> Just to mention, I am not looking at the validity of error fallback paths
> in this version. Just make sure they are all good :)

Will do. And even if it's broken, I'll fix it in a separate patch :)


>> @@ -1307,100 +1246,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          return __cpufreq_add_dev(dev, sif);
>>   }
>>
>> -static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
>> -                                          unsigned int old_cpu)
>> -{
>> -       struct device *cpu_dev;
>> -       int ret;
>> -
>> -       /* first sibling now owns the new sysfs dir */
>> -       cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
>> -
>> -       sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
>> -       ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
>> -       if (ret) {
>> -               pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
>> -
>> -               down_write(&policy->rwsem);
>> -               cpumask_set_cpu(old_cpu, policy->cpus);
>> -               up_write(&policy->rwsem);
>> -
>> -               ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
>> -                                       "cpufreq");
>> -
>> -               return -EINVAL;
>> -       }
>> -
>> -       return cpu_dev->id;
>> -}
>> -
>> -static int __cpufreq_remove_dev_prepare(struct device *dev,
>> -                                       struct subsys_interface *sif)
>> +static int __cpufreq_remove_dev(struct device *dev,
>> +                               struct subsys_interface *sif)
>>   {
>> -       unsigned int cpu = dev->id, cpus;
>> -       int new_cpu, ret;
>> +       unsigned int cpu = dev->id, j;
>> +       int ret = 0;
>>          unsigned long flags;
>>          struct cpufreq_policy *policy;
>>
>>          pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>>
>> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> -       policy = per_cpu(cpufreq_cpu_data, cpu);
>> -
>> -       /* Save the policy somewhere when doing a light-weight tear-down */
>> -       if (cpufreq_suspended)
>> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
>> -
>> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -
>> -       if (!policy) {
>> -               pr_debug("%s: No cpu_data found\n", __func__);
>> -               return -EINVAL;
>> -       }
>> -
>> -       if (has_target()) {
>> -               ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>> -               if (ret) {
>> -                       pr_err("%s: Failed to stop governor\n", __func__);
>> -                       return ret;
>> -               }
>> -       }
>> -
>> -       if (!cpufreq_driver->setpolicy)
>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
>
> Where has this gone? There are several instances of code just being
> removed, this is the third one. It's really, really tough to catch these
> in this big of a patch. Believe me.
>
> You have to break this patch into multiple ones, see this on how to
> break even simplest of the changes into multiple patches:
> https://lkml.org/lkml/2013/9/6/400
>
> It's just impossible to catch bugs that you might have introduced here due
> to the size of this patch. And its taking a LOT of time for me to review this.
> As I have to keep diff in one tab, new cpufreq.c in one and the old cpufreq.c
> in one and then compare..

Will do. With Srivatsa's point about just making sure every patch 
compiles, it should be easy to break it up. But to answer your original 
question, it's again not needed to save/restore since we don't destroy it.

>
>> -       down_read(&policy->rwsem);
>> -       cpus = cpumask_weight(policy->cpus);
>> -       up_read(&policy->rwsem);
>> -
>> -       if (cpu != policy->cpu) {
>> -               sysfs_remove_link(&dev->kobj, "cpufreq");
>> -       } else if (cpus > 1) {
>> -               new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
>> -               if (new_cpu >= 0) {
>> -                       update_policy_cpu(policy, new_cpu);
>> -
>> -                       if (!cpufreq_suspended)
>> -                               pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
>> -                                        __func__, new_cpu, cpu);
>> -               }
>> -       } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>> -               cpufreq_driver->stop_cpu(policy);
>> -       }
>> -
>> -       return 0;
>> -}
>> -
>> -static int __cpufreq_remove_dev_finish(struct device *dev,
>> -                                      struct subsys_interface *sif)
>> -{
>> -       unsigned int cpu = dev->id, cpus;
>> -       int ret;
>> -       unsigned long flags;
>> -       struct cpufreq_policy *policy;
>> -
>>          read_lock_irqsave(&cpufreq_driver_lock, flags);
>>          policy = per_cpu(cpufreq_cpu_data, cpu);
>>          read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> @@ -1410,56 +1265,45 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>                  return -EINVAL;
>>          }
>>
>> -       down_write(&policy->rwsem);
>> -       cpus = cpumask_weight(policy->cpus);
>> -
>> -       if (cpus > 1)
>> -               cpumask_clear_cpu(cpu, policy->cpus);
>> -       up_write(&policy->rwsem);
>> -
>> -       /* If cpu is last user of policy, free policy */
>> -       if (cpus == 1) {
>> -               if (has_target()) {
>> -                       ret = __cpufreq_governor(policy,
>> -                                       CPUFREQ_GOV_POLICY_EXIT);
>> -                       if (ret) {
>> -                               pr_err("%s: Failed to exit governor\n",
>> -                                      __func__);
>> -                               return ret;
>> -                       }
>> -               }
>> -
>> -               if (!cpufreq_suspended)
>> -                       cpufreq_policy_put_kobj(policy);
>> +#ifdef CONFIG_HOTPLUG_CPU
>> +       ret = cpufreq_change_policy_cpus(policy, cpu, false);
>> +#endif
>> +       if (ret)
>> +               return ret;
>
> Why is the if block kept outside of #ifdef? And should we really call
> change_*() from inside a #ifdef here?

Yeah, it can be inside.

>
>>
>> -               /*
>> -                * Perform the ->exit() even during light-weight tear-down,
>> -                * since this is a core component, and is essential for the
>> -                * subsequent light-weight ->init() to succeed.
>> -                */
>> -               if (cpufreq_driver->exit)
>> -                       cpufreq_driver->exit(policy);
>> +       if (!sif)
>> +               return 0;
>
> Why? I know that, but we should have comments to describe this ...

Will do.

>
>>
>> -               /* Remove policy from list of active policies */
>> -               write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -               list_del(&policy->policy_list);
>> -               write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       if (!cpumask_empty(policy->cpus)) {
>> +               return 0;
>> +       }
>
> You might still call this attempt a showcase of idea, but I am reviewing it
> at my full capacity.

Oh, at this point this is not an RFC at all. I want it merged. So, 
thanks for the thorough review.

> And these small things just break my flow.
>
> - Don't add {} for single liner blocks
> - Add comments with proper comment style
> - Run checkpatch --strict before sending patches.

Will do.

>
>>
>> -               if (!cpufreq_suspended)
>> -                       cpufreq_policy_free(policy);
>> -       } else if (has_target()) {
>> -               ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
>> -               if (!ret)
>> -                       ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
>> +       cpufreq_dev_symlink(policy, false);
>>
>> +       if (has_target()) {
>> +               ret = __cpufreq_governor(policy,
>> +                               CPUFREQ_GOV_POLICY_EXIT);
>
> Can come in single line
>
>>                  if (ret) {
>> -                       pr_err("%s: Failed to start governor\n", __func__);
>> +                       pr_err("%s: Failed to exit governor\n",
>> +                              __func__);
>
> This too..
>
>>                          return ret;
>>                  }
>>          }
>>
>> -       per_cpu(cpufreq_cpu_data, cpu) = NULL;
>> -       return 0;
>> +       cpufreq_policy_put_kobj(policy);
>> +       if (cpufreq_driver->exit)
>> +               cpufreq_driver->exit(policy);
>> +
>> +       /* Remove policy from list of active policies */
>> +       write_lock_irqsave(&cpufreq_driver_lock, flags);
>> +       for_each_cpu(j, policy->related_cpus)
>> +               per_cpu(cpufreq_cpu_data, j) = NULL;
>> +       list_del(&policy->policy_list);
>> +       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +
>> +       cpufreq_policy_free(policy);
>> +
>> +       return ret;
>>   }
>>
>>   /**
>> @@ -1469,18 +1313,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>    */
>>   static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>>   {
>> -       unsigned int cpu = dev->id;
>> -       int ret;
>> -
>> -       if (cpu_is_offline(cpu))
>> -               return 0;
>
> Why is it part of this commit?

Which part? Just removing the offline check? I should move it to 2/2 (or 
the split ups of it) probably.

>
>> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
>> -
>> -       if (!ret)
>> -               ret = __cpufreq_remove_dev_finish(dev, sif);
>> -
>> -       return ret;
>> +       return __cpufreq_remove_dev(dev, sif);
>>   }
>>
>>   static void handle_update(struct work_struct *work)
>> @@ -2295,19 +2128,12 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>>          if (dev) {
>>                  switch (action & ~CPU_TASKS_FROZEN) {
>>                  case CPU_ONLINE:
>> +               case CPU_DOWN_FAILED:
>
> For example. This change doesn't have anything to do with this patch
> and would have been so easy to review it, if it was kept separate.
>
> Also, this wouldn't even require waiting for this complete series to make
> sense and can be merged very early.

Ok

>
>>                          __cpufreq_add_dev(dev, NULL);
>>                          break;
>>
>>                  case CPU_DOWN_PREPARE:
>> -                       __cpufreq_remove_dev_prepare(dev, NULL);
>> -                       break;
>> -
>> -               case CPU_POST_DEAD:
>> -                       __cpufreq_remove_dev_finish(dev, NULL);
>> -                       break;
>> -
>> -               case CPU_DOWN_FAILED:
>> -                       __cpufreq_add_dev(dev, NULL);
>> +                       __cpufreq_remove_dev(dev, NULL);
>
> @Srivatsa: You might want to have a look at this, remove sequence was
> separated for some purpose and I am just not able to concentrate enough
> to think of that, just too many cases running in my mind :)
>
>>                          break;
>>                  }
>>          }
>
> I am still not sure if everything will work as expected as I seriously doubt
> my reviewing capabilities. There might be corner cases which I am still
> missing.
>

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 20:25         ` Saravana Kannan
@ 2014-07-16 21:45           ` Saravana Kannan
  2014-07-17  6:24           ` Viresh Kumar
  1 sibling, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 21:45 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 01:25 PM, Saravana Kannan wrote:
> On 07/16/2014 01:24 AM, Viresh Kumar wrote:
>> On 16 July 2014 04:17, Saravana Kannan <skannan@codeaurora.org> wrote:
>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>
>>> @@ -1110,9 +1092,10 @@ static int __cpufreq_add_dev(struct device
>>> *dev, struct subsys_interface *sif)
>>>
>>>   #ifdef CONFIG_SMP
>>>          /* check whether a different CPU already registered this
>>> -        * CPU because it is in the same boat. */
>>> +        * CPU because it is one of the related CPUs. */
>>>          policy = cpufreq_cpu_get(cpu);
>>> -       if (unlikely(policy)) {
>>> +       if (policy) {
>>> +               cpufreq_change_policy_cpus(policy, cpu, true);
>>
>> This is just a waste of time at boot as ... (see below)
>
> Why? Please explain.
>

Never mind. Figured out what you meant. I just need to improve the "if" check.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-10  2:37 [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
  2014-07-11  4:18 ` [PATCH v2] " Saravana Kannan
@ 2014-07-16 22:02 ` Rafael J. Wysocki
  2014-07-16 22:35   ` Saravana Kannan
  2014-07-24  3:02   ` Saravana Kannan
  1 sibling, 2 replies; 76+ messages in thread
From: Rafael J. Wysocki @ 2014-07-16 22:02 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Viresh Kumar, Todd Poynor, linux-pm, linux-kernel, linux-arm-msm,
	linux-arm-kernel

On Wednesday, July 09, 2014 07:37:30 PM Saravana Kannan wrote:
> Preliminary patch. Not tested. Just sending out to give an idea of what I'm
> looking to do. Expect a lot more simplification when it's done.
> 
> Benefits:
> * A lot more simpler code.
> * Less stability issues.
> * Suspend/resume time would improve.
> * Hotplug time would improve.
> * Sysfs file permissions would be maintained.
> * More policy settings would be maintained across suspend/resume.
> * cpufreq stats would be maintained across hotplug for all CPUs.

One problem.  The real hotplug (when the CPU actually goes away) depends on
offline removing all that stuff for it.  How are you going to address that?

Rafael


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 22:02 ` [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Rafael J. Wysocki
@ 2014-07-16 22:35   ` Saravana Kannan
  2014-07-24  3:02   ` Saravana Kannan
  1 sibling, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-16 22:35 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Todd Poynor, linux-pm, linux-kernel, linux-arm-msm,
	linux-arm-kernel

On 07/16/2014 03:02 PM, Rafael J. Wysocki wrote:
> On Wednesday, July 09, 2014 07:37:30 PM Saravana Kannan wrote:
>> Preliminary patch. Not tested. Just sending out to give an idea of what I'm
>> looking to do. Expect a lot more simplification when it's done.
>>
>> Benefits:
>> * A lot more simpler code.
>> * Less stability issues.
>> * Suspend/resume time would improve.
>> * Hotplug time would improve.
>> * Sysfs file permissions would be maintained.
>> * More policy settings would be maintained across suspend/resume.
>> * cpufreq stats would be maintained across hotplug for all CPUs.
>
> One problem.  The real hotplug (when the CPU actually goes away) depends on
> offline removing all that stuff for it.  How are you going to address that?

policy, sysfs and kobj are just SW state inside cpufreq core. So, that 
shouldn't really affect what happens in HW when the CPU really is 
hotplugged. Can you please elaborate what you mean?

The only thing that this code assumes is that, in the real hotplug case too, 
the /sys/devices/system/cpu/cpuX directory doesn't go away. I don't 
think it does. Does it?

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 19:56           ` Saravana Kannan
@ 2014-07-17  5:35             ` Viresh Kumar
  2014-07-18  3:25               ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-17  5:35 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Srivatsa S. Bhat, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 17 July 2014 01:26, Saravana Kannan <skannan@codeaurora.org> wrote:
> On 07/16/2014 04:16 AM, Srivatsa S. Bhat wrote:

>> That is, we wanted
>> to do the kobject cleanup after releasing the hotplug lock, and POST_DEAD
>> stage was well-suited for that.

I think, this has changed in Saravana's patch, we do it in the PREPARE stage
now.

>> Commit 1aee40ac9c8 (cpufreq: Invoke __cpufreq_remove_dev_finish() after
>> releasing cpu_hotplug.lock) explains this in detail. Saravana, please take
>> a
>> look at that reasoning and ensure that your patch doesn't re-introduce
>> those
>> deadlock possibilities!
>
>
> But all of that was needed _because_ we were creating and destroying
> policies and kobjs all the time. We don't do that anymore. So, I don't think
> any of that applies. We only destroy when the cpufreq driver is
> unregistered. That's kind of the point of this patchset.
>
> Thoughts?

See above.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 19:56             ` Saravana Kannan
@ 2014-07-17  5:51               ` Viresh Kumar
  0 siblings, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-17  5:51 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Srivatsa S. Bhat, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 17 July 2014 01:26, Saravana Kannan <skannan@codeaurora.org> wrote:
> No it's not. All the cpu*/ directories for all possible CPUs will be there
> whether a CPU is online/offline. Which is why I also weed out impossible
> CPUs, but you said the driver shouldn't be passing impossible CPUs anyway.
> I'm just picking one directory to put the real cpufreq directory under. So,
> the code as-is is definitely not broken.

I may be wrong, and that's why I checked it with Srivatsa. He is quite familiar
with hotplug code.

Let me show the example again, it's a bit tricky.

I agree with you that sysfs nodes for CPUs stay as is with offline events, but
we aren't talking about that here. On boot when we are trying to bring CPUs
online, some of them may fail to come. And in that case, as confirmed by
Srivatsa, there are no sysfs directories. This doesn't happen normally and
is a very corner case.

Still think we are wrong?

> Sure, I can pick the first cpu that comes online to decide where to put the
> real sysfs cpufreq directory, but then I have to keep track of that in a
> separate field when it's time to remove it when the cpufreq driver is
> unregistered.

It works this way right now and I don't think we maintain any separate field
here. Subsys-interface takes care of the order in which CPUs are added/
removed. And we don't have to handle that here. Just fix policy->cpu
at first cpufreq_add_dev().

> And no, we
> shouldn't be moving sysfs directory to stay with only an online directory.
> That's the thing this patch is trying to simplify.

Ahh, I really missed it in reviews. So, that's why you are looking at first
cpu of related_cpus.. Hmm, so it is quite possible that we would end up
in a case where policy->cpu wouldn't have sysfs directory created for it.

Not sure if that might cause some hiccups.

@Srivatsa: ??

> So, I think using the first cpu in related CPUs will always work. If there's
> any disagreement, I think it's purely a personal preference over adding
> another field vs calling cpumask_first()

That's what the problem with this patch was: it is so big that it's easy to
miss important things :)

I now understood why you had these extra cpumask_first() calls.

But having said that, I still don't see a need to change the current behavior.
The important point is kobject and links are added just once, no movement.
And so, I would still like to add it to policy->cpu, i.e. the cpu which comes
first. And this happens only once while we register a driver, so no side
effects probably.

Not every platform is going through hotplug/suspend/resume and keeping
policy->cpu and sysfs node aligned at least for them might not be that bad.
Though it will work for any cpu.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 20:25         ` Saravana Kannan
  2014-07-16 21:45           ` Saravana Kannan
@ 2014-07-17  6:24           ` Viresh Kumar
  1 sibling, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-17  6:24 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 17 July 2014 01:55, Saravana Kannan <skannan@codeaurora.org> wrote:
> On 07/16/2014 01:24 AM, Viresh Kumar wrote:
>> Also, its not the duty of this routine to find which one is the policy cpu
>> as
>> that is done by __cpufreq_add_dev(). And so in case we need to make
>> first cpu of a mask as policy->cpu, it should be done in
>> __cpufreq_add_dev()
>> and not here. This one should just follow the orders :)
>
>
> This is a new function. And that split up might have made sense earlier. But
> not so much anymore since I'm sharing a lot of code between
> __cpufreq_add_dev() and __cpufreq_remove_dev(). There's no reason to stick
> with the previous split-up of work if it doesn't apply well anymore.
>
> Please give this a second thought. Maybe it'll make more sense after I split
> this up into smaller patches.

Don't worry I am all open to good suggestions :)

So, this is why I see your idea of first cpu in related_cpus is good:
- cpufreq_dev_symlink() is called while adding/removing links now
- And we must know which CPU owns kobj in the first place, as there
is no symlink there.
- To be guaranteed about that, first-cpu logic makes sense and I can see
why you did it this way.
- If we want to do it my way, i.e. using policy->cpu there is a problem.
policy->cpu may change and we have to keep another field like:
policy->sysfs_cpu to track sysfs master. That's bad..

This is what you wanted to hear, isn't it ? :)

But, as usual I have a few concerns:
- As we talked in another thread consider this scenario:
- Dual cluster system, 4 CPUs per cluster. Cluster0: cpu0-3, Cluster1: 4-7.
- CPU 4 failed to come online on boot, but it is still the first cpu of
related_cpus. It can still come online later on if we fix things somehow.
- We CAN'T guarantee that first CPU of related_cpus will have a sysfs
directory for itself, as it may have failed to come up in the first place..
- What can we do now? Go to next CPU? Maybe yes, but then we *have*
to track policy->sysfs_cpu. Isn't it?

If that's the case, lets track sysfs_cpu and lets make it equal to policy->cpu
instead of the first-cpu logic :)

I know you don't like me much by now :) Just kidding.

>> Keep this please, it might be useful while debugging.
>
>
> Reluctant ok. We don't add/remove these files anymore in a common scenario.
> So, it's not going to be very helpful. I'll also have to do a add ? Add :
> Remove blurb for the print.

May still be useful :). For example:
- In IKS (In kernel switcher: which you may use for your b.L implementation), we
can turn IKS on/off at runtime and the only way it works is by
unregistering/registering
cpufreq driver. And this will be useful there. Also we might want to know what
went wrong while porting a cpufreq driver for a platform initially.

>>> +       dev = get_cpu_device(cpumask_first(policy->related_cpus));
>>> +       if (!dev)
>>> +               return -EINVAL;
>>
>>
>> Again, deciding which cpu is policy->cpu here is wrong. Just follow
>> orders of __cpufreq_add_dev().
>
> But that's not what I'm doing here?

Yeah, I misread that earlier. So take my comment for sysfs-cpu here :)

>> Also pcpu can't be < nr_cpu_ids, right?
>
> This is for the case when all CPUs in a cluster have been taken down. We
> don't want to send the notifier at that point. When the mask is empty, the
> function returns a value >= nr_cpu_ids to indicate an error.

I see, probably you can use cpumask_weight() earlier in the code and
reuse it here instead of checking for cpumask_first() to find if we need to do
something. Confusing ? Look at this routine again in your code and you will
come to know what I refer to. :)

>>> @@ -1053,10 +1057,8 @@ static void cpufreq_policy_put_kobj(struct
>>> cpufreq_policy *policy)
>>>          blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>>>                          CPUFREQ_REMOVE_POLICY, policy);
>>>
>>> -       down_read(&policy->rwsem);
>>>          kobj = &policy->kobj;
>>>          cmp = &policy->kobj_unregister;
>>> -       up_read(&policy->rwsem);
>>
>>
>> Why? And also, these are unrelated changes and must be added as separate
>> commits.
>
>
> This is because we call this with policy rwsem read lock held and lockdep
> throws a warning. So, it's related to this patch.

I fail to see that in the final code, Can you please enlighten me with line
numbers please?

>>> @@ -1110,9 +1092,10 @@ static int __cpufreq_add_dev(struct device *dev,
>>> +       if (policy) {
>>> +               cpufreq_change_policy_cpus(policy, cpu, true);
>>
>>
>> This is just a waste of time at boot as ... (see below)
>
> Why? Please explain.

As I said, see below :)

>>>          /* related cpus should atleast have policy->cpus */
>>>          cpumask_or(policy->related_cpus, policy->related_cpus,
>>> policy->cpus);
>>
>>
>> policy->cpus is already updated here.

--------------- HERE -------------------------

>>> -       if (!recover_policy) {
>>> -               policy->user_policy.min = policy->min;
>>> -               policy->user_policy.max = policy->max;
>>> -       }
>>
>> Where did these go? They weren't there for fun.
>
> We are keeping the policy intact. So, why would this be needed anymore?

This code would execute on !recover_policy, i.e. when we aren't recovering
policy. Also at boot.. I forgot exact details, please try 'git blame' ..

> Will do. With Srivatsa point about just making sure every patch compiles, it
> should be easy to break it up. But to answer your original question, it's
> again not needed to save/restore since we don't destroy it.

Again, check why it was required with git bisect.



Okay, another thing which I just figured out. You changed something really
really important.

We don't call ->init()/exit() anymore on suspend/resume or when we move
all CPUs out. I am quite sure this will break platforms, and actually those
which Rafael cares about most :)

Again. I still feel this patch was a lot over-engineered. I agree that there
are things which we want to solve, but the first thing to solve is not moving
sysfs nodes. Which can be solved with very basic changes.

Get that right first and send patches for that. Nothing else.

You can send out improvements later once we have your really really
important fix in.

Otherwise, you will just make it tougher for this patchset to get merged.

Look at this (I don't have a link yet, but you are cc'd):
[PATCH V1 Resend 0/4] CPUFreq: Bug fixes & cleanups

A perfect example of how to get the fix in first and then improvements.

Everybody has to follow these, even Maintainers.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-17  5:35             ` Viresh Kumar
@ 2014-07-18  3:25               ` Saravana Kannan
  2014-07-18  4:19                 ` Viresh Kumar
  0 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-18  3:25 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Srivatsa S. Bhat, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/16/2014 10:35 PM, Viresh Kumar wrote:
> On 17 July 2014 01:26, Saravana Kannan <skannan@codeaurora.org> wrote:
>> On 07/16/2014 04:16 AM, Srivatsa S. Bhat wrote:
>
>>> That is, we wanted
>>> to do the kobject cleanup after releasing the hotplug lock, and POST_DEAD
>>> stage was well-suited for that.
>
> I think, this has changed in Saravana's patch, we do it in the PREPARE stage
> now.

Not really. We much never do it during hotplug. We only do it when the 
cpufreq driver unregisters.

This should be easier to see in v4, where I'm breaking up the patches 
into easier diffs.

>>> Commit 1aee40ac9c8 (cpufreq: Invoke __cpufreq_remove_dev_finish() after
>>> releasing cpu_hotplug.lock) explains this in detail. Saravana, please take
>>> a
>>> look at that reasoning and ensure that your patch doesn't re-introduce
>>> those
>>> deadlock possibilities!
>>
>>
>> But all of that was needed _because_ we were creating and destroying
>> policies and kobjs all the time. We don't do that anymore. So, I don't think
>> any of that applies. We only destroy when the cpufreq driver is
>> unregistered. That's kinda of the point of this patchset.
>>
>> Thoughts?
>
> See above.
>

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-18  3:25               ` Saravana Kannan
@ 2014-07-18  4:19                 ` Viresh Kumar
  0 siblings, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-07-18  4:19 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Srivatsa S. Bhat, Rafael J . Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 18 July 2014 08:55, Saravana Kannan <skannan@codeaurora.org> wrote:
> Not really. We much never do it during hotplug. We only do it when the
> cpufreq driver unregisters.

Oh yes.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-16 22:02 ` [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Rafael J. Wysocki
  2014-07-16 22:35   ` Saravana Kannan
@ 2014-07-24  3:02   ` Saravana Kannan
  2014-07-24  5:04     ` Viresh Kumar
  1 sibling, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-24  3:02 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Todd Poynor, linux-pm, linux-kernel, linux-arm-msm,
	linux-arm-kernel

On 07/16/2014 03:02 PM, Rafael J. Wysocki wrote:
> On Wednesday, July 09, 2014 07:37:30 PM Saravana Kannan wrote:
>> Preliminary patch. Not tested. Just sending out to give an idea of what I'm
>> looking to do. Expect a lot more simplification when it's done.
>>
>> Benefits:
>> * A lot more simpler code.
>> * Less stability issues.
>> * Suspend/resume time would improve.
>> * Hotplug time would improve.
>> * Sysfs file permissions would be maintained.
>> * More policy settings would be maintained across suspend/resume.
>> * cpufreq stats would be maintained across hotplug for all CPUs.
>
> One problem.  The real hotplug (when the CPU actually goes away) depends on
> offline removing all that stuff for it.  How are you going to address that?
>

Ok, I think I've figured this out. But one question. Is it possible to 
physically remove one CPU in a bunch of "related cpus" without also 
unplugging the rest? Put another way, can you unplug one core from a 
cluster?

It's not too hard to support that too, but if it's not a realistic case, 
I would rather not write code for that.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-24  3:02   ` Saravana Kannan
@ 2014-07-24  5:04     ` Viresh Kumar
  2014-07-24  9:12       ` skannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-07-24  5:04 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J. Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel

On 24 July 2014 08:32, Saravana Kannan <skannan@codeaurora.org> wrote:
> Ok, I think I've figured this out. But one question. Is it possible to
> physically remove one CPU in a bunch of "related cpus" without also
> unplugging the rest? Put another way, can you unplug one core from a
> cluster?

Are we talking about doing this here:

echo 0 > /sys/devices/system/cpu/cpuX/online      ??

If yes, then what's the confusion all about? Yes we do it all the time.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-24  5:04     ` Viresh Kumar
@ 2014-07-24  9:12       ` skannan
  0 siblings, 0 replies; 76+ messages in thread
From: skannan @ 2014-07-24  9:12 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Saravana Kannan, Rafael J. Wysocki, Todd Poynor, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel


Viresh Kumar wrote:
> On 24 July 2014 08:32, Saravana Kannan <skannan@codeaurora.org> wrote:
>> Ok, I think I've figured this out. But one question. Is it possible to
>> physically remove one CPU in a bunch of "related cpus" without also
>> unplugging the rest? Put another way, can you unplug one core from a
>> cluster?
>
> Are we talking about doing this here:
>
> echo 0 > /sys/devices/system/cpu/cpuX/online      ??
>
> If yes, then what's the confusion all about? Yes we do it all the time.
>

No. That's why I said physically remove.

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-07-15 22:47   ` [PATCH v3 0/2] Simplify hotplug/suspend handling Saravana Kannan
  2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
  2014-07-15 22:47     ` [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug Saravana Kannan
@ 2014-07-25  1:07     ` Saravana Kannan
  2014-07-25  1:07       ` [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor Saravana Kannan
                         ` (7 more replies)
  2 siblings, 8 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-25  1:07 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

Series of patches to simplify policy/sysfs/kobj/locking handling across
suspend/resume

The following have been tested so far on a 2x2 cluster environment:
- Boot with 2 cpus and no cpufreq driver.
- modprobe the driver and see cpufreq sysfs files show up only for the 1st cluster.
- Online the rest of the 2 CPUs and have files show up correctly.
- rmmod the driver and see the files go away.
- modprobe again (or back and forth multiple times) and see it work.
- suspend/resume works as expected.
- When a cluster is offline, all read/writes to its sysfs files return an error

v4
- Split it up into smaller patches
- Will handle physical CPU removal correctly
- Fixed earlier mistake of deleting code under !recover_policy
- Dropped some code refactor that reuses a lot of code between add/remove
- Dropped fix for exiting hotplug race with cpufreq driver probe/rmmod
- Dropped changes will come later once this series is acked.


Saravana Kannan (5):
  cpufreq: Don't wait for CPU to going offline to restart governor
  cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately
  cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  cpufreq: Properly handle physical CPU hot-add/hot-remove
  cpufreq: Delete dead code related to policy save/restore

 drivers/cpufreq/cpufreq.c | 238 ++++++++++++++++++----------------------------
 include/linux/cpufreq.h   |   1 +
 2 files changed, 93 insertions(+), 146 deletions(-)

-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
@ 2014-07-25  1:07       ` Saravana Kannan
  2014-07-31 20:47         ` Saravana Kannan
  2014-07-25  1:07       ` [PATCH v4 2/5] cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately Saravana Kannan
                         ` (6 subsequent siblings)
  7 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-25  1:07 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

There's no need to wait for the CPU going down to fully go offline to
restart the governor. We can stop the governor, change policy->cpus and
immediately restart the governor. This should reduce the time without any
CPUfreq monitoring and also help future patches with simplifying the code.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 62259d2..ee0eb7b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1390,6 +1390,21 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 		cpufreq_driver->stop_cpu(policy);
 	}
 
+	down_write(&policy->rwsem);
+	cpumask_clear_cpu(cpu, policy->cpus);
+	up_write(&policy->rwsem);
+
+	if (cpus > 1 && has_target()) {
+		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
+		if (!ret)
+			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+
+		if (ret) {
+			pr_err("%s: Failed to start governor\n", __func__);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
@@ -1410,15 +1425,12 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		return -EINVAL;
 	}
 
-	down_write(&policy->rwsem);
+	down_read(&policy->rwsem);
 	cpus = cpumask_weight(policy->cpus);
-
-	if (cpus > 1)
-		cpumask_clear_cpu(cpu, policy->cpus);
-	up_write(&policy->rwsem);
+	up_read(&policy->rwsem);
 
 	/* If cpu is last user of policy, free policy */
-	if (cpus == 1) {
+	if (cpus == 0) {
 		if (has_target()) {
 			ret = __cpufreq_governor(policy,
 					CPUFREQ_GOV_POLICY_EXIT);
@@ -1447,15 +1459,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 
 		if (!cpufreq_suspended)
 			cpufreq_policy_free(policy);
-	} else if (has_target()) {
-		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
-		if (ret) {
-			pr_err("%s: Failed to start governor\n", __func__);
-			return ret;
-		}
 	}
 
 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v4 2/5] cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
  2014-07-25  1:07       ` [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor Saravana Kannan
@ 2014-07-25  1:07       ` Saravana Kannan
  2014-08-07  9:02         ` Viresh Kumar
  2014-07-25  1:07       ` [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
                         ` (5 subsequent siblings)
  7 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-25  1:07 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

The ownership of the kobj doesn't need to match policy->cpu or change as
frequently. So, keep track of it separately.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 4 +++-
 include/linux/cpufreq.h   | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ee0eb7b..af4f291 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -868,7 +868,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 	for_each_cpu(j, policy->cpus) {
 		struct device *cpu_dev;
 
-		if (j == policy->cpu)
+		if (j == policy->kobj_cpu)
 			continue;
 
 		pr_debug("Adding link for CPU: %u\n", j);
@@ -917,6 +917,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
 			goto err_out_kobj_put;
 	}
 
+	policy->kobj_cpu = policy->cpu;
 	ret = cpufreq_add_dev_symlink(policy);
 	if (ret)
 		goto err_out_kobj_put;
@@ -1330,6 +1331,7 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 
 		return -EINVAL;
 	}
+	policy->kobj_cpu = cpu_dev->id;
 
 	return cpu_dev->id;
 }
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ec4112d..91c2e38 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -68,6 +68,7 @@ struct cpufreq_policy {
 	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
 	unsigned int		last_cpu; /* cpu nr of previous CPU that managed
 					   * this policy */
+	unsigned int		kobj_cpu; /* Tracks which CPU own the kobj */
 	struct clk		*clk;
 	struct cpufreq_cpuinfo	cpuinfo;/* see above */
 
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
  2014-07-25  1:07       ` [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor Saravana Kannan
  2014-07-25  1:07       ` [PATCH v4 2/5] cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately Saravana Kannan
@ 2014-07-25  1:07       ` Saravana Kannan
  2014-07-31 21:56         ` Rafael J. Wysocki
  2014-08-07 10:48         ` Viresh Kumar
  2014-07-25  1:07       ` [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove Saravana Kannan
                         ` (4 subsequent siblings)
  7 siblings, 2 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-25  1:07 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

This patch simplifies a lot of the hotplug/suspend code by not
adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
the cpufreq directory and policy in place irrespective of whether the CPUs
are ONLINE/OFFLINE.

Leaving the policy, sysfs and kobject in place also brings these additional
benefits:
* Faster suspend/resume
* Faster hotplug
* Sysfs file permissions maintained across hotplug
* Policy settings and governor tunables maintained across hotplug
* Cpufreq stats would be maintained across hotplug for all CPUs and can be
  queried even after CPU goes OFFLINE

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
 1 file changed, 28 insertions(+), 55 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index af4f291..d9fc6e5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 	unsigned int j;
 	int ret = 0;
 
-	for_each_cpu(j, policy->cpus) {
+	for_each_cpu(j, policy->related_cpus) {
 		struct device *cpu_dev;
 
 		if (j == policy->kobj_cpu)
@@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 	int ret = 0;
 	unsigned long flags;
 
-	if (has_target()) {
+	if (cpumask_weight(policy->cpus) && has_target()) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
@@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 		}
 	}
 
-	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
+	return 0;
 }
 #endif
 
@@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	struct cpufreq_policy *policy;
 	unsigned long flags;
 	bool recover_policy = cpufreq_suspended;
-#ifdef CONFIG_HOTPLUG_CPU
-	struct cpufreq_policy *tpolicy;
-#endif
 
 	if (cpu_is_offline(cpu))
 		return 0;
@@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	/* check whether a different CPU already registered this
 	 * CPU because it is in the same boat. */
 	policy = cpufreq_cpu_get(cpu);
-	if (unlikely(policy)) {
+	if (policy) {
+		if (!cpumask_test_cpu(cpu, policy->cpus))
+			ret = cpufreq_add_policy_cpu(policy, cpu, dev);
+		else
+			ret = 0;
 		cpufreq_cpu_put(policy);
-		return 0;
+		return ret;
 	}
 #endif
 
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Check if this cpu was hot-unplugged earlier and has siblings */
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
-		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
-			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
-			up_read(&cpufreq_rwsem);
-			return ret;
-		}
-	}
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
+	/* If we get this far, this is the first time we are adding the
+	 * policy */
+	recover_policy = false;
 
 	/*
 	 * Restore the saved policy when doing light-weight init and fall back
@@ -1189,7 +1180,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	down_write(&policy->rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1274,7 +1265,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 err_out_unregister:
 err_get_freq:
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = NULL;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 					struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id, cpus;
-	int new_cpu, ret;
+	int new_cpu, ret = 0;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	/* Save the policy somewhere when doing a light-weight tear-down */
-	if (cpufreq_suspended)
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
-
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
@@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 		}
 	}
 
-	if (!cpufreq_driver->setpolicy)
-		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
-			policy->governor->name, CPUFREQ_NAME_LEN);
-
 	down_read(&policy->rwsem);
 	cpus = cpumask_weight(policy->cpus);
 	up_read(&policy->rwsem);
 
-	if (cpu != policy->cpu) {
-		sysfs_remove_link(&dev->kobj, "cpufreq");
-	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
-		if (new_cpu >= 0) {
-			update_policy_cpu(policy, new_cpu);
-
-			if (!cpufreq_suspended)
-				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
-					 __func__, new_cpu, cpu);
+	if (cpus > 1) {
+		if (cpu == policy->cpu) {
+			new_cpu = cpumask_any_but(policy->cpus, cpu);
+			if (new_cpu >= 0)
+				update_policy_cpu(policy, new_cpu);
 		}
 	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
 		cpufreq_driver->stop_cpu(policy);
@@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 	cpus = cpumask_weight(policy->cpus);
 	up_read(&policy->rwsem);
 
+	if (cpu != policy->kobj_cpu)
+		sysfs_remove_link(&dev->kobj, "cpufreq");
+
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 0) {
 		if (has_target()) {
@@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id;
-	int ret;
-
-	if (cpu_is_offline(cpu))
-		return 0;
+	int ret = 0;
 
-	ret = __cpufreq_remove_dev_prepare(dev, sif);
+	if (cpu_online(cpu))
+		ret = __cpufreq_remove_dev_prepare(dev, sif);
 
 	if (!ret)
 		ret = __cpufreq_remove_dev_finish(dev, sif);
@@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 			__cpufreq_remove_dev_prepare(dev, NULL);
 			break;
 
-		case CPU_POST_DEAD:
-			__cpufreq_remove_dev_finish(dev, NULL);
-			break;
-
 		case CPU_DOWN_FAILED:
 			__cpufreq_add_dev(dev, NULL);
 			break;
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
                         ` (2 preceding siblings ...)
  2014-07-25  1:07       ` [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
@ 2014-07-25  1:07       ` Saravana Kannan
  2014-08-07 11:02         ` Viresh Kumar
  2014-07-25  1:07       ` [PATCH v4 5/5] cpufreq: Delete dead code related to policy save/restore Saravana Kannan
                         ` (3 subsequent siblings)
  7 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-25  1:07 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

When a CPU is physically added/removed, its cpuX sysfs directory is
dynamically added/removed. To handle this correctly, the cpufreq sysfs
nodes also need to be added/removed dynamically.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d9fc6e5..97edf05 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -41,6 +41,7 @@ static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
 DEFINE_MUTEX(cpufreq_governor_lock);
 static LIST_HEAD(cpufreq_policy_list);
+static cpumask_t has_symlink;
 
 /* This one keeps track of the previously set governor of a removed CPU */
 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
@@ -865,7 +866,10 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 	unsigned int j;
 	int ret = 0;
 
-	for_each_cpu(j, policy->related_cpus) {
+	/* Only some of the related CPUs might be present. So, create
+	 * symlinks only for those.
+	 */
+	for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) {
 		struct device *cpu_dev;
 
 		if (j == policy->kobj_cpu)
@@ -877,6 +881,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 					"cpufreq");
 		if (ret)
 			break;
+		cpumask_set_cpu(j, &has_symlink);
 	}
 	return ret;
 }
@@ -1101,9 +1106,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	unsigned long flags;
 	bool recover_policy = cpufreq_suspended;
 
-	if (cpu_is_offline(cpu))
-		return 0;
-
 	pr_debug("adding CPU %u\n", cpu);
 
 #ifdef CONFIG_SMP
@@ -1111,7 +1113,19 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	 * CPU because it is in the same boat. */
 	policy = cpufreq_cpu_get(cpu);
 	if (policy) {
-		if (!cpumask_test_cpu(cpu, policy->cpus))
+		/* If a CPU gets physically plugged in after one or more of
+		 * its related CPUs are ONLINE, we need to create a symlink
+		 * for it since it wouldn't have been created when the policy
+		 * was initialized. Do this as soon as it's plugged in.
+		 */
+		if (sif && !cpumask_test_cpu(cpu, &has_symlink)) {
+			ret = sysfs_create_link(&dev->kobj, &policy->kobj,
+						"cpufreq");
+			if (!ret)
+				cpumask_set_cpu(cpu, &has_symlink);
+		}
+
+		if (!cpumask_test_cpu(cpu, policy->cpus) && cpu_online(cpu))
 			ret = cpufreq_add_policy_cpu(policy, cpu, dev);
 		else
 			ret = 0;
@@ -1120,6 +1134,9 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	}
 #endif
 
+	if (cpu_is_offline(cpu))
+		return 0;
+
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
@@ -1303,25 +1320,24 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 					   unsigned int old_cpu)
 {
 	struct device *cpu_dev;
+	unsigned int new_cpu;
 	int ret;
 
 	/* first sibling now owns the new sysfs dir */
-	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
+	for_each_cpu_and(new_cpu, policy->related_cpus, cpu_present_mask)
+		if (new_cpu != old_cpu)
+			break;
+	cpu_dev = get_cpu_device(new_cpu);
 
 	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
 	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
 	if (ret) {
 		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
-
-		down_write(&policy->rwsem);
-		cpumask_set_cpu(old_cpu, policy->cpus);
-		up_write(&policy->rwsem);
-
 		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
 					"cpufreq");
-
 		return -EINVAL;
 	}
+	cpumask_clear_cpu(new_cpu, &has_symlink);
 	policy->kobj_cpu = cpu_dev->id;
 
 	return cpu_dev->id;
@@ -1407,8 +1423,12 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 	cpus = cpumask_weight(policy->cpus);
 	up_read(&policy->rwsem);
 
-	if (cpu != policy->kobj_cpu)
+	if (cpu != policy->kobj_cpu) {
 		sysfs_remove_link(&dev->kobj, "cpufreq");
+		cpumask_clear_cpu(cpu, &has_symlink);
+	} else {
+		cpufreq_nominate_new_policy_cpu(policy, cpu);
+	}
 
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 0) {
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v4 5/5] cpufreq: Delete dead code related to policy save/restore
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
                         ` (3 preceding siblings ...)
  2014-07-25  1:07       ` [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove Saravana Kannan
@ 2014-07-25  1:07       ` Saravana Kannan
  2014-08-07 11:06         ` Viresh Kumar
  2014-07-29  5:52       ` [PATCH v4 0/5] Simplify hotplug/suspend handling skannan
                         ` (2 subsequent siblings)
  7 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-07-25  1:07 UTC (permalink / raw)
  To: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat
  Cc: linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd

Since we no longer destroy/realloc policy during logical hotplug, most of
the policy save/restore code is dead code that doesn't get executed. Remove
it.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 82 +++++++++--------------------------------------
 1 file changed, 15 insertions(+), 67 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 97edf05..b635e0e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -37,7 +37,6 @@
  */
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
-static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
 DEFINE_MUTEX(cpufreq_governor_lock);
 static LIST_HEAD(cpufreq_policy_list);
@@ -1006,22 +1005,6 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 }
 #endif
 
-static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-	unsigned long flags;
-
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-
-	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
-
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-	policy->governor = NULL;
-
-	return policy;
-}
-
 static struct cpufreq_policy *cpufreq_policy_alloc(void)
 {
 	struct cpufreq_policy *policy;
@@ -1104,7 +1087,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	int ret = -ENOMEM;
 	struct cpufreq_policy *policy;
 	unsigned long flags;
-	bool recover_policy = cpufreq_suspended;
 
 	pr_debug("adding CPU %u\n", cpu);
 
@@ -1142,31 +1124,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	/* If we get this far, this is the first time we are adding the
 	 * policy */
-	recover_policy = false;
-
-	/*
-	 * Restore the saved policy when doing light-weight init and fall back
-	 * to the full init if that fails.
-	 */
-	policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
-	if (!policy) {
-		recover_policy = false;
-		policy = cpufreq_policy_alloc();
-		if (!policy)
-			goto nomem_out;
-	}
-
-	/*
-	 * In the resume path, since we restore a saved policy, the assignment
-	 * to policy->cpu is like an update of the existing policy, rather than
-	 * the creation of a brand new one. So we need to perform this update
-	 * by invoking update_policy_cpu().
-	 */
-	if (recover_policy && cpu != policy->cpu)
-		update_policy_cpu(policy, cpu);
-	else
-		policy->cpu = cpu;
+	policy = cpufreq_policy_alloc();
+	if (!policy)
+		goto nomem_out;
 
+	policy->cpu = cpu;
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
 
 	init_completion(&policy->kobj_unregister);
@@ -1190,10 +1152,8 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	 */
 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
 
-	if (!recover_policy) {
-		policy->user_policy.min = policy->min;
-		policy->user_policy.max = policy->max;
-	}
+	policy->user_policy.min = policy->min;
+	policy->user_policy.max = policy->max;
 
 	down_write(&policy->rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
@@ -1252,13 +1212,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				     CPUFREQ_START, policy);
 
-	if (!recover_policy) {
-		ret = cpufreq_add_dev_interface(policy, dev);
-		if (ret)
-			goto err_out_unregister;
-		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				CPUFREQ_CREATE_POLICY, policy);
-	}
+	ret = cpufreq_add_dev_interface(policy, dev);
+	if (ret)
+		goto err_out_unregister;
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_CREATE_POLICY, policy);
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	list_add(&policy->policy_list, &cpufreq_policy_list);
@@ -1266,10 +1224,8 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	cpufreq_init_policy(policy);
 
-	if (!recover_policy) {
-		policy->user_policy.policy = policy->policy;
-		policy->user_policy.governor = policy->governor;
-	}
+	policy->user_policy.policy = policy->policy;
+	policy->user_policy.governor = policy->governor;
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
@@ -1289,13 +1245,7 @@ err_get_freq:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
 err_set_policy_cpu:
-	if (recover_policy) {
-		/* Do not leave stale fallback data behind. */
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
-		cpufreq_policy_put_kobj(policy);
-	}
 	cpufreq_policy_free(policy);
-
 nomem_out:
 	up_read(&cpufreq_rwsem);
 
@@ -1442,8 +1392,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 			}
 		}
 
-		if (!cpufreq_suspended)
-			cpufreq_policy_put_kobj(policy);
+		cpufreq_policy_put_kobj(policy);
 
 		/*
 		 * Perform the ->exit() even during light-weight tear-down,
@@ -1458,8 +1407,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
 		list_del(&policy->policy_list);
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-		if (!cpufreq_suspended)
-			cpufreq_policy_free(policy);
+		cpufreq_policy_free(policy);
 	}
 
 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
-- 
1.8.2.1

The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
                         ` (4 preceding siblings ...)
  2014-07-25  1:07       ` [PATCH v4 5/5] cpufreq: Delete dead code related to policy save/restore Saravana Kannan
@ 2014-07-29  5:52       ` skannan
  2014-07-30  0:29       ` Rafael J. Wysocki
  2014-10-16  8:53       ` Viresh Kumar
  7 siblings, 0 replies; 76+ messages in thread
From: skannan @ 2014-07-29  5:52 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat,
	linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Saravana Kannan, Stephen Boyd


Saravana Kannan wrote:
> Series of patches to simplify policy/sysfs/kobj/locking handling across
> suspend/resume
>

Bump.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
                         ` (5 preceding siblings ...)
  2014-07-29  5:52       ` [PATCH v4 0/5] Simplify hotplug/suspend handling skannan
@ 2014-07-30  0:29       ` Rafael J. Wysocki
  2014-07-31 20:25         ` Saravana Kannan
  2014-08-07  6:04         ` skannan
  2014-10-16  8:53       ` Viresh Kumar
  7 siblings, 2 replies; 76+ messages in thread
From: Rafael J. Wysocki @ 2014-07-30  0:29 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Viresh Kumar, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	linux-kernel, linux-arm-msm, linux-arm-kernel, Stephen Boyd

On Thursday, July 24, 2014 06:07:23 PM Saravana Kannan wrote:
> Series of patches to simplify policy/sysfs/kobj/locking handling across
> suspend/resume

I need someone to review this series for me.  Viresh or Srivatsa, preferably
both.

Thanks!

> The following have been tested so far on a 2x2 cluster environment:
> - Boot with 2 cpus and no cpufreq driver.
> - modprobe the driver and see cpufreq sysfs files show up only for the 1st cluster.
> - Online the rest of the 2 CPUs and have files show up correctly.
> - rmmod the driver and see the files go away.
> - modprobe again (or back and forth multiple times) and see it work.
> - suspend/resume works as expected.
> - When a cluster is offline, all read/writes to its sysfs files return an error
> 
> v4
> - Split it up into smaller patches
> - Will handle physical CPU removal correctly
> - Fixed earlier mistake of deleting code under !recover_policy
> - Dropped some code refactor that reuses a lot of code between add/remove
> - Dropped fix for existing hotplug race with cpufreq driver probe/rmmod
> - Dropped changes will come later once this series is acked.
> 
> 
> Saravana Kannan (5):
>   cpufreq: Don't wait for CPU to going offline to restart governor
>   cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately
>   cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
>   cpufreq: Properly handle physical CPU hot-add/hot-remove
>   cpufreq: Delete dead code related to policy save/restore
> 
>  drivers/cpufreq/cpufreq.c | 238 ++++++++++++++++++----------------------------
>  include/linux/cpufreq.h   |   1 +
>  2 files changed, 93 insertions(+), 146 deletions(-)
> 
> 

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-07-30  0:29       ` Rafael J. Wysocki
@ 2014-07-31 20:25         ` Saravana Kannan
  2014-08-07  6:04         ` skannan
  1 sibling, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-31 20:25 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	linux-kernel, linux-arm-msm, linux-arm-kernel, Stephen Boyd

On 07/29/2014 05:29 PM, Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 06:07:23 PM Saravana Kannan wrote:
>> Series of patches to simplify policy/sysfs/kobj/locking handling across
>> suspend/resume
>
> I need someone to review this series for me.  Viresh or Srivatsa, preferably
> both.

Viresh/Srivatsa,

Bump. Can you guys please review this?

-Saravana


-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor
  2014-07-25  1:07       ` [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor Saravana Kannan
@ 2014-07-31 20:47         ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-31 20:47 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat,
	linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 07/24/2014 06:07 PM, Saravana Kannan wrote:
> There's no need to wait for the CPU going down to fully go offline to
> restart the governor. We can stop the governor, change policy->cpus and
> immediately restart the governor. This should reduce the time without any
> CPUfreq monitoring and also help future patches with simplifying the code.
>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>   drivers/cpufreq/cpufreq.c | 33 ++++++++++++++++++---------------
>   1 file changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 62259d2..ee0eb7b 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1390,6 +1390,21 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>   		cpufreq_driver->stop_cpu(policy);
>   	}
>
> +	down_write(&policy->rwsem);
> +	cpumask_clear_cpu(cpu, policy->cpus);
> +	up_write(&policy->rwsem);
> +
> +	if (cpus > 1 && has_target()) {
> +		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
> +		if (!ret)
> +			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> +
> +		if (ret) {
> +			pr_err("%s: Failed to start governor\n", __func__);
> +			return ret;
> +		}
> +	}
> +
>   	return 0;
>   }
>
> @@ -1410,15 +1425,12 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>   		return -EINVAL;
>   	}
>
> -	down_write(&policy->rwsem);
> +	down_read(&policy->rwsem);
>   	cpus = cpumask_weight(policy->cpus);
> -
> -	if (cpus > 1)
> -		cpumask_clear_cpu(cpu, policy->cpus);
> -	up_write(&policy->rwsem);
> +	up_read(&policy->rwsem);
>
>   	/* If cpu is last user of policy, free policy */
> -	if (cpus == 1) {
> +	if (cpus == 0) {
>   		if (has_target()) {
>   			ret = __cpufreq_governor(policy,
>   					CPUFREQ_GOV_POLICY_EXIT);
> @@ -1447,15 +1459,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>
>   		if (!cpufreq_suspended)
>   			cpufreq_policy_free(policy);
> -	} else if (has_target()) {
> -		ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
> -		if (!ret)
> -			ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
> -
> -		if (ret) {
> -			pr_err("%s: Failed to start governor\n", __func__);
> -			return ret;
> -		}
>   	}
>
>   	per_cpu(cpufreq_cpu_data, cpu) = NULL;
>

This patch should also fix another issue reported in-house recently. 
cpufreq_update_policy() fails for an ONLINE CPU. This is the scenario 
that triggers it:

Thread A
- Cluster with 4 CPUs
- CPU3 is going down.
- Governor is STOPed.
- CPU3 is removed, but governor not STARTed yet.

Thread B
- get_online_cpus()
- We cross this hotplug barrier since POST_DEAD is sent AFTER 
releasing the hotplug lock.
- cpufreq_update_policy(CPU0) does a bunch of stuff
- Then sends GOV_LIMITS to governor.
- governor is currently STOPed, so it returns an error and 
cpufreq_update_policy() fails.

Thread A
- In POST_DEAD notifier, STARTs the governor again.

So, a perfectly valid call (doing get_online_cpus() and checking for 
cpu_online() on a CPU before calling) fails.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-25  1:07       ` [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
@ 2014-07-31 21:56         ` Rafael J. Wysocki
  2014-07-31 22:15           ` Saravana Kannan
                             ` (2 more replies)
  2014-08-07 10:48         ` Viresh Kumar
  1 sibling, 3 replies; 76+ messages in thread
From: Rafael J. Wysocki @ 2014-07-31 21:56 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Viresh Kumar, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	linux-kernel, linux-arm-msm, linux-arm-kernel, Stephen Boyd

On Thursday, July 24, 2014 06:07:26 PM Saravana Kannan wrote:
> This patch simplifies a lot of the hotplug/suspend code by not
> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
> the cpufreq directory and policy in place irrespective of whether the CPUs
> are ONLINE/OFFLINE.

I'm still quite unsure how this is going to work with the real CPU hot-remove
that makes the entire sysfs cpu directories go away.  Can you please explain
that?

> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume
> * Faster hotplug
> * Sysfs file permissions maintained across hotplug
> * Policy settings and governor tunables maintained across hotplug
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>   queried even after CPU goes OFFLINE
> 
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
>  1 file changed, 28 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index af4f291..d9fc6e5 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>  	unsigned int j;
>  	int ret = 0;
>  
> -	for_each_cpu(j, policy->cpus) {
> +	for_each_cpu(j, policy->related_cpus) {
>  		struct device *cpu_dev;
>  
>  		if (j == policy->kobj_cpu)
> @@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>  	int ret = 0;
>  	unsigned long flags;
>  
> -	if (has_target()) {
> +	if (cpumask_weight(policy->cpus) && has_target()) {
>  		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>  		if (ret) {
>  			pr_err("%s: Failed to stop governor\n", __func__);
> @@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>  		}
>  	}
>  
> -	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> +	return 0;
>  }
>  #endif
>  
> @@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	struct cpufreq_policy *policy;
>  	unsigned long flags;
>  	bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -	struct cpufreq_policy *tpolicy;
> -#endif
>  
>  	if (cpu_is_offline(cpu))
>  		return 0;
> @@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	/* check whether a different CPU already registered this
>  	 * CPU because it is in the same boat. */
>  	policy = cpufreq_cpu_get(cpu);
> -	if (unlikely(policy)) {
> +	if (policy) {
> +		if (!cpumask_test_cpu(cpu, policy->cpus))
> +			ret = cpufreq_add_policy_cpu(policy, cpu, dev);
> +		else
> +			ret = 0;
>  		cpufreq_cpu_put(policy);
> -		return 0;
> +		return ret;
>  	}
>  #endif
>  
>  	if (!down_read_trylock(&cpufreq_rwsem))
>  		return 0;
>  
> -#ifdef CONFIG_HOTPLUG_CPU
> -	/* Check if this cpu was hot-unplugged earlier and has siblings */
> -	read_lock_irqsave(&cpufreq_driver_lock, flags);
> -	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -			up_read(&cpufreq_rwsem);
> -			return ret;
> -		}
> -	}
> -	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> +	/* If we get this far, this is the first time we are adding the
> +	 * policy */
> +	recover_policy = false;
>  
>  	/*
>  	 * Restore the saved policy when doing light-weight init and fall back
> @@ -1189,7 +1180,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  
>  	down_write(&policy->rwsem);
>  	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>  		per_cpu(cpufreq_cpu_data, j) = policy;
>  	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>  
> @@ -1274,7 +1265,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  err_out_unregister:
>  err_get_freq:
>  	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>  		per_cpu(cpufreq_cpu_data, j) = NULL;
>  	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>  
> @@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>  					struct subsys_interface *sif)
>  {
>  	unsigned int cpu = dev->id, cpus;
> -	int new_cpu, ret;
> +	int new_cpu, ret = 0;
>  	unsigned long flags;
>  	struct cpufreq_policy *policy;
>  
>  	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>  
> -	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> +	read_lock_irqsave(&cpufreq_driver_lock, flags);
>  	policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -	/* Save the policy somewhere when doing a light-weight tear-down */
> -	if (cpufreq_suspended)
> -		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>  
>  	if (!policy) {
>  		pr_debug("%s: No cpu_data found\n", __func__);
> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>  		}
>  	}
>  
> -	if (!cpufreq_driver->setpolicy)
> -		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -			policy->governor->name, CPUFREQ_NAME_LEN);
> -
>  	down_read(&policy->rwsem);
>  	cpus = cpumask_weight(policy->cpus);
>  	up_read(&policy->rwsem);
>  
> -	if (cpu != policy->cpu) {
> -		sysfs_remove_link(&dev->kobj, "cpufreq");
> -	} else if (cpus > 1) {
> -		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -		if (new_cpu >= 0) {
> -			update_policy_cpu(policy, new_cpu);
> -
> -			if (!cpufreq_suspended)
> -				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -					 __func__, new_cpu, cpu);
> +	if (cpus > 1) {
> +		if (cpu == policy->cpu) {
> +			new_cpu = cpumask_any_but(policy->cpus, cpu);
> +			if (new_cpu >= 0)
> +				update_policy_cpu(policy, new_cpu);
>  		}
>  	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>  		cpufreq_driver->stop_cpu(policy);
> @@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  	cpus = cpumask_weight(policy->cpus);
>  	up_read(&policy->rwsem);
>  
> +	if (cpu != policy->kobj_cpu)
> +		sysfs_remove_link(&dev->kobj, "cpufreq");
> +
>  	/* If cpu is last user of policy, free policy */
>  	if (cpus == 0) {
>  		if (has_target()) {
> @@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>  {
>  	unsigned int cpu = dev->id;
> -	int ret;
> -
> -	if (cpu_is_offline(cpu))
> -		return 0;
> +	int ret = 0;
>  
> -	ret = __cpufreq_remove_dev_prepare(dev, sif);
> +	if (cpu_online(cpu))
> +		ret = __cpufreq_remove_dev_prepare(dev, sif);
>  
>  	if (!ret)
>  		ret = __cpufreq_remove_dev_finish(dev, sif);
> @@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>  			__cpufreq_remove_dev_prepare(dev, NULL);
>  			break;
>  
> -		case CPU_POST_DEAD:
> -			__cpufreq_remove_dev_finish(dev, NULL);
> -			break;
> -
>  		case CPU_DOWN_FAILED:
>  			__cpufreq_add_dev(dev, NULL);
>  			break;
> 

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-31 21:56         ` Rafael J. Wysocki
@ 2014-07-31 22:15           ` Saravana Kannan
  2014-07-31 23:48           ` Saravana Kannan
  2014-08-07 10:51           ` Viresh Kumar
  2 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-31 22:15 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	linux-kernel, linux-arm-msm, linux-arm-kernel, Stephen Boyd

On 07/31/2014 02:56 PM, Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 06:07:26 PM Saravana Kannan wrote:
>> This patch simplifies a lot of the hotplug/suspend code by not
>> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
>> the cpufreq directory and policy in place irrespective of whether the CPUs
>> are ONLINE/OFFLINE.
>
> I'm still quite unsure how this is going to work with the real CPU hot-remove
> that makes the entire sysfs cpu directories go away.  Can you please explain
> that?

With this patch it won't work correctly. 4/5 fixes it to work correctly. 
Just keeping them separate to make it easy to review.

We can squash 3/5 and 4/5 later if people prefer it that way.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-31 21:56         ` Rafael J. Wysocki
  2014-07-31 22:15           ` Saravana Kannan
@ 2014-07-31 23:48           ` Saravana Kannan
  2014-08-07 10:51           ` Viresh Kumar
  2 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-07-31 23:48 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	linux-kernel, linux-arm-msm, linux-arm-kernel, Stephen Boyd

On 07/31/2014 02:56 PM, Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 06:07:26 PM Saravana Kannan wrote:
>> This patch simplifies a lot of the hotplug/suspend code by not
>> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
>> the cpufreq directory and policy in place irrespective of whether the CPUs
>> are ONLINE/OFFLINE.
>
> I'm still quite unsure how this is going to work with the real CPU hot-remove
> that makes the entire sysfs cpu directories go away.  Can you please explain
> that?

Sure. Not a problem. I just wanted to make sure you had a chance to look 
at the code first.

Physical hot-remove triggers a "remove" for all the registered 
subsys_interfaces for that CPU (after going through a couple of 
functions). So, when that happens, the cpufreq subsys_interface remove 
for that CPU gets called. At that point, I clean up that CPU's SW states 
as if it was never plugged in from the start. If that CPU was the owner 
of the sysfs directory, I move it over to a different CPU.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-07-30  0:29       ` Rafael J. Wysocki
  2014-07-31 20:25         ` Saravana Kannan
@ 2014-08-07  6:04         ` skannan
  1 sibling, 0 replies; 76+ messages in thread
From: skannan @ 2014-08-07  6:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Saravana Kannan, Viresh Kumar, Todd Poynor, Srivatsa S . Bhat,
	linux-pm, linux-kernel, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd


Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 06:07:23 PM Saravana Kannan wrote:
>> Series of patches to simplify policy/sysfs/kobj/locking handling across
>> suspend/resume
>
> I need someone to review this series for me.  Viresh or Srivatsa,
> preferably
> both.
>
> Thanks!

This took quite a bit of work to get to v4. It's been almost 2 weeks.
Could someone please review this? I'll get pulled into some heavy "work"
work soon. I would really prefer not to let this bit rot and later be
abandoned due to lack of time on my part. Would really appreciate some
review, folks. Thanks.

Regards,
Saravana

>> The following have been tested so far on a 2x2 cluster environment:
>> - Boot with 2 cpus and no cpufreq driver.
>> - modprobe the driver and see cpufreq sysfs files show up only for the 1st
>> cluster.
>> - Online the rest of the 2 CPUs and have files show up correctly.
>> - rmmod the driver and see the files go away.
>> - modprobe again (or back and forth multiple times) and see it work.
>> - suspend/resume works as expected.
>> - When a cluster is offline, all read/writes to its sysfs files return
>> an error
>>
>> v4
>> - Split it up into smaller patches
>> - Will handle physical CPU removal correctly
>> - Fixed earlier mistake of deleting code under !recover_policy
>> - Dropped some code refactor that reuses a lot of code between
>> add/remove
>> - Dropped fix for existing hotplug race with cpufreq driver probe/rmmod
>> - Dropped changes will come later once this series is acked.
>>
>>
>> Saravana Kannan (5):
>>   cpufreq: Don't wait for CPU to going offline to restart governor
>>   cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately
>>   cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
>>   cpufreq: Properly handle physical CPU hot-add/hot-remove
>>   cpufreq: Delete dead code related to policy save/restore
>>
>>  drivers/cpufreq/cpufreq.c | 238
>> ++++++++++++++++++----------------------------
>>  include/linux/cpufreq.h   |   1 +
>>  2 files changed, 93 insertions(+), 146 deletions(-)
>>
>>
>
> --
> I speak only for myself.
> Rafael J. Wysocki, Intel Open Source Technology Center.
>


-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 2/5] cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately
  2014-07-25  1:07       ` [PATCH v4 2/5] cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately Saravana Kannan
@ 2014-08-07  9:02         ` Viresh Kumar
  0 siblings, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-08-07  9:02 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
> The ownership of the kobj doesn't need to match policy->cpu or change as
> frequently. So, keep track of it separately.
>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 4 +++-
>  include/linux/cpufreq.h   | 1 +
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index ee0eb7b..af4f291 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -868,7 +868,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>         for_each_cpu(j, policy->cpus) {
>                 struct device *cpu_dev;
>
> -               if (j == policy->cpu)
> +               if (j == policy->kobj_cpu)
>                         continue;
>
>                 pr_debug("Adding link for CPU: %u\n", j);
> @@ -917,6 +917,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
>                         goto err_out_kobj_put;
>         }
>
> +       policy->kobj_cpu = policy->cpu;

Not here but in __cpufreq_add_dev()..

>         ret = cpufreq_add_dev_symlink(policy);
>         if (ret)
>                 goto err_out_kobj_put;
> @@ -1330,6 +1331,7 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,

This doesn't exist anymore.

>
>                 return -EINVAL;
>         }
> +       policy->kobj_cpu = cpu_dev->id;
>
>         return cpu_dev->id;
>  }
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index ec4112d..91c2e38 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -68,6 +68,7 @@ struct cpufreq_policy {
>         unsigned int            cpu;    /* cpu nr of CPU managing this policy */
>         unsigned int            last_cpu; /* cpu nr of previous CPU that managed
>                                            * this policy */
> +       unsigned int            kobj_cpu; /* Tracks which CPU own the kobj */
>         struct clk              *clk;
>         struct cpufreq_cpuinfo  cpuinfo;/* see above */
>
> --
> 1.8.2.1
>
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-25  1:07       ` [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
  2014-07-31 21:56         ` Rafael J. Wysocki
@ 2014-08-07 10:48         ` Viresh Kumar
  2014-08-11 22:13           ` Saravana Kannan
  1 sibling, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-08-07 10:48 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
> This patch simplifies a lot of the hotplug/suspend code by not
> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
> the cpufreq directory and policy in place irrespective of whether the CPUs
> are ONLINE/OFFLINE.
>
> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume
> * Faster hotplug
> * Sysfs file permissions maintained across hotplug
> * Policy settings and governor tunables maintained across hotplug
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>   queried even after CPU goes OFFLINE
>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
>  1 file changed, 28 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index af4f291..d9fc6e5 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>         unsigned int j;
>         int ret = 0;
>
> -       for_each_cpu(j, policy->cpus) {
> +       for_each_cpu(j, policy->related_cpus) {
>                 struct device *cpu_dev;
>
>                 if (j == policy->kobj_cpu)
> @@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>         int ret = 0;
>         unsigned long flags;
>
> -       if (has_target()) {
> +       if (cpumask_weight(policy->cpus) && has_target()) {

Probably cpumask_empty() would be more readable here.

>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>                 if (ret) {
>                         pr_err("%s: Failed to stop governor\n", __func__);
> @@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>                 }
>         }
>
> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> +       return 0;
>  }
>  #endif
>
> @@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         struct cpufreq_policy *policy;
>         unsigned long flags;
>         bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -       struct cpufreq_policy *tpolicy;
> -#endif
>
>         if (cpu_is_offline(cpu))
>                 return 0;
> @@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         /* check whether a different CPU already registered this
>          * CPU because it is in the same boat. */
>         policy = cpufreq_cpu_get(cpu);
> -       if (unlikely(policy)) {
> +       if (policy) {
> +               if (!cpumask_test_cpu(cpu, policy->cpus))
> +                       ret = cpufreq_add_policy_cpu(policy, cpu, dev);
> +               else
> +                       ret = 0;
>                 cpufreq_cpu_put(policy);
> -               return 0;
> +               return ret;
>         }
>  #endif
>
>         if (!down_read_trylock(&cpufreq_rwsem))
>                 return 0;
>
> -#ifdef CONFIG_HOTPLUG_CPU
> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -                       up_read(&cpufreq_rwsem);
> -                       return ret;
> -               }
> -       }
> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> +       /* If we get this far, this is the first time we are adding the
> +        * policy */

I think I have already asked you to use proper comment style?

> +       recover_policy = false;

For this patch, probably it will work fine but I hope you will get rid of
this variable completely in next patches..


> @@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>                                         struct subsys_interface *sif)
>  {
>         unsigned int cpu = dev->id, cpus;
> -       int new_cpu, ret;
> +       int new_cpu, ret = 0;

Why?

>         unsigned long flags;
>         struct cpufreq_policy *policy;
>
>         pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>
> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> +       read_lock_irqsave(&cpufreq_driver_lock, flags);
>         policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -       /* Save the policy somewhere when doing a light-weight tear-down */
> -       if (cpufreq_suspended)
> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>
>         if (!policy) {
>                 pr_debug("%s: No cpu_data found\n", __func__);
> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>                 }
>         }
>
> -       if (!cpufreq_driver->setpolicy)
> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -                       policy->governor->name, CPUFREQ_NAME_LEN);
> -

Why? Probably I did mention this earlier as well?

>         down_read(&policy->rwsem);
>         cpus = cpumask_weight(policy->cpus);
>         up_read(&policy->rwsem);
>
> -       if (cpu != policy->cpu) {
> -               sysfs_remove_link(&dev->kobj, "cpufreq");
> -       } else if (cpus > 1) {
> -               new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -               if (new_cpu >= 0) {
> -                       update_policy_cpu(policy, new_cpu);
> -
> -                       if (!cpufreq_suspended)
> -                               pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -                                        __func__, new_cpu, cpu);
> +       if (cpus > 1) {
> +               if (cpu == policy->cpu) {
> +                       new_cpu = cpumask_any_but(policy->cpus, cpu);
> +                       if (new_cpu >= 0)

Can this ever be false?

> +                               update_policy_cpu(policy, new_cpu);
>                 }
>         } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>                 cpufreq_driver->stop_cpu(policy);
> @@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>         cpus = cpumask_weight(policy->cpus);
>         up_read(&policy->rwsem);
>
> +       if (cpu != policy->kobj_cpu)
> +               sysfs_remove_link(&dev->kobj, "cpufreq");
> +

Why?

>         /* If cpu is last user of policy, free policy */
>         if (cpus == 0) {
>                 if (has_target()) {
> @@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>  {
>         unsigned int cpu = dev->id;
> -       int ret;
> -
> -       if (cpu_is_offline(cpu))
> -               return 0;
> +       int ret = 0;
>
> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
> +       if (cpu_online(cpu))
> +               ret = __cpufreq_remove_dev_prepare(dev, sif);

Why do you need a change here?

>         if (!ret)
>                 ret = __cpufreq_remove_dev_finish(dev, sif);
> @@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>                         __cpufreq_remove_dev_prepare(dev, NULL);
>                         break;
>
> -               case CPU_POST_DEAD:
> -                       __cpufreq_remove_dev_finish(dev, NULL);
> -                       break;
> -

Sure? Who will call dev_finish() now?

>                 case CPU_DOWN_FAILED:
>                         __cpufreq_add_dev(dev, NULL);
>                         break;
> --
> 1.8.2.1
>
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-07-31 21:56         ` Rafael J. Wysocki
  2014-07-31 22:15           ` Saravana Kannan
  2014-07-31 23:48           ` Saravana Kannan
@ 2014-08-07 10:51           ` Viresh Kumar
  2014-08-12  9:17             ` Viresh Kumar
  2 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-08-07 10:51 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Saravana Kannan, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 1 August 2014 03:26, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> I'm still quite unsure how this is going to work with the real CPU hot-remove
> that makes the entire sysfs cpu directories go away.  Can you please explain
> that?

I know very little about this kind of hotplug; can you please enlighten
me with some info about it?

Are we talking about big servers which are actually a combination of multiple
motherboards (with SoCs), where any motherboard can be plugged out at
run time? Obviously a single kernel would be running for all these motherboards.

I don't know if we already support that.. Sorry for my lack of
knowledge on this..

--
viresh

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove
  2014-07-25  1:07       ` [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove Saravana Kannan
@ 2014-08-07 11:02         ` Viresh Kumar
  2014-08-11 22:15           ` Saravana Kannan
  0 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-08-07 11:02 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
> When CPUs are physically added/removed, its cpuX sysfs directory is
> dynamically added/removed. To handle this correctly, the cpufreq sysfs
> nodes also need to be added/removed dynamically.

Hmm, in that case why should we take this thread? I mean, if we do need
to add/remove sysfs links or move kobjects around, what would we achieve
with this patchset?

> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 46 +++++++++++++++++++++++++++++++++-------------
>  1 file changed, 33 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index d9fc6e5..97edf05 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -41,6 +41,7 @@ static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
>  static DEFINE_RWLOCK(cpufreq_driver_lock);
>  DEFINE_MUTEX(cpufreq_governor_lock);
>  static LIST_HEAD(cpufreq_policy_list);
> +static cpumask_t has_symlink;
>
>  /* This one keeps track of the previously set governor of a removed CPU */
>  static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
> @@ -865,7 +866,10 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>         unsigned int j;
>         int ret = 0;
>
> -       for_each_cpu(j, policy->related_cpus) {
> +       /* Only some of the related CPUs might be present. So, create
> +        * symlinks only for those.
> +        */

Proper styles please.

> +       for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) {
>                 struct device *cpu_dev;
>
>                 if (j == policy->kobj_cpu)
> @@ -877,6 +881,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>                                         "cpufreq");
>                 if (ret)
>                         break;
> +               cpumask_set_cpu(j, &has_symlink);
>         }
>         return ret;
>  }
> @@ -1101,9 +1106,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         unsigned long flags;
>         bool recover_policy = cpufreq_suspended;
>
> -       if (cpu_is_offline(cpu))
> -               return 0;
> -

Why?

>         pr_debug("adding CPU %u\n", cpu);
>
>  #ifdef CONFIG_SMP
> @@ -1111,7 +1113,19 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>          * CPU because it is in the same boat. */
>         policy = cpufreq_cpu_get(cpu);
>         if (policy) {
> -               if (!cpumask_test_cpu(cpu, policy->cpus))
> +               /* If a CPU gets physically plugged in after one or more of
> +                * its related CPUs are ONLINE, we need to create a symlink
> +                * for it since it wouldn't have been created when the policy
> +                * was initialized. Do this as soon as it's plugged in.
> +                */
> +               if (sif && !cpumask_test_cpu(cpu, &has_symlink)) {

Why check for sif?

> +                       ret = sysfs_create_link(&dev->kobj, &policy->kobj,
> +                                               "cpufreq");
> +                       if (!ret)
> +                               cpumask_set_cpu(cpu, &has_symlink);
> +               }
> +

Move all this to cpufreq_add_policy_cpu()..

> +               if (!cpumask_test_cpu(cpu, policy->cpus) && cpu_online(cpu))
>                         ret = cpufreq_add_policy_cpu(policy, cpu, dev);
>                 else
>                         ret = 0;
> @@ -1120,6 +1134,9 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         }
>  #endif
>
> +       if (cpu_is_offline(cpu))
> +               return 0;
> +

Don't know why we moved it here.. cpufreq_add_dev will only be called for
online CPUs..

>         if (!down_read_trylock(&cpufreq_rwsem))
>                 return 0;
>
> @@ -1303,25 +1320,24 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
>                                            unsigned int old_cpu)
>  {
>         struct device *cpu_dev;
> +       unsigned int new_cpu;
>         int ret;
>
>         /* first sibling now owns the new sysfs dir */
> -       cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
> +       for_each_cpu_and(new_cpu, policy->related_cpus, cpu_present_mask)
> +               if (new_cpu != old_cpu)
> +                       break;
> +       cpu_dev = get_cpu_device(new_cpu);
>
>         sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
>         ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
>         if (ret) {
>                 pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
> -
> -               down_write(&policy->rwsem);
> -               cpumask_set_cpu(old_cpu, policy->cpus);
> -               up_write(&policy->rwsem);
> -
>                 ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
>                                         "cpufreq");
> -
>                 return -EINVAL;
>         }
> +       cpumask_clear_cpu(new_cpu, &has_symlink);
>         policy->kobj_cpu = cpu_dev->id;
>
>         return cpu_dev->id;
> @@ -1407,8 +1423,12 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>         cpus = cpumask_weight(policy->cpus);
>         up_read(&policy->rwsem);
>
> -       if (cpu != policy->kobj_cpu)
> +       if (cpu != policy->kobj_cpu) {
>                 sysfs_remove_link(&dev->kobj, "cpufreq");
> +               cpumask_clear_cpu(cpu, &has_symlink);
> +       } else {
> +               cpufreq_nominate_new_policy_cpu(policy, cpu);
> +       }

This has_symlink thing has made it much more complicated..

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 5/5] cpufreq: Delete dead code related to policy save/restore
  2014-07-25  1:07       ` [PATCH v4 5/5] cpufreq: Delete dead code related to policy save/restore Saravana Kannan
@ 2014-08-07 11:06         ` Viresh Kumar
  0 siblings, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-08-07 11:06 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
> @@ -1142,31 +1124,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>
>         /* If we get this far, this is the first time we are adding the
>          * policy */

We don't need this comment as well..

> -       recover_policy = false;
> -
> -       /*
> -        * Restore the saved policy when doing light-weight init and fall back
> -        * to the full init if that fails.
> -        */
> -       policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
> -       if (!policy) {
> -               recover_policy = false;
> -               policy = cpufreq_policy_alloc();
> -               if (!policy)
> -                       goto nomem_out;
> -       }
> -
> -       /*
> -        * In the resume path, since we restore a saved policy, the assignment
> -        * to policy->cpu is like an update of the existing policy, rather than
> -        * the creation of a brand new one. So we need to perform this update
> -        * by invoking update_policy_cpu().
> -        */
> -       if (recover_policy && cpu != policy->cpu)
> -               update_policy_cpu(policy, cpu);

Since we don't do this anymore, what will happen to policy->cpu after
all CPUs of a policy are hotplugged-out and then brought back in?

> -       else
> -               policy->cpu = cpu;
> +       policy = cpufreq_policy_alloc();
> +       if (!policy)
> +               goto nomem_out;
>
> +       policy->cpu = cpu;
>         cpumask_copy(policy->cpus, cpumask_of(cpu));
>
>         init_completion(&policy->kobj_unregister);
> @@ -1190,10 +1152,8 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>          */
>         cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
>
> -       if (!recover_policy) {
> -               policy->user_policy.min = policy->min;
> -               policy->user_policy.max = policy->max;
> -       }
> +       policy->user_policy.min = policy->min;
> +       policy->user_policy.max = policy->max;
>
>         down_write(&policy->rwsem);
>         write_lock_irqsave(&cpufreq_driver_lock, flags);
> @@ -1252,13 +1212,11 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
>                                      CPUFREQ_START, policy);
>
> -       if (!recover_policy) {
> -               ret = cpufreq_add_dev_interface(policy, dev);
> -               if (ret)
> -                       goto err_out_unregister;
> -               blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> -                               CPUFREQ_CREATE_POLICY, policy);
> -       }
> +       ret = cpufreq_add_dev_interface(policy, dev);
> +       if (ret)
> +               goto err_out_unregister;
> +       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> +                       CPUFREQ_CREATE_POLICY, policy);
>
>         write_lock_irqsave(&cpufreq_driver_lock, flags);
>         list_add(&policy->policy_list, &cpufreq_policy_list);
> @@ -1266,10 +1224,8 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>
>         cpufreq_init_policy(policy);
>
> -       if (!recover_policy) {
> -               policy->user_policy.policy = policy->policy;
> -               policy->user_policy.governor = policy->governor;
> -       }
> +       policy->user_policy.policy = policy->policy;
> +       policy->user_policy.governor = policy->governor;
>         up_write(&policy->rwsem);
>
>         kobject_uevent(&policy->kobj, KOBJ_ADD);
> @@ -1289,13 +1245,7 @@ err_get_freq:
>         if (cpufreq_driver->exit)
>                 cpufreq_driver->exit(policy);
>  err_set_policy_cpu:
> -       if (recover_policy) {
> -               /* Do not leave stale fallback data behind. */
> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
> -               cpufreq_policy_put_kobj(policy);
> -       }
>         cpufreq_policy_free(policy);
> -
>  nomem_out:
>         up_read(&cpufreq_rwsem);
>
> @@ -1442,8 +1392,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>                         }
>                 }
>
> -               if (!cpufreq_suspended)
> -                       cpufreq_policy_put_kobj(policy);
> +               cpufreq_policy_put_kobj(policy);

Sure? This will free kobject.

>                 /*
>                  * Perform the ->exit() even during light-weight tear-down,
> @@ -1458,8 +1407,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>                 list_del(&policy->policy_list);
>                 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>
> -               if (!cpufreq_suspended)
> -                       cpufreq_policy_free(policy);
> +               cpufreq_policy_free(policy);

Same here.

>         }
>
>         per_cpu(cpufreq_cpu_data, cpu) = NULL;
> --
> 1.8.2.1
>
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-08-07 10:48         ` Viresh Kumar
@ 2014-08-11 22:13           ` Saravana Kannan
  2014-08-12  8:51             ` Viresh Kumar
  0 siblings, 1 reply; 76+ messages in thread
From: Saravana Kannan @ 2014-08-11 22:13 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 08/07/2014 03:48 AM, Viresh Kumar wrote:
> On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
>> This patch simplifies a lot of the hotplug/suspend code by not
>> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
>> the cpufreq directory and policy in place irrespective of whether the CPUs
>> are ONLINE/OFFLINE.
>>
>> Leaving the policy, sysfs and kobject in place also brings these additional
>> benefits:
>> * Faster suspend/resume
>> * Faster hotplug
>> * Sysfs file permissions maintained across hotplug
>> * Policy settings and governor tunables maintained across hotplug
>> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>>    queried even after CPU goes OFFLINE
>>
>> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
>> ---
>>   drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
>>   1 file changed, 28 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>> index af4f291..d9fc6e5 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>          unsigned int j;
>>          int ret = 0;
>>
>> -       for_each_cpu(j, policy->cpus) {
>> +       for_each_cpu(j, policy->related_cpus) {
>>                  struct device *cpu_dev;
>>
>>                  if (j == policy->kobj_cpu)
>> @@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>>          int ret = 0;
>>          unsigned long flags;
>>
>> -       if (has_target()) {
>> +       if (cpumask_weight(policy->cpus) && has_target()) {
>
> Probably cpumask_empty() would be more readable here.

Agreed.

>
>>                  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>>                  if (ret) {
>>                          pr_err("%s: Failed to stop governor\n", __func__);
>> @@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>>                  }
>>          }
>>
>> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
>> +       return 0;
>>   }
>>   #endif
>>
>> @@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          struct cpufreq_policy *policy;
>>          unsigned long flags;
>>          bool recover_policy = cpufreq_suspended;
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       struct cpufreq_policy *tpolicy;
>> -#endif
>>
>>          if (cpu_is_offline(cpu))
>>                  return 0;
>> @@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          /* check whether a different CPU already registered this
>>           * CPU because it is in the same boat. */
>>          policy = cpufreq_cpu_get(cpu);
>> -       if (unlikely(policy)) {
>> +       if (policy) {
>> +               if (!cpumask_test_cpu(cpu, policy->cpus))
>> +                       ret = cpufreq_add_policy_cpu(policy, cpu, dev);
>> +               else
>> +                       ret = 0;
>>                  cpufreq_cpu_put(policy);
>> -               return 0;
>> +               return ret;
>>          }
>>   #endif
>>
>>          if (!down_read_trylock(&cpufreq_rwsem))
>>                  return 0;
>>
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
>> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
>> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
>> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
>> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
>> -                       up_read(&cpufreq_rwsem);
>> -                       return ret;
>> -               }
>> -       }
>> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -#endif
>> +       /* If we get this far, this is the first time we are adding the
>> +        * policy */
>
> I think I have already asked you to use proper comment style?

I did. Then I think I noticed some of the existing comments did keep the 
/* in its own line even for multiline comments. So, I got confused. Will 
fix.

>
>> +       recover_policy = false;
>
> For this patch, probably it will work fine but I hope you will get rid of
> this variable completely in next patches..
>

Yup. In 5/5

>
>> @@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>>                                          struct subsys_interface *sif)
>>   {
>>          unsigned int cpu = dev->id, cpus;
>> -       int new_cpu, ret;
>> +       int new_cpu, ret = 0;
>
> Why?

Apparently for no good reason :) Probably some stale change when I was 
splitting up the patches. I'll double check and remove this.


>>          unsigned long flags;
>>          struct cpufreq_policy *policy;
>>
>>          pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>>
>> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> +       read_lock_irqsave(&cpufreq_driver_lock, flags);
>>          policy = per_cpu(cpufreq_cpu_data, cpu);
>> -
>> -       /* Save the policy somewhere when doing a light-weight tear-down */
>> -       if (cpufreq_suspended)
>> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
>> -
>> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>>
>>          if (!policy) {
>>                  pr_debug("%s: No cpu_data found\n", __func__);
>> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>>                  }
>>          }
>>
>> -       if (!cpufreq_driver->setpolicy)
>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
>> -
>
> Why? Probably I did mention this earlier as well?

This code is saving the governor name here to restore it when the policy 
is created again after suspend/resume or hotplug of all CPUs. Since we 
no longer throw away the policy struct, there's no point in doing this.

I should remove this per cpu variable though. Will do it in v5.

>
>>          down_read(&policy->rwsem);
>>          cpus = cpumask_weight(policy->cpus);
>>          up_read(&policy->rwsem);
>>
>> -       if (cpu != policy->cpu) {
>> -               sysfs_remove_link(&dev->kobj, "cpufreq");
>> -       } else if (cpus > 1) {
>> -               new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
>> -               if (new_cpu >= 0) {
>> -                       update_policy_cpu(policy, new_cpu);
>> -
>> -                       if (!cpufreq_suspended)
>> -                               pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
>> -                                        __func__, new_cpu, cpu);
>> +       if (cpus > 1) {
>> +               if (cpu == policy->cpu) {
>> +                       new_cpu = cpumask_any_but(policy->cpus, cpu);
>> +                       if (new_cpu >= 0)
>
> Can this ever be false?

If this is the last CPU going down. This part of the code didn't really 
change. I just moved the cpumask_any_but() from nominate policy to here 
since I'm no longer moving the kobj around.

>
>> +                               update_policy_cpu(policy, new_cpu);
>>                  }
>>          } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>>                  cpufreq_driver->stop_cpu(policy);

>> @@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>          cpus = cpumask_weight(policy->cpus);
>>          up_read(&policy->rwsem);
>>
>> +       if (cpu != policy->kobj_cpu)
>> +               sysfs_remove_link(&dev->kobj, "cpufreq");
>> +
>
> Why?

For the physical hot-remove case or when the cpufreq driver is unregistered.

>
>>          /* If cpu is last user of policy, free policy */
>>          if (cpus == 0) {
>>                  if (has_target()) {
>> @@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>   static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>>   {
>>          unsigned int cpu = dev->id;
>> -       int ret;
>> -
>> -       if (cpu_is_offline(cpu))
>> -               return 0;
>> +       int ret = 0;
>>
>> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
>> +       if (cpu_online(cpu))
>> +               ret = __cpufreq_remove_dev_prepare(dev, sif);
>
> Why do you need a change here?

Since we no longer do remove_dev_finish during hotplug, we can't just 
short circuit the entire function. We have to finish the remove when the 
CPU is hot-removed or when the cpufreq driver is unregistered.

>
>>          if (!ret)
>>                  ret = __cpufreq_remove_dev_finish(dev, sif);
>> @@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>>                          __cpufreq_remove_dev_prepare(dev, NULL);
>>                          break;
>>
>> -               case CPU_POST_DEAD:
>> -                       __cpufreq_remove_dev_finish(dev, NULL);
>> -                       break;
>> -
>
> Sure? Who will call dev_finish() now?

At this point, all remove_dev_finish() does is remove the sysfs links 
and destroy the policy. So, it never needs to be called for hotplug. 
Only during physical hot-remove or during cpufreq driver unregister.

>
>>                  case CPU_DOWN_FAILED:
>>                          __cpufreq_add_dev(dev, NULL);
>>                          break;
>> --
>> 1.8.2.1
>>
>> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
>> hosted by The Linux Foundation

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove
  2014-08-07 11:02         ` Viresh Kumar
@ 2014-08-11 22:15           ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-08-11 22:15 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 08/07/2014 04:02 AM, Viresh Kumar wrote:
> On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
>> When CPUs are physically added/removed, its cpuX sysfs directory is
>> dynamically added/removed. To handle this correctly, the cpufreq sysfs
>> nodes also need to be added/removed dynamically.
>
> Hmm, in that case why should we take this thread? I mean, if we do need
> to add/remove sysfs links or move kobjects around, what would we achieve
> with this patchset?

For the reasons mentioned in 3/5.
* Faster suspend/resume
* Faster hotplug
* Sysfs file permissions maintained across hotplug
* Policy settings and governor tunables maintained across hotplug
* Cpufreq stats would be maintained across hotplug for all CPUs and can
   be queried even after CPU goes OFFLINE

Also, logical hotplug happens way more often than physical hot-remove. 
Just because we need to do this during physical hot-remove doesn't mean 
we should do this all the time.

Btw, v5 will have another patch that should allow a lot of code reuse 
that won't be easy with symlink manipulation.

>
>> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
>> ---
>>   drivers/cpufreq/cpufreq.c | 46 +++++++++++++++++++++++++++++++++-------------
>>   1 file changed, 33 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>> index d9fc6e5..97edf05 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -41,6 +41,7 @@ static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
>>   static DEFINE_RWLOCK(cpufreq_driver_lock);
>>   DEFINE_MUTEX(cpufreq_governor_lock);
>>   static LIST_HEAD(cpufreq_policy_list);
>> +static cpumask_t has_symlink;
>>
>>   /* This one keeps track of the previously set governor of a removed CPU */
>>   static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
>> @@ -865,7 +866,10 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>          unsigned int j;
>>          int ret = 0;
>>
>> -       for_each_cpu(j, policy->related_cpus) {
>> +       /* Only some of the related CPUs might be present. So, create
>> +        * symlinks only for those.
>> +        */
>
> Proper styles please.
>
>> +       for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) {
>>                  struct device *cpu_dev;
>>
>>                  if (j == policy->kobj_cpu)
>> @@ -877,6 +881,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>                                          "cpufreq");
>>                  if (ret)
>>                          break;
>> +               cpumask_set_cpu(j, &has_symlink);
>>          }
>>          return ret;
>>   }
>> @@ -1101,9 +1106,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          unsigned long flags;
>>          bool recover_policy = cpufreq_suspended;
>>
>> -       if (cpu_is_offline(cpu))
>> -               return 0;
>> -
>
> Why?

So that when a CPU is physically hot-added again, we create the symlinks 
again.

>
>>          pr_debug("adding CPU %u\n", cpu);
>>
>>   #ifdef CONFIG_SMP
>> @@ -1111,7 +1113,19 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>           * CPU because it is in the same boat. */
>>          policy = cpufreq_cpu_get(cpu);
>>          if (policy) {
>> -               if (!cpumask_test_cpu(cpu, policy->cpus))
>> +               /* If a CPU gets physically plugged in after one or more of
>> +                * its related CPUs are ONLINE, we need to create a symlink
>> +                * for it since it wouldn't have been created when the policy
>> +                * was initialized. Do this as soon as it's plugged in.
>> +                */
>> +               if (sif && !cpumask_test_cpu(cpu, &has_symlink)) {
>
> Why check for sif?

sif is only set when this is called from hot-add/hot-remove context or 
cpufreq is registered for the first time.

>
>> +                       ret = sysfs_create_link(&dev->kobj, &policy->kobj,
>> +                                               "cpufreq");
>> +                       if (!ret)
>> +                               cpumask_set_cpu(cpu, &has_symlink);
>> +               }
>> +
>
> Move all this to cpufreq_add_policy_cpu()..

The code above is not for online CPUs. So, this can't be added to 
cpufreq_add_policy_cpu().

>
>> +               if (!cpumask_test_cpu(cpu, policy->cpus) && cpu_online(cpu))
>>                          ret = cpufreq_add_policy_cpu(policy, cpu, dev);
>>                  else
>>                          ret = 0;
>> @@ -1120,6 +1134,9 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          }
>>   #endif
>>
>> +       if (cpu_is_offline(cpu))
>> +               return 0;
>> +
>
> Don't know why we moved it here.. cpufreq_add_dev will only be called for
> online CPUs..

As you said, I just moved it down here. If what you say was true, we 
wouldn't have needed this in the first place.

It's needed because __cpufreq_add_dev() is also called for a present, 
but offline CPU during cpufreq driver register.

>
>>          if (!down_read_trylock(&cpufreq_rwsem))
>>                  return 0;
>>
>> @@ -1303,25 +1320,24 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
>>                                             unsigned int old_cpu)
>>   {
>>          struct device *cpu_dev;
>> +       unsigned int new_cpu;
>>          int ret;
>>
>>          /* first sibling now owns the new sysfs dir */
>> -       cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
>> +       for_each_cpu_and(new_cpu, policy->related_cpus, cpu_present_mask)
>> +               if (new_cpu != old_cpu)
>> +                       break;
>> +       cpu_dev = get_cpu_device(new_cpu);
>>
>>          sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
>>          ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
>>          if (ret) {
>>                  pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
>> -
>> -               down_write(&policy->rwsem);
>> -               cpumask_set_cpu(old_cpu, policy->cpus);
>> -               up_write(&policy->rwsem);
>> -
>>                  ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
>>                                          "cpufreq");
>> -
>>                  return -EINVAL;
>>          }
>> +       cpumask_clear_cpu(new_cpu, &has_symlink);
>>          policy->kobj_cpu = cpu_dev->id;
>>
>>          return cpu_dev->id;
>> @@ -1407,8 +1423,12 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>          cpus = cpumask_weight(policy->cpus);
>>          up_read(&policy->rwsem);
>>
>> -       if (cpu != policy->kobj_cpu)
>> +       if (cpu != policy->kobj_cpu) {
>>                  sysfs_remove_link(&dev->kobj, "cpufreq");
>> +               cpumask_clear_cpu(cpu, &has_symlink);
>> +       } else {
>> +               cpufreq_nominate_new_policy_cpu(policy, cpu);
>> +       }
>
> This has_symlink thing has made it much more complicated..

Actually, I disagree. No convoluted deduction of what condition this is 
getting called under, etc. It's pretty simple -- if symlink is present, 
the bit is set; else, it's not set.

Btw, I could have made this similar to policy->related_cpus and 
policy->cpus and it might have looked "simpler". But no point in having 
multiple cpumasks when we are just tracking the global presence of symlinks.

Also, whether it's convoluted or not, it's definitely an improvement 
over removing and adding these all the time.

-Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-08-11 22:13           ` Saravana Kannan
@ 2014-08-12  8:51             ` Viresh Kumar
  0 siblings, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-08-12  8:51 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 12 August 2014 03:43, Saravana Kannan <skannan@codeaurora.org> wrote:
> On 08/07/2014 03:48 AM, Viresh Kumar wrote:

>>> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct
>>> device *dev,
>>>                  }
>>>          }
>>>
>>> -       if (!cpufreq_driver->setpolicy)
>>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
>>> -
>>
>>
>> Why? Probably I did mention this earlier as well?
>
>
> This code is saving the governor name here to restore it when the policy is
> created again after suspend/resume or hotplug of all CPUs. Since we no
> longer throw away the policy struct, there's no point in doing this.
>
> I should remove this per cpu variable though. Will do it in v5.

Hmm, makes sense. So probably keep this code in this patch and get rid
of all uses of 'cpufreq_cpu_governor' in a separate patch.

>>> +       if (cpus > 1) {
>>> +               if (cpu == policy->cpu) {
>>> +                       new_cpu = cpumask_any_but(policy->cpus, cpu);
>>> +                       if (new_cpu >= 0)
>>
>>
>> Can this ever be false?
>
>
> If this is the last CPU going down.

Can that be true? It's present in the (cpus > 1) block :)

>>>   static int cpufreq_remove_dev(struct device *dev, struct
>>> subsys_interface *sif)
>>>   {
>>>          unsigned int cpu = dev->id;
>>> -       int ret;
>>> -
>>> -       if (cpu_is_offline(cpu))
>>> -               return 0;
>>> +       int ret = 0;
>>>
>>> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
>>> +       if (cpu_online(cpu))
>>> +               ret = __cpufreq_remove_dev_prepare(dev, sif);
>>
>>
>> Why do you need a change here?
>
>
> Since we no longer do remove_dev_finish during hotplug, we can't just short
> circuit the entire function. We have to finish the remove when the CPU is
> hot-removed or when the cpufreq driver is unregistered.

I think this is tricky and we must have a clear comment here..
I missed this on my initial reviews..

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend
  2014-08-07 10:51           ` Viresh Kumar
@ 2014-08-12  9:17             ` Viresh Kumar
  0 siblings, 0 replies; 76+ messages in thread
From: Viresh Kumar @ 2014-08-12  9:17 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Saravana Kannan, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 7 August 2014 16:21, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 1 August 2014 03:26, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>> I'm still quite unsure how this is going to work with the real CPU hot-remove
>> that makes the entire sysfs cpu directories go away.  Can you please explain
>> that?
>
> I have little less knowledge on this kind of hotplugs, can you please enlighten
> me with some info about this?
>
> Are we talking about big servers which are actually a combination of multiple
> motherboards (with SoC's), and any motherboard can be plugged out at
> run time. Obviously a single kernel would be running for all these motherboards.
>
> I don't know if we already support that.. Sorry for my lack of
> knowledge on this..

Ping!!

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
                         ` (6 preceding siblings ...)
  2014-07-30  0:29       ` Rafael J. Wysocki
@ 2014-10-16  8:53       ` Viresh Kumar
  2014-10-23 21:41         ` Saravana Kannan
  7 siblings, 1 reply; 76+ messages in thread
From: Viresh Kumar @ 2014-10-16  8:53 UTC (permalink / raw)
  To: Saravana Kannan
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
> Series of patches to simplify policy/sysfs/kobj/locking handling across
> suspend/resume
>
> The following have been tested so far on a 2x2 cluster environment:
> - Boot with 2 cpus and no cpufreq driver.
> - modprobe the driver and see cpufreq sysfs files show up only for the 1st cluster.
> - Online the rest of the 2 CPUs and have files show up correctly.
> - rmmod the driver and see the files go away.
> - modprobe again (or back and forth multiple times) and see it work.
> - suspend/resume works as expected.
> - When a cluster is offline, all read/writes to its sysfs files return an error
>
> v4
> - Split it up into smaller patches
> - Will handle physical CPU removal correctly
> - Fixed earlier mistake of deleting code under !recover_policy
> - Dropped some code refactor that reuses a lot of code between add/remove
> - Dropped fix for exiting hotplug race with cpufreq driver probe/rmmod
> - Dropped changes will come later once this series is acked.

Hi Saravana,

Any updates on this? We might need some of this soon, or should somebody
else start working on this?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v4 0/5] Simplify hotplug/suspend handling
  2014-10-16  8:53       ` Viresh Kumar
@ 2014-10-23 21:41         ` Saravana Kannan
  0 siblings, 0 replies; 76+ messages in thread
From: Saravana Kannan @ 2014-10-23 21:41 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Rafael J . Wysocki, Todd Poynor, Srivatsa S . Bhat, linux-pm,
	Linux Kernel Mailing List, linux-arm-msm, linux-arm-kernel,
	Stephen Boyd

On 10/16/2014 01:53 AM, Viresh Kumar wrote:
> On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
>> Series of patches to simplify policy/sysfs/kobj/locking handling across
>> suspend/resume
>>
>> The following have been tested so far on a 2x2 cluster environment:
>> - Boot with 2 cpus and no cpufreq driver.
>> - mod probe driver and see cpufreq sysfs files show up only for the 1st cluster.
>> - Online the rest of the 2 CPUs and have files show up correctly.
>> - rmmod the driver and see the files go away.
>> - modprobe again (or back and forth multiple times) and see it work.
>> - suspend/resume works as expected.
>> - When a cluster is offline, all read/writes to its sysfs files return an error
>>
>> v4
>> - Split it up into smaller patches
>> - Will handle physical CPU removal correctly
>> - Fixed earlier mistake of deleting code under !recover_policy
>> - Dropped some code refactor that reuses a lot of code between add/remove
>> - Dropped fix for exiting hotplug race with cpufreq driver probe/rmmod
>> - Dropped changes will come later once this series is acked.
>
> Hi Saravana,
>
> Any  updates on this? We might need some of this soon or should somebody
> else start working on this ?
>

Hey,

Sorry for the delay. Got side tracked with some commercial stuff. I'm 
still invested in finishing this up. I'll try to send out something 
within a week.

I did notice (didn't read much) the "Locking issues with cpufreq and 
sysfs" thread. I think my patches should side step most of it.

Thanks,
Saravana

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply	[flat|nested] 76+ messages in thread

end of thread, other threads:[~2014-10-23 21:41 UTC | newest]

Thread overview: 76+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-10  2:37 [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
2014-07-11  4:18 ` [PATCH v2] " Saravana Kannan
2014-07-11  6:19   ` Viresh Kumar
2014-07-11  9:59     ` skannan
2014-07-11 10:07       ` skannan
2014-07-11 10:52       ` Viresh Kumar
2014-07-12  2:44         ` Saravana Kannan
2014-07-14  6:09           ` Viresh Kumar
2014-07-14 19:08             ` Saravana Kannan
2014-07-15  4:35               ` Viresh Kumar
2014-07-15  5:36                 ` Saravana Kannan
2014-07-15  5:52                   ` Viresh Kumar
2014-07-15  6:58                   ` Srivatsa S. Bhat
2014-07-15 17:35                     ` skannan
2014-07-16  7:44                       ` Srivatsa S. Bhat
2014-07-16  5:44                     ` Viresh Kumar
2014-07-16  7:49                       ` Srivatsa S. Bhat
2014-07-12  3:06     ` Saravana Kannan
2014-07-14  6:13       ` Viresh Kumar
2014-07-14 19:10         ` Saravana Kannan
2014-07-11  7:43   ` Srivatsa S. Bhat
2014-07-11 10:02     ` skannan
2014-07-15 22:47   ` [PATCH v3 0/2] Simplify hotplug/suspend handling Saravana Kannan
2014-07-15 22:47     ` [PATCH v3 1/2] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
2014-07-16  0:28       ` Saravana Kannan
2014-07-16  8:30         ` Viresh Kumar
2014-07-16 19:19           ` Saravana Kannan
2014-07-16  8:24       ` Viresh Kumar
2014-07-16 11:16         ` Srivatsa S. Bhat
2014-07-16 13:13           ` Viresh Kumar
2014-07-16 18:04             ` Srivatsa S. Bhat
2014-07-16 19:56             ` Saravana Kannan
2014-07-17  5:51               ` Viresh Kumar
2014-07-16 19:56           ` Saravana Kannan
2014-07-17  5:35             ` Viresh Kumar
2014-07-18  3:25               ` Saravana Kannan
2014-07-18  4:19                 ` Viresh Kumar
2014-07-16 20:25         ` Saravana Kannan
2014-07-16 21:45           ` Saravana Kannan
2014-07-17  6:24           ` Viresh Kumar
2014-07-16 14:29       ` Dirk Brandewie
2014-07-16 15:28         ` Viresh Kumar
2014-07-16 19:42           ` Saravana Kannan
2014-07-15 22:47     ` [PATCH v3 2/2] cpufreq: Simplify and fix mutual exclusion with hotplug Saravana Kannan
2014-07-16  8:48       ` Viresh Kumar
2014-07-16 19:34         ` Saravana Kannan
2014-07-25  1:07     ` [PATCH v4 0/5] Simplify hotplug/suspend handling Saravana Kannan
2014-07-25  1:07       ` [PATCH v4 1/5] cpufreq: Don't wait for CPU to going offline to restart governor Saravana Kannan
2014-07-31 20:47         ` Saravana Kannan
2014-07-25  1:07       ` [PATCH v4 2/5] cpufreq: Keep track of which CPU owns the kobj/sysfs nodes separately Saravana Kannan
2014-08-07  9:02         ` Viresh Kumar
2014-07-25  1:07       ` [PATCH v4 3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Saravana Kannan
2014-07-31 21:56         ` Rafael J. Wysocki
2014-07-31 22:15           ` Saravana Kannan
2014-07-31 23:48           ` Saravana Kannan
2014-08-07 10:51           ` Viresh Kumar
2014-08-12  9:17             ` Viresh Kumar
2014-08-07 10:48         ` Viresh Kumar
2014-08-11 22:13           ` Saravana Kannan
2014-08-12  8:51             ` Viresh Kumar
2014-07-25  1:07       ` [PATCH v4 4/5] cpufreq: Properly handle physical CPU hot-add/hot-remove Saravana Kannan
2014-08-07 11:02         ` Viresh Kumar
2014-08-11 22:15           ` Saravana Kannan
2014-07-25  1:07       ` [PATCH v4 5/5] cpufreq: Delete dead code related to policy save/restore Saravana Kannan
2014-08-07 11:06         ` Viresh Kumar
2014-07-29  5:52       ` [PATCH v4 0/5] Simplify hotplug/suspend handling skannan
2014-07-30  0:29       ` Rafael J. Wysocki
2014-07-31 20:25         ` Saravana Kannan
2014-08-07  6:04         ` skannan
2014-10-16  8:53       ` Viresh Kumar
2014-10-23 21:41         ` Saravana Kannan
2014-07-16 22:02 ` [PATCH] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend Rafael J. Wysocki
2014-07-16 22:35   ` Saravana Kannan
2014-07-24  3:02   ` Saravana Kannan
2014-07-24  5:04     ` Viresh Kumar
2014-07-24  9:12       ` skannan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).