linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code
@ 2021-03-01 21:21 Daniel Lezcano
  2021-03-01 21:21 ` [PATCH 2/5] powercap/drivers/dtpm: Create a registering system Daniel Lezcano
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-01 21:21 UTC (permalink / raw)
  To: daniel.lezcano, rafael; +Cc: linux-kernel, linux-pm

In order to increase the self-encapsulation of the dtpm generic code,
the following changes add a power update callback to the dtpm ops.
That allows the generic code to directly call the dtpm backend
function to update the power values.

The power update function computes the power characteristics when it
is invoked. In the case of the CPUs, the power consumption depends on
the number of online CPUs. The online CPU mask is not up to date at
the CPUHP_AP_ONLINE_DYN state in the teardown callback. That is the
reason why the online and offline callbacks are at separate states.
As there is already an existing state for DTPM, it is only moved to
the DEAD state, so no new state is added by these changes.

That simplifies the code for the next changes and results in more
self-encapsulated code.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/powercap/dtpm.c     |  54 ++++++++--------
 drivers/powercap/dtpm_cpu.c | 124 +++++++++++++-----------------------
 include/linux/cpuhotplug.h  |   2 +-
 include/linux/dtpm.h        |   3 +-
 4 files changed, 76 insertions(+), 107 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index c2185ec5f887..1085dccf9c58 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
 		parent->power_limit -= dtpm->power_limit;
 		parent = parent->parent;
 	}
-
-	__dtpm_rebalance_weight(root);
 }
 
 static void __dtpm_add_power(struct dtpm *dtpm)
@@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
 		parent->power_limit += dtpm->power_limit;
 		parent = parent->parent;
 	}
+}
+
+static int __dtpm_update_power(struct dtpm *dtpm)
+{
+	int ret;
+
+	__dtpm_sub_power(dtpm);
 
-	__dtpm_rebalance_weight(root);
+	ret = dtpm->ops->upt_power_uw(dtpm);
+	if (ret)
+		pr_err("Failed to update power for '%s': %d\n",
+		       dtpm->zone.name, ret);
+
+	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
+		dtpm->power_limit = dtpm->power_max;
+
+	__dtpm_add_power(dtpm);
+
+	if (root)
+		__dtpm_rebalance_weight(root);
+
+	return ret;
 }
 
 /**
  * dtpm_update_power - Update the power on the dtpm
  * @dtpm: a pointer to a dtpm structure to update
- * @power_min: a u64 representing the new power_min value
- * @power_max: a u64 representing the new power_max value
  *
  * Function to update the power values of the dtpm node specified in
  * parameter. These new values will be propagated to the tree.
  *
  * Return: zero on success, -EINVAL if the values are inconsistent
  */
-int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max)
+int dtpm_update_power(struct dtpm *dtpm)
 {
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&dtpm_lock);
-
-	if (power_min == dtpm->power_min && power_max == dtpm->power_max)
-		goto unlock;
-
-	if (power_max < power_min) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	__dtpm_sub_power(dtpm);
-
-	dtpm->power_min = power_min;
-	dtpm->power_max = power_max;
-	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
-		dtpm->power_limit = power_max;
-
-	__dtpm_add_power(dtpm);
-
-unlock:
+	ret = __dtpm_update_power(dtpm);
 	mutex_unlock(&dtpm_lock);
 
 	return ret;
@@ -436,6 +434,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 
 	if (dtpm->ops && !(dtpm->ops->set_power_uw &&
 			   dtpm->ops->get_power_uw &&
+			   dtpm->ops->upt_power_uw &&
 			   dtpm->ops->release))
 		return -EINVAL;
 
@@ -455,7 +454,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 		root = dtpm;
 	}
 
-	__dtpm_add_power(dtpm);
+	if (dtpm->ops && !dtpm->ops->upt_power_uw(dtpm))
+		__dtpm_add_power(dtpm);
 
 	pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
 		dtpm->zone.name, dtpm->power_min, dtpm->power_max);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 51c366938acd..aff79c649345 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -14,6 +14,8 @@
  * The CPU hotplug is supported and the power numbers will be updated
  * if a CPU is hot plugged / unplugged.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cpumask.h>
 #include <linux/cpufreq.h>
 #include <linux/cpuhotplug.h>
@@ -23,8 +25,6 @@
 #include <linux/slab.h>
 #include <linux/units.h>
 
-static struct dtpm *__parent;
-
 static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
 
 struct dtpm_cpu {
@@ -32,57 +32,16 @@ struct dtpm_cpu {
 	int cpu;
 };
 
-/*
- * When a new CPU is inserted at hotplug or boot time, add the power
- * contribution and update the dtpm tree.
- */
-static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
-{
-	u64 power_min, power_max;
-
-	power_min = em->table[0].power;
-	power_min *= MICROWATT_PER_MILLIWATT;
-	power_min += dtpm->power_min;
-
-	power_max = em->table[em->nr_perf_states - 1].power;
-	power_max *= MICROWATT_PER_MILLIWATT;
-	power_max += dtpm->power_max;
-
-	return dtpm_update_power(dtpm, power_min, power_max);
-}
-
-/*
- * When a CPU is unplugged, remove its power contribution from the
- * dtpm tree.
- */
-static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
-{
-	u64 power_min, power_max;
-
-	power_min = em->table[0].power;
-	power_min *= MICROWATT_PER_MILLIWATT;
-	power_min = dtpm->power_min - power_min;
-
-	power_max = em->table[em->nr_perf_states - 1].power;
-	power_max *= MICROWATT_PER_MILLIWATT;
-	power_max = dtpm->power_max - power_max;
-
-	return dtpm_update_power(dtpm, power_min, power_max);
-}
-
 static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 {
 	struct dtpm_cpu *dtpm_cpu = dtpm->private;
-	struct em_perf_domain *pd;
+	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
 	struct cpumask cpus;
 	unsigned long freq;
 	u64 power;
 	int i, nr_cpus;
 
-	pd = em_cpu_get(dtpm_cpu->cpu);
-
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
-
 	nr_cpus = cpumask_weight(&cpus);
 
 	for (i = 0; i < pd->nr_perf_states; i++) {
@@ -113,6 +72,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 
 	pd = em_cpu_get(dtpm_cpu->cpu);
 	freq = cpufreq_quick_get(dtpm_cpu->cpu);
+
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
 	nr_cpus = cpumask_weight(&cpus);
 
@@ -128,6 +88,27 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 	return 0;
 }
 
+static int upt_pd_power_uw(struct dtpm *dtpm)
+{
+	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
+	struct cpumask cpus;
+	int nr_cpus;
+
+	cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
+	nr_cpus = cpumask_weight(&cpus);
+
+	dtpm->power_min = em->table[0].power;
+	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+	dtpm->power_min *= nr_cpus;
+
+	dtpm->power_max = em->table[em->nr_perf_states - 1].power;
+	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+	dtpm->power_max *= nr_cpus;
+
+	return 0;
+}
+
 static void pd_release(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = dtpm->private;
@@ -141,37 +122,25 @@ static void pd_release(struct dtpm *dtpm)
 static struct dtpm_ops dtpm_ops = {
 	.set_power_uw = set_pd_power_limit,
 	.get_power_uw = get_pd_power_uw,
+	.upt_power_uw = upt_pd_power_uw,
 	.release = pd_release,
 };
 
 static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 {
-	struct cpufreq_policy *policy;
+	struct cpumask cpus;
 	struct em_perf_domain *pd;
 	struct dtpm *dtpm;
 
-	policy = cpufreq_cpu_get(cpu);
-
-	if (!policy)
-		return 0;
-
 	pd = em_cpu_get(cpu);
 	if (!pd)
 		return -EINVAL;
 
-	dtpm = per_cpu(dtpm_per_cpu, cpu);
-
-	power_sub(dtpm, pd);
-
-	if (cpumask_weight(policy->cpus) != 1)
-		return 0;
-
-	for_each_cpu(cpu, policy->related_cpus)
-		per_cpu(dtpm_per_cpu, cpu) = NULL;
+	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
 
-	dtpm_unregister(dtpm);
+	dtpm = per_cpu(dtpm_per_cpu, cpu);
 
-	return 0;
+	return dtpm_update_power(dtpm);
 }
 
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
@@ -184,7 +153,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	int ret = -ENOMEM;
 
 	policy = cpufreq_cpu_get(cpu);
-
 	if (!policy)
 		return 0;
 
@@ -194,7 +162,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 
 	dtpm = per_cpu(dtpm_per_cpu, cpu);
 	if (dtpm)
-		return power_add(dtpm, pd);
+		return dtpm_update_power(dtpm);
 
 	dtpm = dtpm_alloc(&dtpm_ops);
 	if (!dtpm)
@@ -210,27 +178,20 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	for_each_cpu(cpu, policy->related_cpus)
 		per_cpu(dtpm_per_cpu, cpu) = dtpm;
 
-	sprintf(name, "cpu%d", dtpm_cpu->cpu);
+	sprintf(name, "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, dtpm, __parent);
+	ret = dtpm_register(name, dtpm, NULL);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
-	ret = power_add(dtpm, pd);
-	if (ret)
-		goto out_dtpm_unregister;
-
 	ret = freq_qos_add_request(&policy->constraints,
 				   &dtpm_cpu->qos_req, FREQ_QOS_MAX,
 				   pd->table[pd->nr_perf_states - 1].frequency);
 	if (ret)
-		goto out_power_sub;
+		goto out_dtpm_unregister;
 
 	return 0;
 
-out_power_sub:
-	power_sub(dtpm, pd);
-
 out_dtpm_unregister:
 	dtpm_unregister(dtpm);
 	dtpm_cpu = NULL;
@@ -248,10 +209,17 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 
 int dtpm_register_cpu(struct dtpm *parent)
 {
-	__parent = parent;
+	int ret;
 
-	return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
-				 "dtpm_cpu:online",
-				 cpuhp_dtpm_cpu_online,
-				 cpuhp_dtpm_cpu_offline);
+	ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
+				NULL, cpuhp_dtpm_cpu_offline);
+	if (ret < 0)
+		return ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
+				cpuhp_dtpm_cpu_online, NULL);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ee09a39627d6..fcb2967fb5ba 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -61,6 +61,7 @@ enum cpuhp_state {
 	CPUHP_LUSTRE_CFS_DEAD,
 	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
 	CPUHP_PADATA_DEAD,
+	CPUHP_AP_DTPM_CPU_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
@@ -193,7 +194,6 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
 	CPUHP_AP_X86_HPET_ONLINE,
 	CPUHP_AP_X86_KVM_CLK_ONLINE,
-	CPUHP_AP_DTPM_CPU_ONLINE,
 	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index e80a332e3d8a..d29be6a0e513 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -29,6 +29,7 @@ struct dtpm {
 struct dtpm_ops {
 	u64 (*set_power_uw)(struct dtpm *, u64);
 	u64 (*get_power_uw)(struct dtpm *);
+	int (*upt_power_uw)(struct dtpm *);
 	void (*release)(struct dtpm *);
 };
 
@@ -62,7 +63,7 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
 	return container_of(zone, struct dtpm, zone);
 }
 
-int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max);
+int dtpm_update_power(struct dtpm *dtpm);
 
 int dtpm_release_zone(struct powercap_zone *pcz);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/5] powercap/drivers/dtpm: Create a registering system
  2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
@ 2021-03-01 21:21 ` Daniel Lezcano
  2021-03-09 14:46   ` Lukasz Luba
  2021-03-01 21:21 ` [PATCH 3/5] powercap/drivers/dtpm: Simplify the dtpm table Daniel Lezcano
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-01 21:21 UTC (permalink / raw)
  To: daniel.lezcano, rafael; +Cc: linux-kernel, linux-pm

A SoC can be structured differently depending on the platform, and the
kernel cannot be aware of all the combinations or of the specific
tweaks for a particular board.

The creation of the hierarchy must be delegated to userspace.

These changes provide a registering mechanism where the different
subsystems will initialize their dtpm backends and register with a
name the dtpm node in a list.

The next changes will provide a userspace interface to create the
different nodes hierarchically. Those will be created by name and
found via the list filled by the different subsystems.

If a specified name is not found in the list, it is assumed to be a
virtual node which will have children, and the default is to allocate
such a node.

A node is now registered in the list with dtpm_register(), a name
whose previous semantic was to create the node in the hierarchy. Thus,
the existing functions are renamed to reflect their purpose.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/powercap/dtpm.c     | 158 ++++++++++++++++++++++++++++++++++--
 drivers/powercap/dtpm_cpu.c |   4 +-
 include/linux/dtpm.h        |  12 ++-
 3 files changed, 161 insertions(+), 13 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 1085dccf9c58..20728a28ff0d 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -20,6 +20,7 @@
 #include <linux/dtpm.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/powercap.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
@@ -34,6 +35,14 @@ static DEFINE_MUTEX(dtpm_lock);
 static struct powercap_control_type *pct;
 static struct dtpm *root;
 
+struct dtpm_node {
+	const char *name;
+	struct dtpm *dtpm;
+	struct list_head node;
+};
+
+static LIST_HEAD(dtpm_list);
+
 static int get_time_window_us(struct powercap_zone *pcz, int cid, u64 *window)
 {
 	return -ENOSYS;
@@ -152,6 +161,135 @@ static int __dtpm_update_power(struct dtpm *dtpm)
 	return ret;
 }
 
+static struct dtpm *__dtpm_lookup(const char *name)
+{
+	struct dtpm_node *node;
+
+	list_for_each_entry(node, &dtpm_list, node) {
+		if (!strcmp(name, node->name))
+			return node->dtpm;
+	}
+
+	return NULL;
+}
+
+/**
+ * dtpm_get - Get a reference to a dtpm structure
+ * @name: the name of the dtpm device
+ *
+ * The function looks up in the list of the registered dtpm
+ * devices. If the dtpm device is not found, a virtual one is
+ * allocated. This function must be called to create a dtpm node in
+ * the powercap hierarchy.
+ *
+ * Return: a pointer to a dtpm structure, NULL if there is not enough
+ * memory
+ */
+struct dtpm *dtpm_get(const char *name)
+{
+	struct dtpm *dtpm;
+
+	mutex_lock(&dtpm_lock);
+	dtpm = __dtpm_lookup(name);
+	if (!dtpm)
+		dtpm = dtpm_alloc(NULL);
+	else
+		kref_get(&dtpm->kref);
+	mutex_unlock(&dtpm_lock);
+
+	return dtpm;
+}
+
+static void dtpm_release(struct kref *kref)
+{
+	struct dtpm *dtpm = container_of(kref, struct dtpm, kref);
+
+	kfree(dtpm);
+}
+
+/**
+ * dtpm_put - Release a reference on a dtpm device
+ * @dtpm: a pointer to a dtpm structure
+ *
+ * Release the reference on the specified dtpm device. The last
+ * reference leads to a memory release.
+ */
+void dtpm_put(struct dtpm *dtpm)
+{
+	kref_put(&dtpm->kref, dtpm_release);
+}
+
+/**
+ * dtpm_register - Register the dtpm in the dtpm list
+ * @name: a name used as an identifier
+ * @dtpm: the dtpm node to be registered
+ *
+ * Stores the dtpm device in a list.
+ *
+ * Return: 0 on success, -EEXIST if the device name is already present
+ * in the list, -ENOMEM in case of memory allocation failure.
+ */
+int dtpm_register(const char *name, struct dtpm *dtpm)
+{
+	struct dtpm_node *node;
+
+	mutex_lock(&dtpm_lock);
+
+	if (__dtpm_lookup(name)) {
+		mutex_unlock(&dtpm_lock);
+		return -EEXIST;
+	}
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	node->name = kstrdup(name, GFP_KERNEL);
+	if (!node->name) {
+		kfree(node);
+		return -ENOMEM;
+	}
+
+	node->dtpm = dtpm;
+
+	list_add(&node->node, &dtpm_list);
+
+	pr_info("Registered %s\n", name);
+
+	mutex_unlock(&dtpm_lock);
+
+	return 0;
+}
+
+/**
+ * dtpm_unregister - Remove the dtpm device from the list
+ * @name: the dtpm device name to be removed
+ *
+ * Remove the dtpm device from the list of the registered devices.
+ */
+void dtpm_unregister(const char *name)
+{
+	struct dtpm_node *node;
+
+	mutex_lock(&dtpm_lock);
+
+	list_for_each_entry(node, &dtpm_list, node) {
+
+		if (strcmp(name, node->name))
+			continue;
+
+		list_del(&node->node);
+		kfree(node->name);
+		kfree(node);
+
+		pr_info("Unregistered %s\n", name);
+
+		break;
+	}
+
+	mutex_unlock(&dtpm_lock);
+}
+
 /**
  * dtpm_update_power - Update the power on the dtpm
  * @dtpm: a pointer to a dtpm structure to update
@@ -208,7 +346,7 @@ int dtpm_release_zone(struct powercap_zone *pcz)
 	if (root == dtpm)
 		root = NULL;
 
-	kfree(dtpm);
+	dtpm_put(dtpm);
 
 	return 0;
 }
@@ -370,6 +508,7 @@ struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
 	if (dtpm) {
 		INIT_LIST_HEAD(&dtpm->children);
 		INIT_LIST_HEAD(&dtpm->sibling);
+		kref_init(&dtpm->kref);
 		dtpm->weight = 1024;
 		dtpm->ops = ops;
 	}
@@ -378,28 +517,29 @@ struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
 }
 
 /**
- * dtpm_unregister - Unregister a dtpm node from the hierarchy tree
- * @dtpm: a pointer to a dtpm structure corresponding to the node to be removed
+ * dtpm_destroy - Destroy a dtpm node from the hierarchy tree
+ * @dtpm: a pointer to a dtpm structure corresponding to the node to be
+ *	  removed and destroyed
  *
  * Call the underlying powercap unregister function. That will call
  * the release callback of the powercap zone.
  */
-void dtpm_unregister(struct dtpm *dtpm)
+void dtpm_destroy(struct dtpm *dtpm)
 {
 	powercap_unregister_zone(pct, &dtpm->zone);
 
-	pr_info("Unregistered dtpm node '%s'\n", dtpm->zone.name);
+	pr_info("Destroyed dtpm node '%s'\n", dtpm->zone.name);
 }
 
 /**
- * dtpm_register - Register a dtpm node in the hierarchy tree
+ * dtpm_create - Create a dtpm node in the hierarchy tree
  * @name: a string specifying the name of the node
  * @dtpm: a pointer to a dtpm structure corresponding to the new node
  * @parent: a pointer to a dtpm structure corresponding to the parent node
  *
  * Create a dtpm node in the tree. If no parent is specified, the node
  * is the root node of the hierarchy. If the root node already exists,
- * then the registration will fail. The powercap controller must be
+ * then the creation will fail. The powercap controller must be
  * initialized before calling this function.
  *
  * The dtpm structure must be initialized with the power numbers
@@ -413,7 +553,7 @@ void dtpm_unregister(struct dtpm *dtpm)
  *           * parent have ops which are reserved for leaves
  *   Other negative values are reported back from the powercap framework
  */
-int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
+int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 {
 	struct powercap_zone *pcz;
 
@@ -457,7 +597,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	if (dtpm->ops && !dtpm->ops->upt_power_uw(dtpm))
 		__dtpm_add_power(dtpm);
 
-	pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
+	pr_info("Created dtpm node '%s' / %llu-%llu uW, \n",
 		dtpm->zone.name, dtpm->power_min, dtpm->power_max);
 
 	mutex_unlock(&dtpm_lock);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index aff79c649345..1a10537c4434 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -180,7 +180,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 
 	sprintf(name, "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, dtpm, NULL);
+	ret = dtpm_register(name, dtpm);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
@@ -193,7 +193,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	return 0;
 
 out_dtpm_unregister:
-	dtpm_unregister(dtpm);
+	dtpm_unregister(name);
 	dtpm_cpu = NULL;
 	dtpm = NULL;
 
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index d29be6a0e513..447ea6c60b59 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -14,6 +14,7 @@
 
 struct dtpm {
 	struct powercap_zone zone;
+	struct kref kref;
 	struct dtpm *parent;
 	struct list_head sibling;
 	struct list_head children;
@@ -69,10 +70,17 @@ int dtpm_release_zone(struct powercap_zone *pcz);
 
 struct dtpm *dtpm_alloc(struct dtpm_ops *ops);
 
-void dtpm_unregister(struct dtpm *dtpm);
+void dtpm_destroy(struct dtpm *dtpm);
 
-int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
+int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
 int dtpm_register_cpu(struct dtpm *parent);
 
+int dtpm_register(const char *name, struct dtpm *dtpm);
+
+void dtpm_unregister(const char *name);
+
+struct dtpm *dtpm_get(const char *name);
+
+void dtpm_put(struct dtpm *dtpm);
 #endif
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/5] powercap/drivers/dtpm: Simplify the dtpm table
  2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
  2021-03-01 21:21 ` [PATCH 2/5] powercap/drivers/dtpm: Create a registering system Daniel Lezcano
@ 2021-03-01 21:21 ` Daniel Lezcano
  2021-03-09 15:02   ` Lukasz Luba
  2021-03-01 21:21 ` [PATCH 4/5] powercap/drivers/dtpm: Use container_of instead of a private data field Daniel Lezcano
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-01 21:21 UTC (permalink / raw)
  To: daniel.lezcano, rafael; +Cc: linux-kernel, linux-pm

The dtpm table is an array of pointers, which forces the user of the
table to define initdata along with the declaration of the table
entry. It is more efficient to create an array of dtpm_descr
structures, so the declaration of the table entry can be done by
initializing the different fields.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/powercap/dtpm.c     |  4 ++--
 drivers/powercap/dtpm_cpu.c |  4 +++-
 include/linux/dtpm.h        | 22 +++++++++-------------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 20728a28ff0d..a1a70dc48f63 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -607,7 +607,7 @@ int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 
 static int __init dtpm_init(void)
 {
-	struct dtpm_descr **dtpm_descr;
+	struct dtpm_descr *dtpm_descr;
 
 	pct = powercap_register_control_type(NULL, "dtpm", NULL);
 	if (IS_ERR(pct)) {
@@ -616,7 +616,7 @@ static int __init dtpm_init(void)
 	}
 
 	for_each_dtpm_table(dtpm_descr)
-		(*dtpm_descr)->init(*dtpm_descr);
+		dtpm_descr->init();
 
 	return 0;
 }
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 1a10537c4434..c5fe98eeec52 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -207,7 +207,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	return ret;
 }
 
-int dtpm_register_cpu(struct dtpm *parent)
+static int __init dtpm_cpu_init(void)
 {
 	int ret;
 
@@ -223,3 +223,5 @@ int dtpm_register_cpu(struct dtpm *parent)
 
 	return 0;
 }
+
+DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index 447ea6c60b59..8a2dbbc334b9 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -34,25 +34,23 @@ struct dtpm_ops {
 	void (*release)(struct dtpm *);
 };
 
-struct dtpm_descr;
-
-typedef int (*dtpm_init_t)(struct dtpm_descr *);
+typedef int (*dtpm_init_t)(void);
 
 struct dtpm_descr {
-	struct dtpm *parent;
-	const char *name;
 	dtpm_init_t init;
 };
 
 /* Init section thermal table */
-extern struct dtpm_descr *__dtpm_table[];
-extern struct dtpm_descr *__dtpm_table_end[];
+extern struct dtpm_descr __dtpm_table[];
+extern struct dtpm_descr __dtpm_table_end[];
 
-#define DTPM_TABLE_ENTRY(name)			\
-	static typeof(name) *__dtpm_table_entry_##name	\
-	__used __section("__dtpm_table") = &name
+#define DTPM_TABLE_ENTRY(name, __init)				\
+	static struct dtpm_descr __dtpm_table_entry_##name	\
+	__used __section("__dtpm_table") = {			\
+		.init = __init,					\
+	}
 
-#define DTPM_DECLARE(name)	DTPM_TABLE_ENTRY(name)
+#define DTPM_DECLARE(name, init)	DTPM_TABLE_ENTRY(name, init)
 
 #define for_each_dtpm_table(__dtpm)	\
 	for (__dtpm = __dtpm_table;	\
@@ -74,8 +72,6 @@ void dtpm_destroy(struct dtpm *dtpm);
 
 int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
-int dtpm_register_cpu(struct dtpm *parent);
-
 int dtpm_register(const char *name, struct dtpm *dtpm);
 
 void dtpm_unregister(const char *name);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/5] powercap/drivers/dtpm: Use container_of instead of a private data field
  2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
  2021-03-01 21:21 ` [PATCH 2/5] powercap/drivers/dtpm: Create a registering system Daniel Lezcano
  2021-03-01 21:21 ` [PATCH 3/5] powercap/drivers/dtpm: Simplify the dtpm table Daniel Lezcano
@ 2021-03-01 21:21 ` Daniel Lezcano
  2021-03-09 15:17   ` Lukasz Luba
  2021-03-01 21:21 ` [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load Daniel Lezcano
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-01 21:21 UTC (permalink / raw)
  To: daniel.lezcano, rafael; +Cc: linux-kernel, linux-pm

The dtpm framework provides an API to allocate a dtpm node. However
when a backend dtpm driver needs to allocate a dtpm node it must
define its own structure and store the pointer of this structure in
the private field of the dtpm structure.

It is more elegant to use the container_of macro and add the dtpm
structure inside the dtpm backend specific structure. The code will be
able to deal properly with the dtpm structure as a generic entity,
making all this even more self-encapsulated.

The dtpm_alloc() function no longer makes sense, as the dtpm
structure will be allocated when allocating the device specific dtpm
structure. The dtpm_init() function is provided instead.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/powercap/dtpm.c     | 27 ++++++++++------------
 drivers/powercap/dtpm_cpu.c | 46 ++++++++++++++++++-------------------
 include/linux/dtpm.h        |  3 +--
 3 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index a1a70dc48f63..40157e720263 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -191,10 +191,13 @@ struct dtpm *dtpm_get(const char *name)
 
 	mutex_lock(&dtpm_lock);
 	dtpm = __dtpm_lookup(name);
-	if (!dtpm)
-		dtpm = dtpm_alloc(NULL);
-	else
+	if (!dtpm) {
+		dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
+		if (dtpm)
+			dtpm_init(dtpm, NULL);
+	} else {
 		kref_get(&dtpm->kref);
+	}
 	mutex_unlock(&dtpm_lock);
 
 	return dtpm;
@@ -495,16 +498,12 @@ static struct powercap_zone_ops zone_ops = {
 };
 
 /**
- * dtpm_alloc - Allocate and initialize a dtpm struct
- * @name: a string specifying the name of the node
- *
- * Return: a struct dtpm pointer, NULL in case of error
+ * dtpm_init - Initialize a dtpm struct
+ * @dtpm: The dtpm struct pointer to be initialized
+ * @ops: The dtpm device specific ops, NULL for a virtual node
  */
-struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops)
 {
-	struct dtpm *dtpm;
-
-	dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
 	if (dtpm) {
 		INIT_LIST_HEAD(&dtpm->children);
 		INIT_LIST_HEAD(&dtpm->sibling);
@@ -512,8 +511,6 @@ struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
 		dtpm->weight = 1024;
 		dtpm->ops = ops;
 	}
-
-	return dtpm;
 }
 
 /**
@@ -605,7 +602,7 @@ int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	return 0;
 }
 
-static int __init dtpm_init(void)
+static int __init init_dtpm(void)
 {
 	struct dtpm_descr *dtpm_descr;
 
@@ -620,4 +617,4 @@ static int __init dtpm_init(void)
 
 	return 0;
 }
-late_initcall(dtpm_init);
+late_initcall(init_dtpm);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index c5fe98eeec52..e728ebd6d0ca 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -25,16 +25,22 @@
 #include <linux/slab.h>
 #include <linux/units.h>
 
-static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
-
 struct dtpm_cpu {
+	struct dtpm dtpm;
 	struct freq_qos_request qos_req;
 	int cpu;
 };
 
+static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu);
+
+static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm)
+{
+	return container_of(dtpm, struct dtpm_cpu, dtpm);
+}
+
 static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
 	struct cpumask cpus;
 	unsigned long freq;
@@ -64,7 +70,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 
 static u64 get_pd_power_uw(struct dtpm *dtpm)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *pd;
 	struct cpumask cpus;
 	unsigned long freq;
@@ -90,7 +96,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 
 static int upt_pd_power_uw(struct dtpm *dtpm)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
 	struct cpumask cpus;
 	int nr_cpus;
@@ -111,7 +117,7 @@ static int upt_pd_power_uw(struct dtpm *dtpm)
 
 static void pd_release(struct dtpm *dtpm)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 
 	if (freq_qos_request_active(&dtpm_cpu->qos_req))
 		freq_qos_remove_request(&dtpm_cpu->qos_req);
@@ -130,7 +136,7 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 {
 	struct cpumask cpus;
 	struct em_perf_domain *pd;
-	struct dtpm *dtpm;
+	struct dtpm_cpu *dtpm_cpu;
 
 	pd = em_cpu_get(cpu);
 	if (!pd)
@@ -138,14 +144,13 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
 
-	dtpm = per_cpu(dtpm_per_cpu, cpu);
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
 
-	return dtpm_update_power(dtpm);
+	return dtpm_update_power(&dtpm_cpu->dtpm);
 }
 
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 {
-	struct dtpm *dtpm;
 	struct dtpm_cpu *dtpm_cpu;
 	struct cpufreq_policy *policy;
 	struct em_perf_domain *pd;
@@ -160,27 +165,23 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	if (!pd)
 		return -EINVAL;
 
-	dtpm = per_cpu(dtpm_per_cpu, cpu);
-	if (dtpm)
-		return dtpm_update_power(dtpm);
-
-	dtpm = dtpm_alloc(&dtpm_ops);
-	if (!dtpm)
-		return -EINVAL;
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		return dtpm_update_power(&dtpm_cpu->dtpm);
 
 	dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
 	if (!dtpm_cpu)
-		goto out_kfree_dtpm;
+		return -ENOMEM;
 
-	dtpm->private = dtpm_cpu;
+	dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
 	dtpm_cpu->cpu = cpu;
 
 	for_each_cpu(cpu, policy->related_cpus)
-		per_cpu(dtpm_per_cpu, cpu) = dtpm;
+		per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu;
 
 	sprintf(name, "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, dtpm);
+	ret = dtpm_register(name, &dtpm_cpu->dtpm);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
@@ -195,15 +196,12 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 out_dtpm_unregister:
 	dtpm_unregister(name);
 	dtpm_cpu = NULL;
-	dtpm = NULL;
 
 out_kfree_dtpm_cpu:
 	for_each_cpu(cpu, policy->related_cpus)
 		per_cpu(dtpm_per_cpu, cpu) = NULL;
 	kfree(dtpm_cpu);
 
-out_kfree_dtpm:
-	kfree(dtpm);
 	return ret;
 }
 
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index 8a2dbbc334b9..bebb1c9a94c7 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -24,7 +24,6 @@ struct dtpm {
 	u64 power_max;
 	u64 power_min;
 	int weight;
-	void *private;
 };
 
 struct dtpm_ops {
@@ -66,7 +65,7 @@ int dtpm_update_power(struct dtpm *dtpm);
 
 int dtpm_release_zone(struct powercap_zone *pcz);
 
-struct dtpm *dtpm_alloc(struct dtpm_ops *ops);
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops);
 
 void dtpm_destroy(struct dtpm *dtpm);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load
  2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
                   ` (2 preceding siblings ...)
  2021-03-01 21:21 ` [PATCH 4/5] powercap/drivers/dtpm: Use container_of instead of a private data field Daniel Lezcano
@ 2021-03-01 21:21 ` Daniel Lezcano
  2021-03-09 10:01   ` Lukasz Luba
  2021-03-08 19:31 ` [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
  2021-03-09 14:02 ` Lukasz Luba
  5 siblings, 1 reply; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-01 21:21 UTC (permalink / raw)
  To: daniel.lezcano, rafael; +Cc: linux-kernel, linux-pm

Currently the power consumption is based on the current OPP power
assuming the entire performance domain is fully loaded.

That gives a very rough power estimation, and we can do much better by
using the load to scale the power consumption.

Use the utilization to normalize and scale the power usage over the
max possible power.

Tested on a rock960 with 2 big CPUs, the power consumption estimation
conforms with the expected one.

Before this change:

~$ ~/dhrystone -t 1 -l 10000&
~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
2260000

After this change:

~$ ~/dhrystone -t 1 -l 10000&
~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
1130000

~$ ~/dhrystone -t 2 -l 10000&
~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
2260000

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/powercap/dtpm_cpu.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index e728ebd6d0ca..8379b96468ef 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -68,27 +68,40 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 	return power_limit;
 }
 
+static u64 scale_pd_power_uw(struct cpumask *cpus, u64 power)
+{
+	unsigned long max, util;
+	int cpu, load = 0;
+
+	for_each_cpu(cpu, cpus) {
+		max = arch_scale_cpu_capacity(cpu);
+		util = sched_cpu_util(cpu, max);
+		load += ((util * 100) / max);
+	}
+
+	return (power * load) / 100;
+}
+
 static u64 get_pd_power_uw(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *pd;
 	struct cpumask cpus;
 	unsigned long freq;
-	int i, nr_cpus;
+	int i;
 
 	pd = em_cpu_get(dtpm_cpu->cpu);
 	freq = cpufreq_quick_get(dtpm_cpu->cpu);
 
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
-	nr_cpus = cpumask_weight(&cpus);
 
 	for (i = 0; i < pd->nr_perf_states; i++) {
 
 		if (pd->table[i].frequency < freq)
 			continue;
 
-		return pd->table[i].power *
-			MICROWATT_PER_MILLIWATT * nr_cpus;
+		return scale_pd_power_uw(&cpus, pd->table[i].power *
+					 MICROWATT_PER_MILLIWATT);
 	}
 
 	return 0;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code
  2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
                   ` (3 preceding siblings ...)
  2021-03-01 21:21 ` [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load Daniel Lezcano
@ 2021-03-08 19:31 ` Daniel Lezcano
  2021-03-08 19:55   ` Lukasz Luba
  2021-03-09 14:02 ` Lukasz Luba
  5 siblings, 1 reply; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-08 19:31 UTC (permalink / raw)
  To: rafael; +Cc: linux-kernel, linux-pm, Lukasz Luba


On 01/03/2021 22:21, Daniel Lezcano wrote:
> In order to increase the self-encapsulation of the dtpm generic code,
> the following changes are adding a power update ops to the dtpm
> ops. That allows the generic code to call directly the dtpm backend
> function to update the power values.
> 
> The power update function does compute the power characteristics when
> the function is invoked. In the case of the CPUs, the power
> consumption depends on the number of online CPUs. The online CPUs mask
> is not up to date at CPUHP_AP_ONLINE_DYN state in the tear down
> callback. That is the reason why the online / offline are at separate
> state. As there is already an existing state for DTPM, this one is
> only moved to the DEAD state, so there is no addition of new state
> with these changes.
> 
> That simplifies the code for the next changes and results in a more
> self-encapsulated code.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

Is there any comment on this series ?

> ---
>  drivers/powercap/dtpm.c     |  54 ++++++++--------
>  drivers/powercap/dtpm_cpu.c | 124 +++++++++++++-----------------------
>  include/linux/cpuhotplug.h  |   2 +-
>  include/linux/dtpm.h        |   3 +-
>  4 files changed, 76 insertions(+), 107 deletions(-)
> 
> diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
> index c2185ec5f887..1085dccf9c58 100644
> --- a/drivers/powercap/dtpm.c
> +++ b/drivers/powercap/dtpm.c
> @@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
>  		parent->power_limit -= dtpm->power_limit;
>  		parent = parent->parent;
>  	}
> -
> -	__dtpm_rebalance_weight(root);
>  }
>  
>  static void __dtpm_add_power(struct dtpm *dtpm)
> @@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
>  		parent->power_limit += dtpm->power_limit;
>  		parent = parent->parent;
>  	}
> +}
> +
> +static int __dtpm_update_power(struct dtpm *dtpm)
> +{
> +	int ret;
> +
> +	__dtpm_sub_power(dtpm);
>  
> -	__dtpm_rebalance_weight(root);
> +	ret = dtpm->ops->upt_power_uw(dtpm);
> +	if (ret)
> +		pr_err("Failed to update power for '%s': %d\n",
> +		       dtpm->zone.name, ret);
> +
> +	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
> +		dtpm->power_limit = dtpm->power_max;
> +
> +	__dtpm_add_power(dtpm);
> +
> +	if (root)
> +		__dtpm_rebalance_weight(root);
> +
> +	return ret;
>  }
>  
>  /**
>   * dtpm_update_power - Update the power on the dtpm
>   * @dtpm: a pointer to a dtpm structure to update
> - * @power_min: a u64 representing the new power_min value
> - * @power_max: a u64 representing the new power_max value
>   *
>   * Function to update the power values of the dtpm node specified in
>   * parameter. These new values will be propagated to the tree.
>   *
>   * Return: zero on success, -EINVAL if the values are inconsistent
>   */
> -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max)
> +int dtpm_update_power(struct dtpm *dtpm)
>  {
> -	int ret = 0;
> +	int ret;
>  
>  	mutex_lock(&dtpm_lock);
> -
> -	if (power_min == dtpm->power_min && power_max == dtpm->power_max)
> -		goto unlock;
> -
> -	if (power_max < power_min) {
> -		ret = -EINVAL;
> -		goto unlock;
> -	}
> -
> -	__dtpm_sub_power(dtpm);
> -
> -	dtpm->power_min = power_min;
> -	dtpm->power_max = power_max;
> -	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
> -		dtpm->power_limit = power_max;
> -
> -	__dtpm_add_power(dtpm);
> -
> -unlock:
> +	ret = __dtpm_update_power(dtpm);
>  	mutex_unlock(&dtpm_lock);
>  
>  	return ret;
> @@ -436,6 +434,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>  
>  	if (dtpm->ops && !(dtpm->ops->set_power_uw &&
>  			   dtpm->ops->get_power_uw &&
> +			   dtpm->ops->upt_power_uw &&
>  			   dtpm->ops->release))
>  		return -EINVAL;
>  
> @@ -455,7 +454,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>  		root = dtpm;
>  	}
>  
> -	__dtpm_add_power(dtpm);
> +	if (dtpm->ops && !dtpm->ops->upt_power_uw(dtpm))
> +		__dtpm_add_power(dtpm);
>  
>  	pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
>  		dtpm->zone.name, dtpm->power_min, dtpm->power_max);
> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
> index 51c366938acd..aff79c649345 100644
> --- a/drivers/powercap/dtpm_cpu.c
> +++ b/drivers/powercap/dtpm_cpu.c
> @@ -14,6 +14,8 @@
>   * The CPU hotplug is supported and the power numbers will be updated
>   * if a CPU is hot plugged / unplugged.
>   */
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
>  #include <linux/cpumask.h>
>  #include <linux/cpufreq.h>
>  #include <linux/cpuhotplug.h>
> @@ -23,8 +25,6 @@
>  #include <linux/slab.h>
>  #include <linux/units.h>
>  
> -static struct dtpm *__parent;
> -
>  static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
>  
>  struct dtpm_cpu {
> @@ -32,57 +32,16 @@ struct dtpm_cpu {
>  	int cpu;
>  };
>  
> -/*
> - * When a new CPU is inserted at hotplug or boot time, add the power
> - * contribution and update the dtpm tree.
> - */
> -static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
> -{
> -	u64 power_min, power_max;
> -
> -	power_min = em->table[0].power;
> -	power_min *= MICROWATT_PER_MILLIWATT;
> -	power_min += dtpm->power_min;
> -
> -	power_max = em->table[em->nr_perf_states - 1].power;
> -	power_max *= MICROWATT_PER_MILLIWATT;
> -	power_max += dtpm->power_max;
> -
> -	return dtpm_update_power(dtpm, power_min, power_max);
> -}
> -
> -/*
> - * When a CPU is unplugged, remove its power contribution from the
> - * dtpm tree.
> - */
> -static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
> -{
> -	u64 power_min, power_max;
> -
> -	power_min = em->table[0].power;
> -	power_min *= MICROWATT_PER_MILLIWATT;
> -	power_min = dtpm->power_min - power_min;
> -
> -	power_max = em->table[em->nr_perf_states - 1].power;
> -	power_max *= MICROWATT_PER_MILLIWATT;
> -	power_max = dtpm->power_max - power_max;
> -
> -	return dtpm_update_power(dtpm, power_min, power_max);
> -}
> -
>  static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
>  {
>  	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> -	struct em_perf_domain *pd;
> +	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
>  	struct cpumask cpus;
>  	unsigned long freq;
>  	u64 power;
>  	int i, nr_cpus;
>  
> -	pd = em_cpu_get(dtpm_cpu->cpu);
> -
>  	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
> -
>  	nr_cpus = cpumask_weight(&cpus);
>  
>  	for (i = 0; i < pd->nr_perf_states; i++) {
> @@ -113,6 +72,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
>  
>  	pd = em_cpu_get(dtpm_cpu->cpu);
>  	freq = cpufreq_quick_get(dtpm_cpu->cpu);
> +
>  	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
>  	nr_cpus = cpumask_weight(&cpus);
>  
> @@ -128,6 +88,27 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
>  	return 0;
>  }
>  
> +static int upt_pd_power_uw(struct dtpm *dtpm)
> +{
> +	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> +	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
> +	struct cpumask cpus;
> +	int nr_cpus;
> +
> +	cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
> +	nr_cpus = cpumask_weight(&cpus);
> +
> +	dtpm->power_min = em->table[0].power;
> +	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
> +	dtpm->power_min *= nr_cpus;
> +
> +	dtpm->power_max = em->table[em->nr_perf_states - 1].power;
> +	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
> +	dtpm->power_max *= nr_cpus;
> +
> +	return 0;
> +}
> +
>  static void pd_release(struct dtpm *dtpm)
>  {
>  	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> @@ -141,37 +122,25 @@ static void pd_release(struct dtpm *dtpm)
>  static struct dtpm_ops dtpm_ops = {
>  	.set_power_uw = set_pd_power_limit,
>  	.get_power_uw = get_pd_power_uw,
> +	.upt_power_uw = upt_pd_power_uw,
>  	.release = pd_release,
>  };
>  
>  static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
>  {
> -	struct cpufreq_policy *policy;
> +	struct cpumask cpus;
>  	struct em_perf_domain *pd;
>  	struct dtpm *dtpm;
>  
> -	policy = cpufreq_cpu_get(cpu);
> -
> -	if (!policy)
> -		return 0;
> -
>  	pd = em_cpu_get(cpu);
>  	if (!pd)
>  		return -EINVAL;
>  
> -	dtpm = per_cpu(dtpm_per_cpu, cpu);
> -
> -	power_sub(dtpm, pd);
> -
> -	if (cpumask_weight(policy->cpus) != 1)
> -		return 0;
> -
> -	for_each_cpu(cpu, policy->related_cpus)
> -		per_cpu(dtpm_per_cpu, cpu) = NULL;
> +	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
>  
> -	dtpm_unregister(dtpm);
> +	dtpm = per_cpu(dtpm_per_cpu, cpu);
>  
> -	return 0;
> +	return dtpm_update_power(dtpm);
>  }
>  
>  static int cpuhp_dtpm_cpu_online(unsigned int cpu)
> @@ -184,7 +153,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>  	int ret = -ENOMEM;
>  
>  	policy = cpufreq_cpu_get(cpu);
> -
>  	if (!policy)
>  		return 0;
>  
> @@ -194,7 +162,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>  
>  	dtpm = per_cpu(dtpm_per_cpu, cpu);
>  	if (dtpm)
> -		return power_add(dtpm, pd);
> +		return dtpm_update_power(dtpm);
>  
>  	dtpm = dtpm_alloc(&dtpm_ops);
>  	if (!dtpm)
> @@ -210,27 +178,20 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>  	for_each_cpu(cpu, policy->related_cpus)
>  		per_cpu(dtpm_per_cpu, cpu) = dtpm;
>  
> -	sprintf(name, "cpu%d", dtpm_cpu->cpu);
> +	sprintf(name, "cpu%d-cpufreq", dtpm_cpu->cpu);
>  
> -	ret = dtpm_register(name, dtpm, __parent);
> +	ret = dtpm_register(name, dtpm, NULL);
>  	if (ret)
>  		goto out_kfree_dtpm_cpu;
>  
> -	ret = power_add(dtpm, pd);
> -	if (ret)
> -		goto out_dtpm_unregister;
> -
>  	ret = freq_qos_add_request(&policy->constraints,
>  				   &dtpm_cpu->qos_req, FREQ_QOS_MAX,
>  				   pd->table[pd->nr_perf_states - 1].frequency);
>  	if (ret)
> -		goto out_power_sub;
> +		goto out_dtpm_unregister;
>  
>  	return 0;
>  
> -out_power_sub:
> -	power_sub(dtpm, pd);
> -
>  out_dtpm_unregister:
>  	dtpm_unregister(dtpm);
>  	dtpm_cpu = NULL;
> @@ -248,10 +209,17 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>  
>  int dtpm_register_cpu(struct dtpm *parent)
>  {
> -	__parent = parent;
> +	int ret;
>  
> -	return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
> -				 "dtpm_cpu:online",
> -				 cpuhp_dtpm_cpu_online,
> -				 cpuhp_dtpm_cpu_offline);
> +	ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
> +				NULL, cpuhp_dtpm_cpu_offline);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
> +				cpuhp_dtpm_cpu_online, NULL);
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
>  }
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index ee09a39627d6..fcb2967fb5ba 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -61,6 +61,7 @@ enum cpuhp_state {
>  	CPUHP_LUSTRE_CFS_DEAD,
>  	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
>  	CPUHP_PADATA_DEAD,
> +	CPUHP_AP_DTPM_CPU_DEAD,
>  	CPUHP_WORKQUEUE_PREP,
>  	CPUHP_POWER_NUMA_PREPARE,
>  	CPUHP_HRTIMERS_PREPARE,
> @@ -193,7 +194,6 @@ enum cpuhp_state {
>  	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
>  	CPUHP_AP_X86_HPET_ONLINE,
>  	CPUHP_AP_X86_KVM_CLK_ONLINE,
> -	CPUHP_AP_DTPM_CPU_ONLINE,
>  	CPUHP_AP_ACTIVE,
>  	CPUHP_ONLINE,
>  };
> diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
> index e80a332e3d8a..d29be6a0e513 100644
> --- a/include/linux/dtpm.h
> +++ b/include/linux/dtpm.h
> @@ -29,6 +29,7 @@ struct dtpm {
>  struct dtpm_ops {
>  	u64 (*set_power_uw)(struct dtpm *, u64);
>  	u64 (*get_power_uw)(struct dtpm *);
> +	int (*upt_power_uw)(struct dtpm *);
>  	void (*release)(struct dtpm *);
>  };
>  
> @@ -62,7 +63,7 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
>  	return container_of(zone, struct dtpm, zone);
>  }
>  
> -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max);
> +int dtpm_update_power(struct dtpm *dtpm);
>  
>  int dtpm_release_zone(struct powercap_zone *pcz);
>  
> 


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code
  2021-03-08 19:31 ` [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
@ 2021-03-08 19:55   ` Lukasz Luba
  2021-03-08 21:20     ` Daniel Lezcano
  0 siblings, 1 reply; 16+ messages in thread
From: Lukasz Luba @ 2021-03-08 19:55 UTC (permalink / raw)
  To: Daniel Lezcano, rafael; +Cc: linux-kernel, linux-pm

Hi Daniel,

On 3/8/21 7:31 PM, Daniel Lezcano wrote:
> 
> On 01/03/2021 22:21, Daniel Lezcano wrote:
>> In order to increase the self-encapsulation of the dtpm generic code,
>> the following changes are adding a power update ops to the dtpm
>> ops. That allows the generic code to call directly the dtpm backend
>> function to update the power values.
>>
>> The power update function does compute the power characteristics when
>> the function is invoked. In the case of the CPUs, the power
>> consumption depends on the number of online CPUs. The online CPUs mask
>> is not up to date at CPUHP_AP_ONLINE_DYN state in the tear down
>> callback. That is the reason why the online / offline are at separate
>> state. As there is already an existing state for DTPM, this one is
>> only moved to the DEAD state, so there is no addition of new state
>> with these changes.
>>
>> That simplifies the code for the next changes and results in a more
>> self-encapsulated code.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> 
> Is there any comment on this series ?

If you can wait 1 day, I will review it tomorrow...
I was quite busy recently and put it at the end of my list.

Regards,
Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code
  2021-03-08 19:55   ` Lukasz Luba
@ 2021-03-08 21:20     ` Daniel Lezcano
  0 siblings, 0 replies; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-08 21:20 UTC (permalink / raw)
  To: Lukasz Luba, rafael; +Cc: linux-kernel, linux-pm

On 08/03/2021 20:55, Lukasz Luba wrote:
> Hi Daniel,
> 
> On 3/8/21 7:31 PM, Daniel Lezcano wrote:
>>
>> On 01/03/2021 22:21, Daniel Lezcano wrote:
>>> In order to increase the self-encapsulation of the dtpm generic code,
>>> the following changes are adding a power update ops to the dtpm
>>> ops. That allows the generic code to call directly the dtpm backend
>>> function to update the power values.
>>>
>>> The power update function does compute the power characteristics when
>>> the function is invoked. In the case of the CPUs, the power
>>> consumption depends on the number of online CPUs. The online CPUs mask
>>> is not up to date at CPUHP_AP_ONLINE_DYN state in the tear down
>>> callback. That is the reason why the online / offline are at separate
>>> state. As there is already an existing state for DTPM, this one is
>>> only moved to the DEAD state, so there is no addition of new state
>>> with these changes.
>>>
>>> That simplifies the code for the next changes and results in a more
>>> self-encapsulated code.
>>>
>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>
>> Is there any comment on this series ?
> 
> If you can wait 1 day, I will review it tomorrow...

Sure, thanks

  -- Daniel


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load
  2021-03-01 21:21 ` [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load Daniel Lezcano
@ 2021-03-09 10:01   ` Lukasz Luba
  2021-03-09 19:03     ` Daniel Lezcano
  2021-03-09 19:22     ` Daniel Lezcano
  0 siblings, 2 replies; 16+ messages in thread
From: Lukasz Luba @ 2021-03-09 10:01 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: rafael, linux-kernel, linux-pm

Hi Daniel,

I've started reviewing the series, please find some comments below.

On 3/1/21 9:21 PM, Daniel Lezcano wrote:
> Currently the power consumption is based on the current OPP power
> assuming the entire performance domain is fully loaded.
> 
> That gives very gross power estimation and we can do much better by
> using the load to scale the power consumption.
> 
> Use the utilization to normalize and scale the power usage over the
> max possible power.
> 
> Tested on a rock960 with 2 big CPUS, the power consumption estimation
> conforms with the expected one.
> 
> Before this change:
> 
> ~$ ~/dhrystone -t 1 -l 10000&
> ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
> 2260000
> 
> After this change:
> 
> ~$ ~/dhrystone -t 1 -l 10000&
> ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
> 1130000
> 
> ~$ ~/dhrystone -t 2 -l 10000&
> ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
> 2260000
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   drivers/powercap/dtpm_cpu.c | 21 +++++++++++++++++----
>   1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
> index e728ebd6d0ca..8379b96468ef 100644
> --- a/drivers/powercap/dtpm_cpu.c
> +++ b/drivers/powercap/dtpm_cpu.c
> @@ -68,27 +68,40 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
>   	return power_limit;
>   }
>   
> +static u64 scale_pd_power_uw(struct cpumask *cpus, u64 power)

Please rename 'cpus' to 'pd_mask', see below.

> +{
> +	unsigned long max, util;
> +	int cpu, load = 0;

IMHO 'int load' looks odd when used with 'util' and 'max'.
I would put it in the line above, so that they all have the same type,
and rename it to 'sum_util'.

> +
> +	for_each_cpu(cpu, cpus) {

I would avoid the temporary CPU mask in the get_pd_power_uw()
with this modified loop:

for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {


> +		max = arch_scale_cpu_capacity(cpu);
> +		util = sched_cpu_util(cpu, max);
> +		load += ((util * 100) / max);

Below you can find 3 optimizations. Since we are not in the hot
path here, it's up to you whether you would like to use all/some of
them or just ignore them.

1st optimization.
If we use 'load += (util << 10) / max' in the loop, then
we could avoid div by 100 and use a right shift:
(power * load) >> 10

2nd optimization.
Since we use the EM CPU mask, which spans CPUs that all have the same
arch_scale_cpu_capacity(), you can avoid the N divisions inside the
loop and do a single division once, below the loop.

3rd optimization.
If we simply add all 'util' values into 'sum_util' (no mul or div in
the loop), then we might just have a simple macro

#define CALC_POWER_USAGE(power, sum_util, max) \
	(((power * (sum_util << 10)) / max) >> 10)


> +	}
> +
> +	return (power * load) / 100;
> +}
> +
>   static u64 get_pd_power_uw(struct dtpm *dtpm)
>   {
>   	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
>   	struct em_perf_domain *pd;
>   	struct cpumask cpus;

Since we don't need 'nr_cpus' anymore, we also don't need the
cpumask, which occupies stack space. Maybe use
	struct cpumask *pd_mask;

then

>   	unsigned long freq;
> -	int i, nr_cpus;
> +	int i;
>   
>   	pd = em_cpu_get(dtpm_cpu->cpu);
>   	freq = cpufreq_quick_get(dtpm_cpu->cpu);
>   
>   	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));

	remove ^^^^^ and set
	pd_mask = em_span_cpus(pd);

> -	nr_cpus = cpumask_weight(&cpus);
>   
>   	for (i = 0; i < pd->nr_perf_states; i++) {
>   
>   		if (pd->table[i].frequency < freq)
>   			continue;
>   
> -		return pd->table[i].power *
> -			MICROWATT_PER_MILLIWATT * nr_cpus;
> +		return scale_pd_power_uw(&cpus, pd->table[i].power *
> +					 MICROWATT_PER_MILLIWATT);

Instead of '&cpus' I would put 'pd_mask' and that should do the job.

>   	}
>   
>   	return 0;
> 

Apart from that, the design idea with util looks good.

Regards,
Lukasz

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code
  2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
                   ` (4 preceding siblings ...)
  2021-03-08 19:31 ` [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
@ 2021-03-09 14:02 ` Lukasz Luba
  5 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2021-03-09 14:02 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: rafael, linux-kernel, linux-pm



On 3/1/21 9:21 PM, Daniel Lezcano wrote:
> In order to increase the self-encapsulation of the dtpm generic code,
> the following changes are adding a power update ops to the dtpm
> ops. That allows the generic code to call directly the dtpm backend
> function to update the power values.
> 
> The power update function does compute the power characteristics when
> the function is invoked. In the case of the CPUs, the power
> consumption depends on the number of online CPUs. The online CPUs mask
> is not up to date at CPUHP_AP_ONLINE_DYN state in the tear down
> callback. That is the reason why the online / offline are at separate
> state. As there is already an existing state for DTPM, this one is
> only moved to the DEAD state, so there is no addition of new state
> with these changes.

AFAICS in this implementation we don't remove the dtpm node when we
hotplug out the CPU - which is very good. It should be mentioned in this
description explicitly IMO.

I see the reason behind this new DEAD state: we need it in order to
operate on the updated cpu_online_mask, in which the bit of the CPU that
just went offline is already set to 0. This could also be mentioned
either here or in the comment above the
cpuhp_


> 
> That simplifies the code for the next changes and results in a more
> self-encapsulated code.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   drivers/powercap/dtpm.c     |  54 ++++++++--------
>   drivers/powercap/dtpm_cpu.c | 124 +++++++++++++-----------------------
>   include/linux/cpuhotplug.h  |   2 +-
>   include/linux/dtpm.h        |   3 +-
>   4 files changed, 76 insertions(+), 107 deletions(-)
> 
> diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
> index c2185ec5f887..1085dccf9c58 100644
> --- a/drivers/powercap/dtpm.c
> +++ b/drivers/powercap/dtpm.c
> @@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
>   		parent->power_limit -= dtpm->power_limit;
>   		parent = parent->parent;
>   	}
> -
> -	__dtpm_rebalance_weight(root);
>   }
>   
>   static void __dtpm_add_power(struct dtpm *dtpm)
> @@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
>   		parent->power_limit += dtpm->power_limit;
>   		parent = parent->parent;
>   	}
> +}
> +
> +static int __dtpm_update_power(struct dtpm *dtpm)
> +{
> +	int ret;
> +
> +	__dtpm_sub_power(dtpm);
>   
> -	__dtpm_rebalance_weight(root);
> +	ret = dtpm->ops->upt_power_uw(dtpm);
> +	if (ret)
> +		pr_err("Failed to update power for '%s': %d\n",
> +		       dtpm->zone.name, ret);
> +
> +	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
> +		dtpm->power_limit = dtpm->power_max;
> +
> +	__dtpm_add_power(dtpm);
> +
> +	if (root)
> +		__dtpm_rebalance_weight(root);
> +
> +	return ret;
>   }
>   
>   /**
>    * dtpm_update_power - Update the power on the dtpm
>    * @dtpm: a pointer to a dtpm structure to update
> - * @power_min: a u64 representing the new power_min value
> - * @power_max: a u64 representing the new power_max value
>    *
>    * Function to update the power values of the dtpm node specified in
>    * parameter. These new values will be propagated to the tree.
>    *
>    * Return: zero on success, -EINVAL if the values are inconsistent
>    */
> -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max)
> +int dtpm_update_power(struct dtpm *dtpm)
>   {
> -	int ret = 0;
> +	int ret;
>   
>   	mutex_lock(&dtpm_lock);
> -
> -	if (power_min == dtpm->power_min && power_max == dtpm->power_max)
> -		goto unlock;
> -
> -	if (power_max < power_min) {
> -		ret = -EINVAL;
> -		goto unlock;
> -	}
> -
> -	__dtpm_sub_power(dtpm);
> -
> -	dtpm->power_min = power_min;
> -	dtpm->power_max = power_max;
> -	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
> -		dtpm->power_limit = power_max;
> -
> -	__dtpm_add_power(dtpm);
> -
> -unlock:
> +	ret = __dtpm_update_power(dtpm);
>   	mutex_unlock(&dtpm_lock);
>   
>   	return ret;
> @@ -436,6 +434,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>   
>   	if (dtpm->ops && !(dtpm->ops->set_power_uw &&
>   			   dtpm->ops->get_power_uw &&
> +			   dtpm->ops->upt_power_uw &&
>   			   dtpm->ops->release))
>   		return -EINVAL;
>   
> @@ -455,7 +454,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>   		root = dtpm;
>   	}
>   
> -	__dtpm_add_power(dtpm);
> +	if (dtpm->ops && !dtpm->ops->upt_power_uw(dtpm))
> +		__dtpm_add_power(dtpm);
>   
>   	pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
>   		dtpm->zone.name, dtpm->power_min, dtpm->power_max);
> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
> index 51c366938acd..aff79c649345 100644
> --- a/drivers/powercap/dtpm_cpu.c
> +++ b/drivers/powercap/dtpm_cpu.c
> @@ -14,6 +14,8 @@
>    * The CPU hotplug is supported and the power numbers will be updated
>    * if a CPU is hot plugged / unplugged.
>    */
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
>   #include <linux/cpumask.h>
>   #include <linux/cpufreq.h>
>   #include <linux/cpuhotplug.h>
> @@ -23,8 +25,6 @@
>   #include <linux/slab.h>
>   #include <linux/units.h>
>   
> -static struct dtpm *__parent;
> -
>   static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
>   
>   struct dtpm_cpu {
> @@ -32,57 +32,16 @@ struct dtpm_cpu {
>   	int cpu;
>   };
>   
> -/*
> - * When a new CPU is inserted at hotplug or boot time, add the power
> - * contribution and update the dtpm tree.
> - */
> -static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
> -{
> -	u64 power_min, power_max;
> -
> -	power_min = em->table[0].power;
> -	power_min *= MICROWATT_PER_MILLIWATT;
> -	power_min += dtpm->power_min;
> -
> -	power_max = em->table[em->nr_perf_states - 1].power;
> -	power_max *= MICROWATT_PER_MILLIWATT;
> -	power_max += dtpm->power_max;
> -
> -	return dtpm_update_power(dtpm, power_min, power_max);
> -}
> -
> -/*
> - * When a CPU is unplugged, remove its power contribution from the
> - * dtpm tree.
> - */
> -static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
> -{
> -	u64 power_min, power_max;
> -
> -	power_min = em->table[0].power;
> -	power_min *= MICROWATT_PER_MILLIWATT;
> -	power_min = dtpm->power_min - power_min;
> -
> -	power_max = em->table[em->nr_perf_states - 1].power;
> -	power_max *= MICROWATT_PER_MILLIWATT;
> -	power_max = dtpm->power_max - power_max;
> -
> -	return dtpm_update_power(dtpm, power_min, power_max);
> -}
> -
>   static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
>   {
>   	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> -	struct em_perf_domain *pd;
> +	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
>   	struct cpumask cpus;
>   	unsigned long freq;
>   	u64 power;
>   	int i, nr_cpus;
>   
> -	pd = em_cpu_get(dtpm_cpu->cpu);
> -
>   	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
> -
>   	nr_cpus = cpumask_weight(&cpus);
>   
>   	for (i = 0; i < pd->nr_perf_states; i++) {
> @@ -113,6 +72,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
>   
>   	pd = em_cpu_get(dtpm_cpu->cpu);
>   	freq = cpufreq_quick_get(dtpm_cpu->cpu);
> +
>   	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
>   	nr_cpus = cpumask_weight(&cpus);
>   
> @@ -128,6 +88,27 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
>   	return 0;
>   }
>   
> +static int upt_pd_power_uw(struct dtpm *dtpm)
> +{
> +	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> +	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
> +	struct cpumask cpus;

Maybe using cpumask_var_t, allocate and then free, is more friendly
for the static analysis tools, instead of cpumask on stack.
It's in a few places, but shouldn't harm platforms which use EM, so
up to you.

> +	int nr_cpus;
> +
> +	cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
> +	nr_cpus = cpumask_weight(&cpus);
> +
> +	dtpm->power_min = em->table[0].power;
> +	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
> +	dtpm->power_min *= nr_cpus;
> +
> +	dtpm->power_max = em->table[em->nr_perf_states - 1].power;
> +	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
> +	dtpm->power_max *= nr_cpus;
> +
> +	return 0;
> +}
> +
>   static void pd_release(struct dtpm *dtpm)
>   {
>   	struct dtpm_cpu *dtpm_cpu = dtpm->private;
> @@ -141,37 +122,25 @@ static void pd_release(struct dtpm *dtpm)
>   static struct dtpm_ops dtpm_ops = {
>   	.set_power_uw = set_pd_power_limit,
>   	.get_power_uw = get_pd_power_uw,
> +	.upt_power_uw = upt_pd_power_uw,

I'd just use full names here.

>   	.release = pd_release,
>   };
>   
>   static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
>   {
> -	struct cpufreq_policy *policy;
> +	struct cpumask cpus;

It's not needed, or I'm missing something

>   	struct em_perf_domain *pd;
>   	struct dtpm *dtpm;
>   
> -	policy = cpufreq_cpu_get(cpu);
> -
> -	if (!policy)
> -		return 0;
> -
>   	pd = em_cpu_get(cpu);
>   	if (!pd)
>   		return -EINVAL;
>   
> -	dtpm = per_cpu(dtpm_per_cpu, cpu);
> -
> -	power_sub(dtpm, pd);
> -
> -	if (cpumask_weight(policy->cpus) != 1)
> -		return 0;
> -
> -	for_each_cpu(cpu, policy->related_cpus)
> -		per_cpu(dtpm_per_cpu, cpu) = NULL;
> +	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));

and the same here.

>   
> -	dtpm_unregister(dtpm);
> +	dtpm = per_cpu(dtpm_per_cpu, cpu);
>   
> -	return 0;
> +	return dtpm_update_power(dtpm);
>   }
>   
>   static int cpuhp_dtpm_cpu_online(unsigned int cpu)
> @@ -184,7 +153,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>   	int ret = -ENOMEM;
>   
>   	policy = cpufreq_cpu_get(cpu);
> -
>   	if (!policy)
>   		return 0;
>   
> @@ -194,7 +162,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>   
>   	dtpm = per_cpu(dtpm_per_cpu, cpu);
>   	if (dtpm)
> -		return power_add(dtpm, pd);
> +		return dtpm_update_power(dtpm);
>   
>   	dtpm = dtpm_alloc(&dtpm_ops);
>   	if (!dtpm)
> @@ -210,27 +178,20 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>   	for_each_cpu(cpu, policy->related_cpus)
>   		per_cpu(dtpm_per_cpu, cpu) = dtpm;
>   
> -	sprintf(name, "cpu%d", dtpm_cpu->cpu);
> +	sprintf(name, "cpu%d-cpufreq", dtpm_cpu->cpu);
>   
> -	ret = dtpm_register(name, dtpm, __parent);
> +	ret = dtpm_register(name, dtpm, NULL);
>   	if (ret)
>   		goto out_kfree_dtpm_cpu;
>   
> -	ret = power_add(dtpm, pd);
> -	if (ret)
> -		goto out_dtpm_unregister;
> -
>   	ret = freq_qos_add_request(&policy->constraints,
>   				   &dtpm_cpu->qos_req, FREQ_QOS_MAX,
>   				   pd->table[pd->nr_perf_states - 1].frequency);
>   	if (ret)
> -		goto out_power_sub;
> +		goto out_dtpm_unregister;
>   
>   	return 0;
>   
> -out_power_sub:
> -	power_sub(dtpm, pd);
> -
>   out_dtpm_unregister:
>   	dtpm_unregister(dtpm);
>   	dtpm_cpu = NULL;
> @@ -248,10 +209,17 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>   
>   int dtpm_register_cpu(struct dtpm *parent)
>   {
> -	__parent = parent;
> +	int ret;
>   
> -	return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
> -				 "dtpm_cpu:online",
> -				 cpuhp_dtpm_cpu_online,
> -				 cpuhp_dtpm_cpu_offline);
> +	ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
> +				NULL, cpuhp_dtpm_cpu_offline);

Maybe add a comment above this line with a description of why we need this?

> +	if (ret < 0)
> +		return ret;
> +
> +	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
> +				cpuhp_dtpm_cpu_online, NULL);

For this, a small comment also wouldn't harm.

> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
>   }
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index ee09a39627d6..fcb2967fb5ba 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -61,6 +61,7 @@ enum cpuhp_state {
>   	CPUHP_LUSTRE_CFS_DEAD,
>   	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
>   	CPUHP_PADATA_DEAD,
> +	CPUHP_AP_DTPM_CPU_DEAD,
>   	CPUHP_WORKQUEUE_PREP,
>   	CPUHP_POWER_NUMA_PREPARE,
>   	CPUHP_HRTIMERS_PREPARE,
> @@ -193,7 +194,6 @@ enum cpuhp_state {
>   	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
>   	CPUHP_AP_X86_HPET_ONLINE,
>   	CPUHP_AP_X86_KVM_CLK_ONLINE,
> -	CPUHP_AP_DTPM_CPU_ONLINE,
>   	CPUHP_AP_ACTIVE,
>   	CPUHP_ONLINE,
>   };
> diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
> index e80a332e3d8a..d29be6a0e513 100644
> --- a/include/linux/dtpm.h
> +++ b/include/linux/dtpm.h
> @@ -29,6 +29,7 @@ struct dtpm {
>   struct dtpm_ops {
>   	u64 (*set_power_uw)(struct dtpm *, u64);
>   	u64 (*get_power_uw)(struct dtpm *);
> +	int (*upt_power_uw)(struct dtpm *);

IMHO as an API the full name 'update_power_uw' looks better here.

>   	void (*release)(struct dtpm *);
>   };
>   
> @@ -62,7 +63,7 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
>   	return container_of(zone, struct dtpm, zone);
>   }
>   
> -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max);
> +int dtpm_update_power(struct dtpm *dtpm);
>   
>   int dtpm_release_zone(struct powercap_zone *pcz);
>   
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] powercap/drivers/dtpm: Create a registering system
  2021-03-01 21:21 ` [PATCH 2/5] powercap/drivers/dtpm: Create a registering system Daniel Lezcano
@ 2021-03-09 14:46   ` Lukasz Luba
  0 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2021-03-09 14:46 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: rafael, linux-kernel, linux-pm



On 3/1/21 9:21 PM, Daniel Lezcano wrote:
> A SoC can be differently structured depending on the platform and the
> kernel can not be aware of all the combinations, as well as the
> specific tweaks for a particular board.
> 
> The creation of the hierarchy must be delegated to userspace.
> 
> These changes provide a registering mechanism where the different
> subsystems will initialize their dtpm backends and register with a
> name the dtpm node in a list.
> 
> The next changes will provide an userspace interface to create
> hierachically the different nodes. Those will be created by name and
> found via the list filled by the different subsystem.
> 
> If a specified name is not found in the list, it is assumed to be a
> virtual node which will have children and the default is to allocate
> such node.
> 
> When the node register in the list, the function will be dtpm_register
> where the previous semantic was to create the node. Thus, the
> functions are renamed to reflect their purpose.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   drivers/powercap/dtpm.c     | 158 ++++++++++++++++++++++++++++++++++--
>   drivers/powercap/dtpm_cpu.c |   4 +-
>   include/linux/dtpm.h        |  12 ++-
>   3 files changed, 161 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
> index 1085dccf9c58..20728a28ff0d 100644
> --- a/drivers/powercap/dtpm.c
> +++ b/drivers/powercap/dtpm.c
> @@ -20,6 +20,7 @@
>   #include <linux/dtpm.h>
>   #include <linux/init.h>
>   #include <linux/kernel.h>
> +#include <linux/kref.h>
>   #include <linux/powercap.h>
>   #include <linux/slab.h>
>   #include <linux/mutex.h>
> @@ -34,6 +35,14 @@ static DEFINE_MUTEX(dtpm_lock);
>   static struct powercap_control_type *pct;
>   static struct dtpm *root;
>   
> +struct dtpm_node {
> +	const char *name;
> +	struct dtpm *dtpm;
> +	struct list_head node;
> +};
> +
> +static LIST_HEAD(dtpm_list);
> +
>   static int get_time_window_us(struct powercap_zone *pcz, int cid, u64 *window)
>   {
>   	return -ENOSYS;
> @@ -152,6 +161,135 @@ static int __dtpm_update_power(struct dtpm *dtpm)
>   	return ret;
>   }
>   
> +static struct dtpm *__dtpm_lookup(const char *name)
> +{
> +	struct dtpm_node *node;
> +
> +	list_for_each_entry(node, &dtpm_list, node) {
> +		if (!strcmp(name, node->name))
> +			return node->dtpm;
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * dtpm_get - Get a reference to a dtpm structure
> + * @name: the name of the dtpm device
> + *
> + * The function looks up in the list of the registered dtpm
> + * devices. If the dtpm device is not found, a virtual one is
> + * allocated. This function must be called to create a dtpm node in
> + * the powercap hierarchy.
> + *
> + * Return: a pointer to a dtpm structure, NULL if there is not enough
> + * memory
> + */
> +struct dtpm *dtpm_get(const char *name)
> +{
> +	struct dtpm *dtpm;
> +
> +	mutex_lock(&dtpm_lock);
> +	dtpm = __dtpm_lookup(name);
> +	if (!dtpm)
> +		dtpm = dtpm_alloc(NULL);
> +	else
> +		kref_get(&dtpm->kref);
> +	mutex_unlock(&dtpm_lock);
> +
> +	return dtpm;
> +}
> +
> +static void dtpm_release(struct kref *kref)
> +{
> +	struct dtpm *dtpm = container_of(kref, struct dtpm, kref);
> +
> +	kfree(dtpm);
> +}
> +
> +/**
> + * dtpm_put - Release a reference on a dtpm device
> + * @dtpm: a pointer to a dtpm structure
> + *
> + * Release the reference on the specified dtpm device. The last
> + * reference leads to a memory release.
> + */
> +void dtpm_put(struct dtpm *dtpm)
> +{
> +	kref_put(&dtpm->kref, dtpm_release);
> +}
> +
> +/**
> + * dtpm_register - Register the dtpm in the dtpm list
> + * @name: a name used as an identifier
> + * @dtpm: the dtpm node to be registered
> + *
> + * Stores the dtpm device in a list.
> + *
> + * Return: 0 on success, -EEXIST if the device name is already present
> + * in the list, -ENOMEM in case of memory allocation failure.
> + */
> +int dtpm_register(const char *name, struct dtpm *dtpm)
> +{
> +	struct dtpm_node *node;
> +
> +	mutex_lock(&dtpm_lock);
> +
> +	if (__dtpm_lookup(name)) {
> +		mutex_unlock(&dtpm_lock);
> +		return -EEXIST;
> +	}
> +
> +	node = kzalloc(sizeof(*node), GFP_KERNEL);
> +	if (!node)

mutex_unlock()

> +		return -ENOMEM;
> +
> +	node->name = kstrdup(name, GFP_KERNEL);
> +	if (!node->name) {
> +		kfree(node);

mutex_unlock()

> +		return -ENOMEM;
> +	}
> +
> +	node->dtpm = dtpm;
> +
> +	list_add(&node->node, &dtpm_list);
> +
> +	pr_info("Registered %s\n", name);
> +
> +	mutex_unlock(&dtpm_lock);
> +
> +	return 0;
> +}
> +
> +/**
> + * dtpm_unregister - Remove the dtpm device from the list
> + * @name: the dtpm device name to be removed
> + *
> + * Remove the dtpm device from the list of the registered devices.
> + */
> +void dtpm_unregister(const char *name)
> +{
> +	struct dtpm_node *node;
> +
> +	mutex_lock(&dtpm_lock);
> +
> +	list_for_each_entry(node, &dtpm_list, node) {

At first glance list_for_each_entry_safe() is needed here, but
this code is safe. The node is removed and the loop stops.

> +
> +		if (strcmp(name, node->name))
> +			continue;
> +
> +		list_del(&node->node);
> +		kfree(node->name);
> +		kfree(node);
> +
> +		pr_info("Unregistered %s\n", name);
> +
> +		break;
> +	}
> +
> +	mutex_unlock(&dtpm_lock);
> +}
> +
>   /**
>    * dtpm_update_power - Update the power on the dtpm
>    * @dtpm: a pointer to a dtpm structure to update
> @@ -208,7 +346,7 @@ int dtpm_release_zone(struct powercap_zone *pcz)
>   	if (root == dtpm)
>   		root = NULL;
>   
> -	kfree(dtpm);
> +	dtpm_put(dtpm);
>   
>   	return 0;
>   }
> @@ -370,6 +508,7 @@ struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
>   	if (dtpm) {
>   		INIT_LIST_HEAD(&dtpm->children);
>   		INIT_LIST_HEAD(&dtpm->sibling);
> +		kref_init(&dtpm->kref);
>   		dtpm->weight = 1024;
>   		dtpm->ops = ops;
>   	}
> @@ -378,28 +517,29 @@ struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
>   }
>   
>   /**
> - * dtpm_unregister - Unregister a dtpm node from the hierarchy tree
> - * @dtpm: a pointer to a dtpm structure corresponding to the node to be removed
> + * dtpm_destroy - Destroy a dtpm node from the hierarchy tree
> + * @dtpm: a pointer to a dtpm structure corresponding to the node to be
> + *	  removed and destroyed
>    *
>    * Call the underlying powercap unregister function. That will call
>    * the release callback of the powercap zone.
>    */
> -void dtpm_unregister(struct dtpm *dtpm)
> +void dtpm_destroy(struct dtpm *dtpm)
>   {
>   	powercap_unregister_zone(pct, &dtpm->zone);
>   
> -	pr_info("Unregistered dtpm node '%s'\n", dtpm->zone.name);
> +	pr_info("Destroyed dtpm node '%s'\n", dtpm->zone.name);
>   }
>   
>   /**
> - * dtpm_register - Register a dtpm node in the hierarchy tree
> + * dtpm_create - Create a dtpm node in the hierarchy tree
>    * @name: a string specifying the name of the node
>    * @dtpm: a pointer to a dtpm structure corresponding to the new node
>    * @parent: a pointer to a dtpm structure corresponding to the parent node
>    *
>    * Create a dtpm node in the tree. If no parent is specified, the node
>    * is the root node of the hierarchy. If the root node already exists,
> - * then the registration will fail. The powercap controller must be
> + * then the creation will fail. The powercap controller must be
>    * initialized before calling this function.
>    *
>    * The dtpm structure must be initialized with the power numbers
> @@ -413,7 +553,7 @@ void dtpm_unregister(struct dtpm *dtpm)
>    *           * parent have ops which are reserved for leaves
>    *   Other negative values are reported back from the powercap framework
>    */
> -int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
> +int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>   {
>   	struct powercap_zone *pcz;
>   
> @@ -457,7 +597,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
>   	if (dtpm->ops && !dtpm->ops->upt_power_uw(dtpm))
>   		__dtpm_add_power(dtpm);
>   
> -	pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
> +	pr_info("Created dtpm node '%s' / %llu-%llu uW, \n",
>   		dtpm->zone.name, dtpm->power_min, dtpm->power_max);
>   
>   	mutex_unlock(&dtpm_lock);
> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
> index aff79c649345..1a10537c4434 100644
> --- a/drivers/powercap/dtpm_cpu.c
> +++ b/drivers/powercap/dtpm_cpu.c
> @@ -180,7 +180,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>   
>   	sprintf(name, "cpu%d-cpufreq", dtpm_cpu->cpu);
>   
> -	ret = dtpm_register(name, dtpm, NULL);
> +	ret = dtpm_register(name, dtpm);
>   	if (ret)
>   		goto out_kfree_dtpm_cpu;
>   
> @@ -193,7 +193,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
>   	return 0;
>   
>   out_dtpm_unregister:
> -	dtpm_unregister(dtpm);
> +	dtpm_unregister(name);
>   	dtpm_cpu = NULL;
>   	dtpm = NULL;
>   
> diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
> index d29be6a0e513..447ea6c60b59 100644
> --- a/include/linux/dtpm.h
> +++ b/include/linux/dtpm.h
> @@ -14,6 +14,7 @@
>   
>   struct dtpm {
>   	struct powercap_zone zone;
> +	struct kref kref;
>   	struct dtpm *parent;
>   	struct list_head sibling;
>   	struct list_head children;
> @@ -69,10 +70,17 @@ int dtpm_release_zone(struct powercap_zone *pcz);
>   
>   struct dtpm *dtpm_alloc(struct dtpm_ops *ops);
>   
> -void dtpm_unregister(struct dtpm *dtpm);
> +void dtpm_destroy(struct dtpm *dtpm);
>   
> -int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
> +int dtpm_create(const char *name, struct dtpm *dtpm, struct dtpm *parent);
>   
>   int dtpm_register_cpu(struct dtpm *parent);
>   
> +int dtpm_register(const char *name, struct dtpm *dtpm);
> +
> +void dtpm_unregister(const char *name);
> +
> +struct dtpm *dtpm_get(const char *name);
> +
> +void dtpm_put(struct dtpm *dtpm);
>   #endif
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] powercap/drivers/dtpm: Simplify the dtpm table
  2021-03-01 21:21 ` [PATCH 3/5] powercap/drivers/dtpm: Simplify the dtpm table Daniel Lezcano
@ 2021-03-09 15:02   ` Lukasz Luba
  0 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2021-03-09 15:02 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: rafael, linux-kernel, linux-pm



On 3/1/21 9:21 PM, Daniel Lezcano wrote:
> The dtpm table is an array of pointers, that forces the user of the
> table to define initdata along with the declaration of the table
> entry. It is more efficient to create an array of dtpm structure, so
> the declaration of the table entry can be done by initializing the
> different fields.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   drivers/powercap/dtpm.c     |  4 ++--
>   drivers/powercap/dtpm_cpu.c |  4 +++-
>   include/linux/dtpm.h        | 22 +++++++++-------------
>   3 files changed, 14 insertions(+), 16 deletions(-)
> 

LGTM

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/5] powercap/drivers/dtpm: Use container_of instead of a private data field
  2021-03-01 21:21 ` [PATCH 4/5] powercap/drivers/dtpm: Use container_of instead of a private data field Daniel Lezcano
@ 2021-03-09 15:17   ` Lukasz Luba
  0 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2021-03-09 15:17 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: rafael, linux-kernel, linux-pm



On 3/1/21 9:21 PM, Daniel Lezcano wrote:
> The dtpm framework provides an API to allocate a dtpm node. However
> when a backend dtpm driver needs to allocate a dtpm node it must
> define its own structure and store the pointer of this structure in
> the private field of the dtpm structure.
> 
> It is more elegant to use the container_of macro and add the dtpm
> structure inside the dtpm backend specific structure. The code will be
> able to deal properly with the dtpm structure as a generic entity,
> making all this even more self-encapsulated.
> 
> The dtpm_alloc() function does no longer make sense as the dtpm
> structure will be allocated when allocating the device specific dtpm
> structure. The dtpm_init() is provided instead.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   drivers/powercap/dtpm.c     | 27 ++++++++++------------
>   drivers/powercap/dtpm_cpu.c | 46 ++++++++++++++++++-------------------
>   include/linux/dtpm.h        |  3 +--
>   3 files changed, 35 insertions(+), 41 deletions(-)
> 

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load
  2021-03-09 10:01   ` Lukasz Luba
@ 2021-03-09 19:03     ` Daniel Lezcano
  2021-03-09 20:44       ` Lukasz Luba
  2021-03-09 19:22     ` Daniel Lezcano
  1 sibling, 1 reply; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-09 19:03 UTC (permalink / raw)
  To: Lukasz Luba; +Cc: rafael, linux-kernel, linux-pm


Hi Lukasz,

thanks for your comments, one question below.

On 09/03/2021 11:01, Lukasz Luba wrote:

[ ... ]

>>   +static u64 scale_pd_power_uw(struct cpumask *cpus, u64 power)
> 
> renamed 'cpus' into 'pd_mask', see below
> 
>> +{
>> +    unsigned long max, util;
>> +    int cpu, load = 0;
> 
> IMHO 'int load' looks odd when used with 'util' and 'max'.
> I would put in the line above to have them all the same type and
> renamed to 'sum_util'.
> 
>> +
>> +    for_each_cpu(cpu, cpus) {
> 
> I would avoid the temporary CPU mask in the get_pd_power_uw()
> with this modified loop:
> 
> for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
> 
> 
>> +        max = arch_scale_cpu_capacity(cpu);
>> +        util = sched_cpu_util(cpu, max);
>> +        load += ((util * 100) / max);
> 
> Below you can find 3 optimizations. Since we are not in the hot
> path here, it's up to if you would like to use all/some of them
> or just ignore.
> 
> 1st optimization.
> If we use 'load += (util << 10) / max' in the loop, then
> we could avoid div by 100 and use a right shift:
> (power * load) >> 10
> 
> 2nd optimization.
> Since we use EM CPU mask, which span all CPUs with the same
> arch_scale_cpu_capacity(), you can avoid N divs inside the loop
> and do it once, below the loop.
> 
> 3rd optimization.
> If we just simply add all 'util' into 'sum_util' (no mul or div in
> the loop), then we might just have simple macro
> 
> #define CALC_POWER_USAGE(power, sum_util, max) \
>     (((power * (sum_util << 10)) / max) >> 10)

I don't understand the 'max' division, I was expecting here something
like: ((sum_util << 10) / sum_max) >> 10)

no ?




-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load
  2021-03-09 10:01   ` Lukasz Luba
  2021-03-09 19:03     ` Daniel Lezcano
@ 2021-03-09 19:22     ` Daniel Lezcano
  1 sibling, 0 replies; 16+ messages in thread
From: Daniel Lezcano @ 2021-03-09 19:22 UTC (permalink / raw)
  To: Lukasz Luba; +Cc: rafael, linux-kernel, linux-pm

On 09/03/2021 11:01, Lukasz Luba wrote:
> Hi Daniel,
> 
> I've started reviewing the series, please find some comments below.
> 
> On 3/1/21 9:21 PM, Daniel Lezcano wrote:
>> Currently the power consumption is based on the current OPP power
>> assuming the entire performance domain is fully loaded.
>>
>> That gives very gross power estimation and we can do much better by
>> using the load to scale the power consumption.
>>
>> Use the utilization to normalize and scale the power usage over the
>> max possible power.
>>
>> Tested on a rock960 with 2 big CPUS, the power consumption estimation
>> conforms with the expected one.
>>
>> Before this change:
>>
>> ~$ ~/dhrystone -t 1 -l 10000&
>> ~$ cat
>> /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
>>
>> 2260000
>>
>> After this change:
>>
>> ~$ ~/dhrystone -t 1 -l 10000&
>> ~$ cat
>> /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
>>
>> 1130000
>>
>> ~$ ~/dhrystone -t 2 -l 10000&
>> ~$ cat
>> /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw
>>
>> 2260000
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>> ---
>>   drivers/powercap/dtpm_cpu.c | 21 +++++++++++++++++----
>>   1 file changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
>> index e728ebd6d0ca..8379b96468ef 100644
>> --- a/drivers/powercap/dtpm_cpu.c
>> +++ b/drivers/powercap/dtpm_cpu.c
>> @@ -68,27 +68,40 @@ static u64 set_pd_power_limit(struct dtpm *dtpm,
>> u64 power_limit)
>>       return power_limit;
>>   }
>>   +static u64 scale_pd_power_uw(struct cpumask *cpus, u64 power)
> 
> renamed 'cpus' into 'pd_mask', see below
> 
>> +{
>> +    unsigned long max, util;
>> +    int cpu, load = 0;
> 
> IMHO 'int load' looks odd when used with 'util' and 'max'.
> I would put in the line above to have them all the same type and
> renamed to 'sum_util'.
> 
>> +
>> +    for_each_cpu(cpu, cpus) {
> 
> I would avoid the temporary CPU mask in the get_pd_power_uw()
> with this modified loop:
> 
> for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
> 
> 
>> +        max = arch_scale_cpu_capacity(cpu);
>> +        util = sched_cpu_util(cpu, max);
>> +        load += ((util * 100) / max);
> 
> Below you can find 3 optimizations. Since we are not in the hot
> path here, it's up to if you would like to use all/some of them
> or just ignore.
> 
> 1st optimization.
> If we use 'load += (util << 10) / max' in the loop, then
> we could avoid div by 100 and use a right shift:
> (power * load) >> 10
> 
> 2nd optimization.
> Since we use EM CPU mask, which span all CPUs with the same
> arch_scale_cpu_capacity(), you can avoid N divs inside the loop
> and do it once, below the loop.
> 
> 3rd optimization.
> If we just simply add all 'util' into 'sum_util' (no mul or div in
> the loop), then we might just have simple macro
> 
> #define CALC_POWER_USAGE(power, sum_util, max) \
>     (((power * (sum_util << 10)) / max) >> 10)

static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
{
        unsigned long max, sum_max = 0, sum_util = 0;
        int cpu;

        for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
                max = arch_scale_cpu_capacity(cpu);
                sum_util += sched_cpu_util(cpu, max);
                sum_max += max;
        }

        return (power * ((sum_util << 10) / sum_max)) >> 10;
}

??

-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load
  2021-03-09 19:03     ` Daniel Lezcano
@ 2021-03-09 20:44       ` Lukasz Luba
  0 siblings, 0 replies; 16+ messages in thread
From: Lukasz Luba @ 2021-03-09 20:44 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: rafael, linux-kernel, linux-pm



On 3/9/21 7:03 PM, Daniel Lezcano wrote:
> 
> Hi Lukasz,
> 
> thanks for your comments, one question below.
> 
> On 09/03/2021 11:01, Lukasz Luba wrote:
> 
> [ ... ]
> 
>>>    +static u64 scale_pd_power_uw(struct cpumask *cpus, u64 power)
>>
>> renamed 'cpus' into 'pd_mask', see below
>>
>>> +{
>>> +    unsigned long max, util;
>>> +    int cpu, load = 0;
>>
>> IMHO 'int load' looks odd when used with 'util' and 'max'.
>> I would put in the line above to have them all the same type and
>> renamed to 'sum_util'.
>>
>>> +
>>> +    for_each_cpu(cpu, cpus) {
>>
>> I would avoid the temporary CPU mask in the get_pd_power_uw()
>> with this modified loop:
>>
>> for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
>>
>>
>>> +        max = arch_scale_cpu_capacity(cpu);
>>> +        util = sched_cpu_util(cpu, max);
>>> +        load += ((util * 100) / max);
>>
>> Below you can find 3 optimizations. Since we are not in the hot
>> path here, it's up to if you would like to use all/some of them
>> or just ignore.
>>
>> 1st optimization.
>> If we use 'load += (util << 10) / max' in the loop, then
>> we could avoid div by 100 and use a right shift:
>> (power * load) >> 10
>>
>> 2nd optimization.
>> Since we use EM CPU mask, which span all CPUs with the same
>> arch_scale_cpu_capacity(), you can avoid N divs inside the loop
>> and do it once, below the loop.
>>
>> 3rd optimization.
>> If we just simply add all 'util' into 'sum_util' (no mul or div in
>> the loop), then we might just have simple macro
>>
>> #define CALC_POWER_USAGE(power, sum_util, max) \
>>      (((power * (sum_util << 10)) / max) >> 10)
> 
> I don't understand the 'max' division, I was expecting here something
> like: ((sum_util << 10) / sum_max) >> 10)
> 
> no ?
> 

No, it should be a single 'max', which is in range 0..1024.
We would like to calculate the power for the whole perf domain, e.g.
4 CPUs almost fully utilized would have util ~1000, then total power
should be around ~4 * EM_table[i].power. This '~4' is coming from
4 utils divided by one max util:
4000 / 1024


The 'max' in the equation can be put before the bracket, as well as
'power'.

If we had floating point number, simple power for cpu1, cpu2, cpuN
would be just:
power_1 = power * util_1 / max
power_2 = power * util_2 / max
power_N = power * util_N / max
(since they have the same 'max' capacity and the same EM 'power')

The total domain power would be:
total_power = power_1 + power_2 + ... + power_N
which is:
total_power = (power * util_1 / max) + (power * util_2 / max) + ... +
               (power * util_N / max)

put the 'power' and 'max' before the bracket:
total_power = power * (util_1 + util_2 + ... + util_N) * (1/max)

introduce the 'sum_util':
sum_util = util_1 + util_2 + ... + util_N
then:
total_power = power * sum_util / max

Unfortunately, we don't use floating point, so we rely on temporary fixed
point tricks; thus the '<< 10' and '>> 10', which avoid some errors.




^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-03-09 20:45 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-01 21:21 [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
2021-03-01 21:21 ` [PATCH 2/5] powercap/drivers/dtpm: Create a registering system Daniel Lezcano
2021-03-09 14:46   ` Lukasz Luba
2021-03-01 21:21 ` [PATCH 3/5] powercap/drivers/dtpm: Simplify the dtpm table Daniel Lezcano
2021-03-09 15:02   ` Lukasz Luba
2021-03-01 21:21 ` [PATCH 4/5] powercap/drivers/dtpm: Use container_of instead of a private data field Daniel Lezcano
2021-03-09 15:17   ` Lukasz Luba
2021-03-01 21:21 ` [PATCH 5/5] powercap/drivers/dtpm: Scale the power with the load Daniel Lezcano
2021-03-09 10:01   ` Lukasz Luba
2021-03-09 19:03     ` Daniel Lezcano
2021-03-09 20:44       ` Lukasz Luba
2021-03-09 19:22     ` Daniel Lezcano
2021-03-08 19:31 ` [PATCH 1/5] powercap/drivers/dtpm: Encapsulate even more the code Daniel Lezcano
2021-03-08 19:55   ` Lukasz Luba
2021-03-08 21:20     ` Daniel Lezcano
2021-03-09 14:02 ` Lukasz Luba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).