All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] Devfreq cooling device
@ 2015-07-03 12:58 Javi Merino
  2015-07-03 12:58 ` [PATCH 1/4] PM / devfreq: Add function to set max/min frequency Javi Merino
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Javi Merino @ 2015-07-03 12:58 UTC (permalink / raw)
  To: linux-pm; +Cc: Javi Merino

This series introduces a devfreq cooling device in the thermal
framework.  Devfreq is used for DVFS for devices other than the CPUs.
With a devfreq cooling device, the thermal framework can throttle them
to control temperature.  The cooling device has the power extensions,
so it can be used by all governors in the thermal framework, including
the power allocator governor.

Javi Merino (2):
  devfreq: cache the last call to get_dev_status()
  devfreq_cooling: add trace information

Ørjan Eide (2):
  PM / devfreq: Add function to set max/min frequency
  thermal: Add devfreq cooling

 drivers/devfreq/devfreq.c                 |  77 ++++--
 drivers/devfreq/governor_simpleondemand.c |  33 +--
 drivers/thermal/Kconfig                   |  10 +
 drivers/thermal/Makefile                  |   3 +
 drivers/thermal/devfreq_cooling.c         | 395 ++++++++++++++++++++++++++++++
 include/linux/devfreq.h                   |  20 ++
 include/linux/devfreq_cooling.h           |  90 +++++++
 include/trace/events/thermal.h            |  51 ++++
 8 files changed, 640 insertions(+), 39 deletions(-)
 create mode 100644 drivers/thermal/devfreq_cooling.c
 create mode 100644 include/linux/devfreq_cooling.h

-- 
1.9.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] PM / devfreq: Add function to set max/min frequency
  2015-07-03 12:58 [PATCH 0/4] Devfreq cooling device Javi Merino
@ 2015-07-03 12:58 ` Javi Merino
  2015-07-04  2:44   ` MyungJoo Ham
  2015-07-03 12:58 ` [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status() Javi Merino
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Javi Merino @ 2015-07-03 12:58 UTC (permalink / raw)
  To: linux-pm; +Cc: Ørjan Eide, MyungJoo Ham, Kyungmin Park

From: Ørjan Eide <orjan.eide@arm.com>

Factor out the logic to set minimum and maximum frequency for the device
so that it can be used by other parts of the kernel.

Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Ørjan Eide <orjan.eide@arm.com>
---
 drivers/devfreq/devfreq.c | 72 +++++++++++++++++++++++++++++++----------------
 include/linux/devfreq.h   | 13 +++++++++
 2 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index ca1b362d77e2..6390d5db6816 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -768,6 +768,48 @@ err_out:
 }
 EXPORT_SYMBOL(devfreq_remove_governor);
 
+int devfreq_qos_set_max(struct devfreq *df, unsigned long value)
+{
+	unsigned long min;
+	int ret = 0;
+
+	mutex_lock(&df->lock);
+	min = df->min_freq;
+	if (value && min && value < min) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	df->max_freq = value;
+	update_devfreq(df);
+unlock:
+	mutex_unlock(&df->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(devfreq_qos_set_max);
+
+int devfreq_qos_set_min(struct devfreq *df, unsigned long value)
+{
+	unsigned long max;
+	int ret = 0;
+
+	mutex_lock(&df->lock);
+	max = df->max_freq;
+	if (value && max && value > max) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	df->min_freq = value;
+	update_devfreq(df);
+unlock:
+	mutex_unlock(&df->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(devfreq_qos_set_min);
+
 static ssize_t governor_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
@@ -899,24 +941,15 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 	struct devfreq *df = to_devfreq(dev);
 	unsigned long value;
 	int ret;
-	unsigned long max;
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
-	mutex_lock(&df->lock);
-	max = df->max_freq;
-	if (value && max && value > max) {
-		ret = -EINVAL;
-		goto unlock;
-	}
+	ret = devfreq_qos_set_min(df, value);
+	if (!ret)
+		ret = count;
 
-	df->min_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
-	mutex_unlock(&df->lock);
 	return ret;
 }
 
@@ -932,24 +965,15 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 	struct devfreq *df = to_devfreq(dev);
 	unsigned long value;
 	int ret;
-	unsigned long min;
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
-	mutex_lock(&df->lock);
-	min = df->min_freq;
-	if (value && min && value < min) {
-		ret = -EINVAL;
-		goto unlock;
-	}
+	ret = devfreq_qos_set_max(df, value);
+	if (!ret)
+		ret = count;
 
-	df->max_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
-	mutex_unlock(&df->lock);
 	return ret;
 }
 static DEVICE_ATTR_RW(min_freq);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index ce447f0f1bad..ffbe1f62ec2c 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -204,6 +204,9 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev,
 extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 						struct devfreq *devfreq);
 
+int devfreq_qos_set_min(struct devfreq *df, unsigned long value);
+int devfreq_qos_set_max(struct devfreq *df, unsigned long value);
+
 #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
 /**
  * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
@@ -289,6 +292,16 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev,
 							struct devfreq *devfreq)
 {
 }
+
+static inline int devfreq_qos_set_min(struct devfreq *df, unsigned long value)
+{
+	return -EINVAL;
+}
+
+static inline int devfreq_qos_set_max(struct devfreq *df, unsigned long value)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_PM_DEVFREQ */
 
 #endif /* __LINUX_DEVFREQ_H__ */
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status()
  2015-07-03 12:58 [PATCH 0/4] Devfreq cooling device Javi Merino
  2015-07-03 12:58 ` [PATCH 1/4] PM / devfreq: Add function to set max/min frequency Javi Merino
@ 2015-07-03 12:58 ` Javi Merino
  2015-07-04  3:15   ` MyungJoo Ham
  2015-07-03 12:58 ` [PATCH 3/4] thermal: Add devfreq cooling Javi Merino
  2015-07-03 12:58 ` [PATCH 4/4] devfreq_cooling: add trace information Javi Merino
  3 siblings, 1 reply; 11+ messages in thread
From: Javi Merino @ 2015-07-03 12:58 UTC (permalink / raw)
  To: linux-pm; +Cc: Javi Merino, MyungJoo Ham, Kyungmin Park

The return value of get_dev_status() can be reused.  Cache it so that
other parts of the kernel can reuse it instead of having to call the
same function again.

Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
---
 drivers/devfreq/devfreq.c                 |  5 +++++
 drivers/devfreq/governor_simpleondemand.c | 33 +++++++++++++++++--------------
 include/linux/devfreq.h                   |  7 +++++++
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 6390d5db6816..d253a73f2b86 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -810,6 +810,11 @@ unlock:
 }
 EXPORT_SYMBOL(devfreq_qos_set_min);
 
+int devfreq_update_stats(struct devfreq *df)
+{
+	return df->profile->get_dev_status(df->dev.parent, &df->last_status);
+}
+
 static ssize_t governor_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index 0720ba84ca92..ae72ba5e78df 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -21,17 +21,20 @@
 static int devfreq_simple_ondemand_func(struct devfreq *df,
 					unsigned long *freq)
 {
-	struct devfreq_dev_status stat;
-	int err = df->profile->get_dev_status(df->dev.parent, &stat);
+	int err;
+	struct devfreq_dev_status *stat;
 	unsigned long long a, b;
 	unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
 	unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
 	struct devfreq_simple_ondemand_data *data = df->data;
 	unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX;
 
+	err = devfreq_update_stats(df);
 	if (err)
 		return err;
 
+	stat = &df->last_status;
+
 	if (data) {
 		if (data->upthreshold)
 			dfso_upthreshold = data->upthreshold;
@@ -43,41 +46,41 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 		return -EINVAL;
 
 	/* Assume MAX if it is going to be divided by zero */
-	if (stat.total_time == 0) {
+	if (stat->total_time == 0) {
 		*freq = max;
 		return 0;
 	}
 
 	/* Prevent overflow */
-	if (stat.busy_time >= (1 << 24) || stat.total_time >= (1 << 24)) {
-		stat.busy_time >>= 7;
-		stat.total_time >>= 7;
+	if (stat->busy_time >= (1 << 24) || stat->total_time >= (1 << 24)) {
+		stat->busy_time >>= 7;
+		stat->total_time >>= 7;
 	}
 
 	/* Set MAX if it's busy enough */
-	if (stat.busy_time * 100 >
-	    stat.total_time * dfso_upthreshold) {
+	if (stat->busy_time * 100 >
+	    stat->total_time * dfso_upthreshold) {
 		*freq = max;
 		return 0;
 	}
 
 	/* Set MAX if we do not know the initial frequency */
-	if (stat.current_frequency == 0) {
+	if (stat->current_frequency == 0) {
 		*freq = max;
 		return 0;
 	}
 
 	/* Keep the current frequency */
-	if (stat.busy_time * 100 >
-	    stat.total_time * (dfso_upthreshold - dfso_downdifferential)) {
-		*freq = stat.current_frequency;
+	if (stat->busy_time * 100 >
+	    stat->total_time * (dfso_upthreshold - dfso_downdifferential)) {
+		*freq = stat->current_frequency;
 		return 0;
 	}
 
 	/* Set the desired frequency based on the load */
-	a = stat.busy_time;
-	a *= stat.current_frequency;
-	b = div_u64(a, stat.total_time);
+	a = stat->busy_time;
+	a *= stat->current_frequency;
+	b = div_u64(a, stat->total_time);
 	b *= 100;
 	b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2));
 	*freq = (unsigned long) b;
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index ffbe1f62ec2c..be351cfacec8 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -161,6 +161,7 @@ struct devfreq {
 	struct delayed_work work;
 
 	unsigned long previous_freq;
+	struct devfreq_dev_status last_status;
 
 	void *data; /* private data for governors */
 
@@ -206,6 +207,7 @@ extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 
 int devfreq_qos_set_min(struct devfreq *df, unsigned long value);
 int devfreq_qos_set_max(struct devfreq *df, unsigned long value);
+int devfreq_update_stats(struct devfreq *df);
 
 #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
 /**
@@ -302,6 +304,11 @@ static inline int devfreq_qos_set_max(struct devfreq *df, unsigned long value)
 {
 	return -EINVAL;
 }
+
+static inline int devfreq_update_stats(struct devfreq *df)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_PM_DEVFREQ */
 
 #endif /* __LINUX_DEVFREQ_H__ */
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/4] thermal: Add devfreq cooling
  2015-07-03 12:58 [PATCH 0/4] Devfreq cooling device Javi Merino
  2015-07-03 12:58 ` [PATCH 1/4] PM / devfreq: Add function to set max/min frequency Javi Merino
  2015-07-03 12:58 ` [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status() Javi Merino
@ 2015-07-03 12:58 ` Javi Merino
  2015-07-03 12:58 ` [PATCH 4/4] devfreq_cooling: add trace information Javi Merino
  3 siblings, 0 replies; 11+ messages in thread
From: Javi Merino @ 2015-07-03 12:58 UTC (permalink / raw)
  To: linux-pm; +Cc: Ørjan Eide, Zhang Rui, Eduardo Valentin

From: Ørjan Eide <orjan.eide@arm.com>

Add a generic thermal cooling device for devfreq, that is similar to
cpu_cooling.

The device must use devfreq.  In order to use the power extension of the
cooling device, it must have registered its OPPs using the OPP library.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Ørjan Eide <orjan.eide@arm.com>
---
 drivers/thermal/Kconfig           |  10 +
 drivers/thermal/Makefile          |   3 +
 drivers/thermal/devfreq_cooling.c | 388 ++++++++++++++++++++++++++++++++++++++
 include/linux/devfreq_cooling.h   |  90 +++++++++
 4 files changed, 491 insertions(+)
 create mode 100644 drivers/thermal/devfreq_cooling.c
 create mode 100644 include/linux/devfreq_cooling.h

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 118938ee8552..da339ff7c3c9 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -147,6 +147,16 @@ config CLOCK_THERMAL
 	  device that is configured to use this cooling mechanism will be
 	  controlled to reduce clock frequency whenever temperature is high.
 
+config DEVFREQ_THERMAL
+	bool "Generic device cooling support"
+	depends on PM_DEVFREQ
+	help
+	  This implements the generic devfreq cooling mechanism through
+	  frequency reduction for devices using devfreq.
+
+	  This will throttle the device by limiting the maximum allowed DVFS
+	  frequency corresponding to the cooling level.
+
 	  If you want this support, you should say Y here.
 
 config THERMAL_EMULATION
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 535dfee1496f..45f26978ff74 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -22,6 +22,9 @@ thermal_sys-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
 # clock cooling
 thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
 
+# devfreq cooling
+thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
+
 # platform thermal drivers
 obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
new file mode 100644
index 000000000000..ee7b84ecd0c0
--- /dev/null
+++ b/drivers/thermal/devfreq_cooling.c
@@ -0,0 +1,388 @@
+/*
+ * devfreq_cooling: Thermal cooling device implementation for devices using
+ *                  devfreq
+ *
+ * Copyright (C) 2014-2015 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/pm_opp.h>
+#include <linux/thermal.h>
+
+static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
+					 unsigned long *state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+
+	*state = df->profile->max_state - 1;
+
+	return 0;
+}
+
+static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+					 unsigned long *state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+
+	*state = dfc->cooling_state;
+
+	return 0;
+}
+
+static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+					 unsigned long state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	struct device *dev = df->dev.parent;
+	unsigned long freq, index;
+	unsigned long max_state = df->profile->max_state;
+
+	if (state == dfc->cooling_state)
+		return 0;
+
+	dev_dbg(dev, "Setting cooling state %lu\n", state);
+
+	if (state >= max_state)
+		return -EINVAL;
+
+	index = (max_state - 1) - state;
+	freq = df->profile->freq_table[index];
+
+	if (df->max_freq != freq)
+		devfreq_qos_set_max(df, freq);
+
+	dfc->cooling_state = state;
+
+	return 0;
+}
+
+static unsigned long
+freq_get_state(struct devfreq *df, unsigned long freq)
+{
+	unsigned long state = THERMAL_CSTATE_INVALID;
+	int i;
+
+	for (i = 0; i < df->profile->max_state; i++) {
+		if (df->profile->freq_table[i] == freq)
+			state = (df->profile->max_state - 1) - i;
+	}
+
+	return state;
+}
+
+static unsigned long
+state_get_freq(struct devfreq *df, unsigned long state)
+{
+	unsigned long index = (df->profile->max_state - 1) - state;
+
+	if (index >= df->profile->max_state)
+		return 0;
+
+	return df->profile->freq_table[index];
+}
+
+static unsigned long
+get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+{
+	struct devfreq *df = dfc->devfreq;
+	struct device *dev = df->dev.parent;
+	unsigned long voltage;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
+	voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
+	rcu_read_unlock();
+
+	if (voltage == 0) {
+		dev_warn_ratelimited(dev,
+				     "Failed to get voltage for frequency %lu: %ld\n",
+				     freq, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+		return 0;
+	}
+
+	return dfc->power_ops->get_static_power(voltage);
+}
+
+static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
+					       struct thermal_zone_device *tz,
+					       u32 *power)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	struct devfreq_dev_status *status = &df->last_status;
+	unsigned long state;
+	unsigned long freq = status->current_frequency;
+	u32 dyn_power, static_power;
+
+	/* Get dynamic power for state */
+	state = freq_get_state(df, freq);
+	if (state == THERMAL_CSTATE_INVALID)
+		return -EAGAIN;
+
+	dyn_power = dfc->power_table[state];
+
+	/* Scale dynamic power for utilization */
+	dyn_power = (dyn_power * status->busy_time) / status->total_time;
+
+	/* Get static power */
+	static_power = get_static_power(dfc, freq);
+
+	*power = dyn_power + static_power;
+
+	return 0;
+}
+
+static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
+				       struct thermal_zone_device *tz,
+				       unsigned long state,
+				       u32 *power)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	unsigned long freq;
+	u32 static_power;
+
+	freq = state_get_freq(df, state);
+	static_power = get_static_power(dfc, freq);
+
+	*power = dfc->power_table[state] + static_power;
+	return 0;
+}
+
+static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
+				       struct thermal_zone_device *tz,
+				       u32 power, unsigned long *state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	struct devfreq_dev_status *status = &df->last_status;
+	unsigned long freq = status->current_frequency;
+	unsigned long busy_time;
+	s32 dyn_power;
+	u32 static_power;
+	int i;
+
+	static_power = get_static_power(dfc, freq);
+
+	dyn_power = power - static_power;
+	dyn_power = dyn_power > 0 ? dyn_power : 0;
+
+	/* Scale dynamic power for utilization */
+	busy_time = status->busy_time ?: 1;
+	dyn_power = (dyn_power * status->total_time) / busy_time;
+
+	/*
+	 * Find the first cooling state that is within the power
+	 * budget for dynamic power.
+	 */
+	for (i = 0; i < df->profile->max_state - 1; i++)
+		if (dyn_power >= dfc->power_table[i])
+			break;
+
+	*state = i;
+	return 0;
+}
+
+static struct thermal_cooling_device_ops devfreq_cooling_ops = {
+	.get_max_state = devfreq_cooling_get_max_state,
+	.get_cur_state = devfreq_cooling_get_cur_state,
+	.set_cur_state = devfreq_cooling_set_cur_state,
+};
+
+/**
+ * devfreq_cooling_gen_power_table(): - Generate power table.
+ * @dfc: Pointer to devfreq cooling device.
+ *
+ * Generate a table with the device's maximum power usage at each cooling
+ * state (OPP). The static and dynamic powerm using the appropriate voltage and
+ * frequency for the state, is acquired from the struct devfreq_cooling_ops, and
+ * summed to make the maximum power draw.
+ *
+ * The table is malloced, and a pointer put in dfc->power_table. This must be
+ * freed when unregistering the devfreq cooling device.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int devfreq_cooling_gen_power_table(struct devfreq_cooling_device *dfc)
+{
+	struct devfreq *df = dfc->devfreq;
+	struct device *dev = df->dev.parent;
+	int num_opps;
+	struct devfreq_cooling_ops *callbacks = dfc->power_ops;
+	unsigned long freq;
+	u32 *table;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_PM_OPP)) {
+		dev_warn(dev, "Can't use power extensions of the devfreq cooling device without CONFIG_PM_OPP\n");
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+	num_opps = dev_pm_opp_get_opp_count(dev);
+
+	if (num_opps != dfc->devfreq->profile->max_state) {
+		rcu_read_unlock();
+		return -ERANGE;
+	}
+
+	table = devm_kcalloc(dev, num_opps, sizeof(*table), GFP_KERNEL);
+	if (!table) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
+
+	for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
+		unsigned long power_dyn, voltage;
+		struct dev_pm_opp *opp;
+
+		opp = dev_pm_opp_find_freq_floor(dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
+
+		power_dyn = callbacks->get_dynamic_power(freq, voltage);
+
+		dev_info(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
+			 freq / 1000000, voltage, power_dyn, power_dyn);
+
+		table[i] = power_dyn;
+	}
+	rcu_read_unlock();
+
+	if (i != num_opps) {
+		devm_kfree(dev, table);
+		return -EFAULT;
+	}
+
+	dfc->power_table = table;
+
+	return 0;
+}
+
+/**
+ * of_devfreq_cooling_register_power(): - Register devfreq cooling device,
+ *                                      with OF and power information.
+ * @np: Pointer to OF device_node.
+ * @df: Pointer to devfreq device.
+ * @ops: Pointer to power ops.
+ */
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_ops *ops)
+{
+	struct thermal_cooling_device *cdev;
+	struct devfreq_cooling_device *dfc;
+	struct device *dev = df->dev.parent;
+	int err;
+
+	/* freq_table is required to look map state index to frequency. */
+	if (!df->profile->max_state || !df->profile->freq_table)
+		return ERR_PTR(-EINVAL);
+
+	dfc = devm_kzalloc(dev, sizeof(*dfc), GFP_KERNEL);
+	if (!dfc)
+		return ERR_PTR(-ENOMEM);
+
+	dfc->devfreq = df;
+
+	if (ops) {
+		if (!ops->get_static_power || !ops->get_dynamic_power) {
+			err = -EINVAL;
+			goto free_dfc;
+		}
+		dfc->power_ops = ops;
+
+		err = devfreq_cooling_gen_power_table(dfc);
+		if (err)
+			goto free_dfc;
+
+		devfreq_cooling_ops.get_requested_power =
+			devfreq_cooling_get_requested_power;
+		devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
+		devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
+	}
+
+	cdev = thermal_of_cooling_device_register(np, "devfreq", dfc,
+						  &devfreq_cooling_ops);
+	if (IS_ERR(cdev)) {
+		err = PTR_ERR(cdev);
+		dev_err(df->dev.parent,
+			"Failed to register devfreq cooling device (%d)\n",
+			err);
+		goto free_power_table;
+	}
+
+	dfc->cdev = cdev;
+
+	return dfc;
+
+free_power_table:
+	if (dfc->power_table)
+		devm_kfree(dev, dfc->power_table);
+free_dfc:
+	devm_kfree(dev, dfc);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(of_devfreq_cooling_register_power);
+
+/**
+ * of_devfreq_cooling_register(): - Register devfreq cooling device,
+ *                                with OF information.
+ * @np: Pointer to OF device_node.
+ * @df: Pointer to devfreq device.
+ */
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+	return of_devfreq_cooling_register_power(np, df, NULL);
+}
+EXPORT_SYMBOL(of_devfreq_cooling_register);
+
+/**
+ * devfreq_cooling_register(): - Register devfreq cooling device.
+ * @df: Pointer to devfreq device.
+ */
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df)
+{
+	return of_devfreq_cooling_register(NULL, df);
+}
+EXPORT_SYMBOL(devfreq_cooling_register);
+
+/**
+ * devfreq_cooling_unregister(): - Unregister devfreq cooling device.
+ * @dfc: Pointer to devfreq cooling device to unregister.
+ */
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+	struct device *dev;
+
+	if (!dfc)
+		return;
+
+	dev = dfc->devfreq->dev.parent;
+
+	thermal_cooling_device_unregister(dfc->cdev);
+
+	if (dfc->power_table)
+		devm_kfree(dev, dfc->power_table);
+	devm_kfree(dev, dfc);
+}
+EXPORT_SYMBOL(devfreq_cooling_unregister);
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
new file mode 100644
index 000000000000..6543594dc0e2
--- /dev/null
+++ b/include/linux/devfreq_cooling.h
@@ -0,0 +1,90 @@
+/*
+ * devfreq_cooling: Thermal cooling device implementation for devices using
+ *                  devfreq
+ *
+ * Copyright (C) 2014 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DEVFREQ_COOLING_H__
+#define __DEVFREQ_COOLING_H__
+
+#include <linux/devfreq.h>
+#include <linux/thermal.h>
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+
+/**
+ * struct devfreq_cooling_ops - Devfreq cooling power ops
+ * @get_static_power: Take voltage, in mV, and return the static power in mW.
+ * @get_dynamic_power: Take voltage, in mV, and frequency, in HZ, and return
+ * the dynamic power draw in mW.
+ */
+struct devfreq_cooling_ops {
+	unsigned long (*get_static_power)(unsigned long voltage);
+	unsigned long (*get_dynamic_power)(unsigned long freq,
+					   unsigned long voltage);
+};
+
+/**
+ * struct devfreq_cooling_device - Devfreq cooling device
+ * @cdev:          Pointer to associated thermal cooling device.
+ * @devfreq:       Pointer to associated devfreq device.
+ * @cooling_state: Current cooling state.
+ * @power_table:   Pointer to table with maximum power draw for each cooling
+ *                 state. State is the index into the table, and the power is
+ *                 stated in mW.
+ * @power_ops:     Pointer to power operations, used to generate @power_table.
+ */
+struct devfreq_cooling_device {
+	struct thermal_cooling_device *cdev;
+	struct devfreq *devfreq;
+	unsigned long cooling_state;
+	u32 *power_table;
+	struct devfreq_cooling_ops *power_ops;
+};
+
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_ops *ops);
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df);
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc);
+
+#else /* !CONFIG_DEVFREQ_THERMAL */
+
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_ops *ops)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct devfreq_cooling_device *
+devfreq_cooling_register(struct devfreq *df)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline void
+devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+}
+
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* __DEVFREQ_COOLING_H__ */
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/4] devfreq_cooling: add trace information
  2015-07-03 12:58 [PATCH 0/4] Devfreq cooling device Javi Merino
                   ` (2 preceding siblings ...)
  2015-07-03 12:58 ` [PATCH 3/4] thermal: Add devfreq cooling Javi Merino
@ 2015-07-03 12:58 ` Javi Merino
  2015-07-06 18:58   ` Steven Rostedt
  3 siblings, 1 reply; 11+ messages in thread
From: Javi Merino @ 2015-07-03 12:58 UTC (permalink / raw)
  To: linux-pm
  Cc: Javi Merino, Zhang Rui, Eduardo Valentin, Steven Rostedt, Ingo Molnar

Tracing is useful for debugging and performance tuning.  Add similar
traces to what's present in the cpu cooling device.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
---
 drivers/thermal/devfreq_cooling.c |  9 ++++++-
 include/trace/events/thermal.h    | 51 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index ee7b84ecd0c0..30b9e7f6556d 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -21,6 +21,8 @@
 #include <linux/pm_opp.h>
 #include <linux/thermal.h>
 
+#include <trace/events/thermal.h>
+
 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
 					 unsigned long *state)
 {
@@ -127,7 +129,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
 	struct devfreq_dev_status *status = &df->last_status;
 	unsigned long state;
 	unsigned long freq = status->current_frequency;
-	u32 dyn_power, static_power;
+	u32 load, dyn_power, static_power;
 
 	/* Get dynamic power for state */
 	state = freq_get_state(df, freq);
@@ -142,6 +144,10 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
 	/* Get static power */
 	static_power = get_static_power(dfc, freq);
 
+	load = (100 * status->busy_time) / status->total_time;
+	trace_thermal_power_devfreq_get_power(cdev, freq, load, dyn_power,
+					      static_power);
+
 	*power = dyn_power + static_power;
 
 	return 0;
@@ -195,6 +201,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 			break;
 
 	*state = i;
+	trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
 	return 0;
 }
 
diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
index 8b1f80682b80..ffa870c0d2d2 100644
--- a/include/trace/events/thermal.h
+++ b/include/trace/events/thermal.h
@@ -135,6 +135,57 @@ TRACE_EVENT(thermal_power_cpu_limit,
 		__entry->power)
 );
 
+TRACE_EVENT(thermal_power_devfreq_get_power,
+	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
+		u32 load, u32 dynamic_power, u32 static_power),
+
+	TP_ARGS(cdev, freq, load, dynamic_power, static_power),
+
+	TP_STRUCT__entry(
+		__string(type,         cdev->type    )
+		__field(unsigned long, freq          )
+		__field(u32,           load          )
+		__field(u32,           dynamic_power )
+		__field(u32,           static_power  )
+	),
+
+	TP_fast_assign(
+		__assign_str(type, cdev->type);
+		__entry->freq = freq;
+		__entry->load = load;
+		__entry->dynamic_power = dynamic_power;
+		__entry->static_power = static_power;
+	),
+
+	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u",
+		__get_str(type), __entry->freq,
+		__entry->load, __entry->dynamic_power, __entry->static_power)
+);
+
+TRACE_EVENT(thermal_power_devfreq_limit,
+	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
+		unsigned long cdev_state, u32 power),
+
+	TP_ARGS(cdev, freq, cdev_state, power),
+
+	TP_STRUCT__entry(
+		__string(type,         cdev->type)
+		__field(unsigned int,  freq      )
+		__field(unsigned long, cdev_state)
+		__field(u32,           power     )
+	),
+
+	TP_fast_assign(
+		__assign_str(type, cdev->type);
+		__entry->freq = freq;
+		__entry->cdev_state = cdev_state;
+		__entry->power = power;
+	),
+
+	TP_printk("type=%s freq=%u cdev_state=%lu power=%u",
+		__get_str(type), __entry->freq, __entry->cdev_state,
+		__entry->power)
+);
 #endif /* _TRACE_THERMAL_H */
 
 /* This part must be outside protection */
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] PM / devfreq: Add function to set max/min frequency
  2015-07-03 12:58 ` [PATCH 1/4] PM / devfreq: Add function to set max/min frequency Javi Merino
@ 2015-07-04  2:44   ` MyungJoo Ham
  2015-07-06  8:47     ` Javi Merino
  0 siblings, 1 reply; 11+ messages in thread
From: MyungJoo Ham @ 2015-07-04  2:44 UTC (permalink / raw)
  To: Javi Merino; +Cc: Linux PM list, Ørjan Eide, Kyungmin Park

On Fri, Jul 3, 2015 at 9:58 PM, Javi Merino <javi.merino@arm.com> wrote:
> From: Ørjan Eide <orjan.eide@arm.com>
>
> Factor out the logic to set minimum and maximum frequency for the device
> so that it can be used by other parts of the kernel.
>
> Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
> Cc: Kyungmin Park <kyungmin.park@samsung.com>
> Signed-off-by: Ørjan Eide <orjan.eide@arm.com>

Dear Javi,



Is there reasons to name the function "devfreq_qos_set_[min|max]"? not
"devfreq_set_[min|max]"? The functions may be used by non-PM-qos
components as well (e.g., thermal).


And I like the idea of using devfreq devices as cooling devices. :)


Cheers,
MyungJoo.


ps. I will be on a trip starting hours later for 8 days and then in a
military training for 3 days. Please expect that my responses during
the next 11-12 days will be very slow.



-- 
MyungJoo Ham, Ph.D.
Frontier CS Lab, S/W Center, Samsung Electronics

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status()
  2015-07-03 12:58 ` [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status() Javi Merino
@ 2015-07-04  3:15   ` MyungJoo Ham
  2015-07-06  9:35     ` Javi Merino
  0 siblings, 1 reply; 11+ messages in thread
From: MyungJoo Ham @ 2015-07-04  3:15 UTC (permalink / raw)
  To: Javi Merino; +Cc: Linux PM list, Kyungmin Park

On Fri, Jul 3, 2015 at 9:58 PM, Javi Merino <javi.merino@arm.com> wrote:
> The return value of get_dev_status() can be reused.  Cache it so that
> other parts of the kernel can reuse it instead of having to call the
> same function again.
>
> Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
> Cc: Kyungmin Park <kyungmin.park@samsung.com>
> Signed-off-by: Javi Merino <javi.merino@arm.com>
> ---
>  drivers/devfreq/devfreq.c                 |  5 +++++
>  drivers/devfreq/governor_simpleondemand.c | 33 +++++++++++++++++--------------
>  include/linux/devfreq.h                   |  7 +++++++
>  3 files changed, 30 insertions(+), 15 deletions(-)
>

What if there are two entities using the stat at the same time? If you
are going to have the status at a shared object (struct devfreq), you
need to consider racing conditions as well: two threads accessing
last_status at the same time, resulting in calling
devfreq_update_stats() while the other has called it beforehand and
using the variables in the stat struct. By doing it locally (having
stat as struct devfreq_dev_status, not a pointer), we can avoid such
problems. The object (struct devfreq_dev_status) it not guaranteed to
be accessed by the governor only.


Or do you intend to share the stat value with another entity, with the
assumption that only one will be calling devfreq_update_stats() ?
In that case, what if the one who's supposed to call
devfreq_update_stats() is paused? For example, what if the user has
switched the governor from simpleondemand to userspace or performance?


Cheers,
MyungJoo.


> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 6390d5db6816..d253a73f2b86 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -810,6 +810,11 @@ unlock:
>  }
>  EXPORT_SYMBOL(devfreq_qos_set_min);
>
> +int devfreq_update_stats(struct devfreq *df)
> +{
> +       return df->profile->get_dev_status(df->dev.parent, &df->last_status);
> +}
> +
>  static ssize_t governor_show(struct device *dev,
>                              struct device_attribute *attr, char *buf)
>  {
> diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
> index 0720ba84ca92..ae72ba5e78df 100644
> --- a/drivers/devfreq/governor_simpleondemand.c
> +++ b/drivers/devfreq/governor_simpleondemand.c
> @@ -21,17 +21,20 @@
>  static int devfreq_simple_ondemand_func(struct devfreq *df,
>                                         unsigned long *freq)
>  {
> -       struct devfreq_dev_status stat;
> -       int err = df->profile->get_dev_status(df->dev.parent, &stat);
> +       int err;
> +       struct devfreq_dev_status *stat;
>         unsigned long long a, b;
>         unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
>         unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
>         struct devfreq_simple_ondemand_data *data = df->data;
>         unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX;
>
> +       err = devfreq_update_stats(df);
>         if (err)
>                 return err;
>
> +       stat = &df->last_status;
> +
>         if (data) {
>                 if (data->upthreshold)
>                         dfso_upthreshold = data->upthreshold;
> @@ -43,41 +46,41 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
>                 return -EINVAL;
>
>         /* Assume MAX if it is going to be divided by zero */
> -       if (stat.total_time == 0) {
> +       if (stat->total_time == 0) {
>                 *freq = max;
>                 return 0;
>         }
>
>         /* Prevent overflow */
> -       if (stat.busy_time >= (1 << 24) || stat.total_time >= (1 << 24)) {
> -               stat.busy_time >>= 7;
> -               stat.total_time >>= 7;
> +       if (stat->busy_time >= (1 << 24) || stat->total_time >= (1 << 24)) {
> +               stat->busy_time >>= 7;
> +               stat->total_time >>= 7;
>         }
>
>         /* Set MAX if it's busy enough */
> -       if (stat.busy_time * 100 >
> -           stat.total_time * dfso_upthreshold) {
> +       if (stat->busy_time * 100 >
> +           stat->total_time * dfso_upthreshold) {
>                 *freq = max;
>                 return 0;
>         }
>
>         /* Set MAX if we do not know the initial frequency */
> -       if (stat.current_frequency == 0) {
> +       if (stat->current_frequency == 0) {
>                 *freq = max;
>                 return 0;
>         }
>
>         /* Keep the current frequency */
> -       if (stat.busy_time * 100 >
> -           stat.total_time * (dfso_upthreshold - dfso_downdifferential)) {
> -               *freq = stat.current_frequency;
> +       if (stat->busy_time * 100 >
> +           stat->total_time * (dfso_upthreshold - dfso_downdifferential)) {
> +               *freq = stat->current_frequency;
>                 return 0;
>         }
>
>         /* Set the desired frequency based on the load */
> -       a = stat.busy_time;
> -       a *= stat.current_frequency;
> -       b = div_u64(a, stat.total_time);
> +       a = stat->busy_time;
> +       a *= stat->current_frequency;
> +       b = div_u64(a, stat->total_time);
>         b *= 100;
>         b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2));
>         *freq = (unsigned long) b;
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index ffbe1f62ec2c..be351cfacec8 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -161,6 +161,7 @@ struct devfreq {
>         struct delayed_work work;
>
>         unsigned long previous_freq;
> +       struct devfreq_dev_status last_status;
>
>         void *data; /* private data for governors */
>
> @@ -206,6 +207,7 @@ extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
>
>  int devfreq_qos_set_min(struct devfreq *df, unsigned long value);
>  int devfreq_qos_set_max(struct devfreq *df, unsigned long value);
> +int devfreq_update_stats(struct devfreq *df);
>
>  #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
>  /**
> @@ -302,6 +304,11 @@ static inline int devfreq_qos_set_max(struct devfreq *df, unsigned long value)
>  {
>         return -EINVAL;
>  }
> +
> +static inline int devfreq_update_stats(struct devfreq *df)
> +{
> +       return -EINVAL;
> +}
>  #endif /* CONFIG_PM_DEVFREQ */
>
>  #endif /* __LINUX_DEVFREQ_H__ */
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
MyungJoo Ham, Ph.D.
Frontier CS Lab, S/W Center, Samsung Electronics

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] PM / devfreq: Add function to set max/min frequency
  2015-07-04  2:44   ` MyungJoo Ham
@ 2015-07-06  8:47     ` Javi Merino
  0 siblings, 0 replies; 11+ messages in thread
From: Javi Merino @ 2015-07-06  8:47 UTC (permalink / raw)
  To: myungjoo.ham; +Cc: Linux PM list, Ørjan Eide, Kyungmin Park

On Sat, Jul 04, 2015 at 03:44:08AM +0100, MyungJoo Ham wrote:
> On Fri, Jul 3, 2015 at 9:58 PM, Javi Merino <javi.merino@arm.com> wrote:
> > From: Ørjan Eide <orjan.eide@arm.com>
> >
> > Factor out the logic to set minimum and maximum frequency for the device
> > so that it can be used by other parts of the kernel.
> >
> > Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
> > Cc: Kyungmin Park <kyungmin.park@samsung.com>
> > Signed-off-by: Ørjan Eide <orjan.eide@arm.com>
> 
> Dear Javi,
> 
> Is there reasons to name the function "devfreq_qos_set_[min|max]"? not
> "devfreq_set_[min|max]"? The functions may be used by non-PM-qos
> components as well (e.g., thermal).

No, there are no reasons for those names.  I'll rename them to
devfreq_set_[min|max]

> And I like the idea of using devfreq devices as cooling devices. :)

Yes, we would like to be able to use them as passive cooling devices,
like what we do with cpufreq for the cpus. 

> ps. I will be on a trip starting hours later for 8 days and then in a
> military training for 3 days. Please expect that my responses during
> the next 11-12 days will be very slow.

No worries.  Thanks,
Javi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status()
  2015-07-04  3:15   ` MyungJoo Ham
@ 2015-07-06  9:35     ` Javi Merino
  0 siblings, 0 replies; 11+ messages in thread
From: Javi Merino @ 2015-07-06  9:35 UTC (permalink / raw)
  To: myungjoo.ham; +Cc: Linux PM list, Kyungmin Park

On Sat, Jul 04, 2015 at 04:15:51AM +0100, MyungJoo Ham wrote:
> On Fri, Jul 3, 2015 at 9:58 PM, Javi Merino <javi.merino@arm.com> wrote:
> > The return value of get_dev_status() can be reused.  Cache it so that
> > other parts of the kernel can reuse it instead of having to call the
> > same function again.
> >
> > Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
> > Cc: Kyungmin Park <kyungmin.park@samsung.com>
> > Signed-off-by: Javi Merino <javi.merino@arm.com>
> > ---
> >  drivers/devfreq/devfreq.c                 |  5 +++++
> >  drivers/devfreq/governor_simpleondemand.c | 33 +++++++++++++++++--------------
> >  include/linux/devfreq.h                   |  7 +++++++
> >  3 files changed, 30 insertions(+), 15 deletions(-)
> >
> 
> What if there are two entities using the stat at the same time? If you
> are going to have the status at a shared object (struct devfreq), you
> need to consider racing conditions as well: two threads accessing
> last_status at the same time, resulting in calling
> devfreq_update_stats() while the other has called it beforehand and
> using the variables in the stat struct. By doing it locally (having
> stat as struct devfreq_dev_status, not a pointer), we can avoid such
> problems. The object (struct devfreq_dev_status) it not guaranteed to
> be accessed by the governor only.

The main problem with get_dev_status() is that, as I understand it,
its results are described to be an update "since the last call",
without taking into account who was the last caller.  Therefore, in
practice it can only be called from one entity.  For example imagine a
devfreq governor and a devfreq cooling device want to have access to
the stats and they both call get_dev_stats() on a 100ms cadence, you
may end up with the following situation:

Time (ms)      devfreq governor      devfreq cooling device
000.000        get_dev_status()
001.000                              get_dev_status()
.
.
100.000        get_dev_status()
101.000                              get_dev_status()
.
.
200.000        get_dev_status()
201.000                              get_dev_status()

The devfreq cooling device calls get_dev_status() on a 100ms cadence,
the status is getting is only for the last 1ms (the time since the
last call to get_dev_status()).

> Or do you intend to share the stat value with another entity, with the
> assumption that only one will be calling devfreq_update_stats() ?
> In that case, what if the one who's supposed to call
> devfreq_update_stats() is paused? For example, what if the user has
> switched the governor from simpleondemand to userspace or performance?

Ideally, the way to solve this would be to change the definition of
get_dev_status() to make it absolute: total_time and busy_time refer
to time since the system was booted.  That way, different components
can call get_dev_status() whenever they feel like and calculate the
values for whichever interval they want to, independently.

With the current definition of get_dev_status(), the only solution
that I can think of is to have one entity to call get_dev_status()
which shares the result by putting it on the devfreq status.  The
solution in this patch puts it in the governor because I preferred to
have a simple solution to kick off this discussion.

To summarize, the solutions that I can think of to solve multiple
entities having access to the status information are:

  1) Change get_dev_status() to return absolute values for busy_time
     and total_time
  2) Make core devfreq call get_dev_status() periodically (for
     example, before calling the governor) and all the entities that
     want access can do so via a pointer in devfreq
  3) Make the simple ondemand governor call get_dev_status()
     periodically and caching it, forcing all the other entities to
     rely on that governor being active

What do you prefer?  Is there a better one that I haven't thought
about?

Thanks,
Javi

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] devfreq_cooling: add trace information
  2015-07-03 12:58 ` [PATCH 4/4] devfreq_cooling: add trace information Javi Merino
@ 2015-07-06 18:58   ` Steven Rostedt
  2015-07-07  7:56     ` Javi Merino
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Rostedt @ 2015-07-06 18:58 UTC (permalink / raw)
  To: Javi Merino; +Cc: linux-pm, Zhang Rui, Eduardo Valentin, Ingo Molnar

On Fri,  3 Jul 2015 13:58:30 +0100
Javi Merino <javi.merino@arm.com> wrote:


> @@ -142,6 +144,10 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
>  	/* Get static power */
>  	static_power = get_static_power(dfc, freq);
>  
> +	load = (100 * status->busy_time) / status->total_time;

When tracing is disabled, this division will still be done.

Why not just pass in status, and have the load calculated in the
tracepoint TP_fast_assign() ?

> +	trace_thermal_power_devfreq_get_power(cdev, freq, load, dyn_power,
> +					      static_power);
> +
>  	*power = dyn_power + static_power;
>  
>  	return 0;
> @@ -195,6 +201,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
>  			break;
>  
>  	*state = i;
> +	trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
>  	return 0;
>  }
>  
> diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
> index 8b1f80682b80..ffa870c0d2d2 100644
> --- a/include/trace/events/thermal.h
> +++ b/include/trace/events/thermal.h
> @@ -135,6 +135,57 @@ TRACE_EVENT(thermal_power_cpu_limit,
>  		__entry->power)
>  );
>  
> +TRACE_EVENT(thermal_power_devfreq_get_power,
> +	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
> +		u32 load, u32 dynamic_power, u32 static_power),
> +
> +	TP_ARGS(cdev, freq, load, dynamic_power, static_power),
> +
> +	TP_STRUCT__entry(
> +		__string(type,         cdev->type    )
> +		__field(unsigned long, freq          )
> +		__field(u32,           load          )
> +		__field(u32,           dynamic_power )
> +		__field(u32,           static_power  )
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(type, cdev->type);
> +		__entry->freq = freq;
> +		__entry->load = load;

 __entry->load = (100 * status->busy_time) / status->total_time;

-- Steve

> +		__entry->dynamic_power = dynamic_power;
> +		__entry->static_power = static_power;
> +	),
> +
> +	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u",
> +		__get_str(type), __entry->freq,
> +		__entry->load, __entry->dynamic_power, __entry->static_power)
> +);
> +
> +TRACE_EVENT(thermal_power_devfreq_limit,
> +	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
> +		unsigned long cdev_state, u32 power),
> +
> +	TP_ARGS(cdev, freq, cdev_state, power),
> +
> +	TP_STRUCT__entry(
> +		__string(type,         cdev->type)
> +		__field(unsigned int,  freq      )
> +		__field(unsigned long, cdev_state)
> +		__field(u32,           power     )
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(type, cdev->type);
> +		__entry->freq = freq;
> +		__entry->cdev_state = cdev_state;
> +		__entry->power = power;
> +	),
> +
> +	TP_printk("type=%s freq=%u cdev_state=%lu power=%u",
> +		__get_str(type), __entry->freq, __entry->cdev_state,
> +		__entry->power)
> +);
>  #endif /* _TRACE_THERMAL_H */
>  
>  /* This part must be outside protection */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] devfreq_cooling: add trace information
  2015-07-06 18:58   ` Steven Rostedt
@ 2015-07-07  7:56     ` Javi Merino
  0 siblings, 0 replies; 11+ messages in thread
From: Javi Merino @ 2015-07-07  7:56 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-pm, Zhang Rui, Eduardo Valentin, Ingo Molnar

On Mon, Jul 06, 2015 at 07:58:43PM +0100, Steven Rostedt wrote:
> On Fri,  3 Jul 2015 13:58:30 +0100
> Javi Merino <javi.merino@arm.com> wrote:
> 
> 
> > @@ -142,6 +144,10 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
> >  	/* Get static power */
> >  	static_power = get_static_power(dfc, freq);
> >  
> > +	load = (100 * status->busy_time) / status->total_time;
> 
> When tracing is disabled, this division will still be done.
> 
> Why not just pass in status, and have the load calculated in the
> tracepoint TP_fast_assign() ?

Yes, no point doing it here.  I'll move it to the tracepoint's code.

Thanks for the review,
Javi

> > +	trace_thermal_power_devfreq_get_power(cdev, freq, load, dyn_power,
> > +					      static_power);
> > +
> >  	*power = dyn_power + static_power;
> >  
> >  	return 0;
> > @@ -195,6 +201,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
> >  			break;
> >  
> >  	*state = i;
> > +	trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
> >  	return 0;
> >  }
> >  
> > diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
> > index 8b1f80682b80..ffa870c0d2d2 100644
> > --- a/include/trace/events/thermal.h
> > +++ b/include/trace/events/thermal.h
> > @@ -135,6 +135,57 @@ TRACE_EVENT(thermal_power_cpu_limit,
> >  		__entry->power)
> >  );
> >  
> > +TRACE_EVENT(thermal_power_devfreq_get_power,
> > +	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
> > +		u32 load, u32 dynamic_power, u32 static_power),
> > +
> > +	TP_ARGS(cdev, freq, load, dynamic_power, static_power),
> > +
> > +	TP_STRUCT__entry(
> > +		__string(type,         cdev->type    )
> > +		__field(unsigned long, freq          )
> > +		__field(u32,           load          )
> > +		__field(u32,           dynamic_power )
> > +		__field(u32,           static_power  )
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__assign_str(type, cdev->type);
> > +		__entry->freq = freq;
> > +		__entry->load = load;
> 
>  __entry->load = (100 * status->busy_time) / status->total_time;
> 
> -- Steve
> 
> > +		__entry->dynamic_power = dynamic_power;
> > +		__entry->static_power = static_power;
> > +	),
> > +
> > +	TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u",
> > +		__get_str(type), __entry->freq,
> > +		__entry->load, __entry->dynamic_power, __entry->static_power)
> > +);
> > +
> > +TRACE_EVENT(thermal_power_devfreq_limit,
> > +	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
> > +		unsigned long cdev_state, u32 power),
> > +
> > +	TP_ARGS(cdev, freq, cdev_state, power),
> > +
> > +	TP_STRUCT__entry(
> > +		__string(type,         cdev->type)
> > +		__field(unsigned int,  freq      )
> > +		__field(unsigned long, cdev_state)
> > +		__field(u32,           power     )
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__assign_str(type, cdev->type);
> > +		__entry->freq = freq;
> > +		__entry->cdev_state = cdev_state;
> > +		__entry->power = power;
> > +	),
> > +
> > +	TP_printk("type=%s freq=%u cdev_state=%lu power=%u",
> > +		__get_str(type), __entry->freq, __entry->cdev_state,
> > +		__entry->power)
> > +);
> >  #endif /* _TRACE_THERMAL_H */
> >  
> >  /* This part must be outside protection */
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-07-07  7:57 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-03 12:58 [PATCH 0/4] Devfreq cooling device Javi Merino
2015-07-03 12:58 ` [PATCH 1/4] PM / devfreq: Add function to set max/min frequency Javi Merino
2015-07-04  2:44   ` MyungJoo Ham
2015-07-06  8:47     ` Javi Merino
2015-07-03 12:58 ` [PATCH 2/4] PM / devfreq: cache the last call to get_dev_status() Javi Merino
2015-07-04  3:15   ` MyungJoo Ham
2015-07-06  9:35     ` Javi Merino
2015-07-03 12:58 ` [PATCH 3/4] thermal: Add devfreq cooling Javi Merino
2015-07-03 12:58 ` [PATCH 4/4] devfreq_cooling: add trace information Javi Merino
2015-07-06 18:58   ` Steven Rostedt
2015-07-07  7:56     ` Javi Merino

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.