All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update()
@ 2020-04-08  4:19 Zhang Rui
  2020-04-08  4:19 ` [RFC PATCH 2/5] thermal: create statistics table in two steps Zhang Rui
                   ` (5 more replies)
  0 siblings, 6 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-08  4:19 UTC (permalink / raw)
  To: linux-pm; +Cc: rui.zhang, daniel.lezcano, tiwai, viresh.kumar

Rename thermal_cooling_device_stats_update() to
thermal_cdev_stats_update_cur()

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_core.h    | 4 ++--
 drivers/thermal/thermal_helpers.c | 2 +-
 drivers/thermal/thermal_sysfs.c   | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index a9bf00e91d64..722902d5724a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -81,11 +81,11 @@ ssize_t weight_store(struct device *, struct device_attribute *, const char *,
 		     size_t);
 
 #ifdef CONFIG_THERMAL_STATISTICS
-void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
+void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
 					 unsigned long new_state);
 #else
 static inline void
-thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
+thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
 				    unsigned long new_state) {}
 #endif /* CONFIG_THERMAL_STATISTICS */
 
diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
index 2ba756af76b7..3af895e5dfce 100644
--- a/drivers/thermal/thermal_helpers.c
+++ b/drivers/thermal/thermal_helpers.c
@@ -186,7 +186,7 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
 	}
 
 	if (!cdev->ops->set_cur_state(cdev, target))
-		thermal_cooling_device_stats_update(cdev, target);
+		thermal_cdev_stats_update_cur(cdev, target);
 
 	cdev->updated = true;
 	mutex_unlock(&cdev->lock);
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index aa99edb4dff7..00caa7787b71 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -716,7 +716,7 @@ cur_state_store(struct device *dev, struct device_attribute *attr,
 
 	result = cdev->ops->set_cur_state(cdev, state);
 	if (!result)
-		thermal_cooling_device_stats_update(cdev, state);
+		thermal_cdev_stats_update_cur(cdev, state);
 
 	mutex_unlock(&cdev->lock);
 	return result ? result : count;
@@ -765,7 +765,7 @@ static void update_time_in_state(struct cooling_dev_stats *stats)
 	stats->last_time = now;
 }
 
-void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
+void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
 					 unsigned long new_state)
 {
 	struct cooling_dev_stats *stats = cdev->stats;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [RFC PATCH 2/5] thermal: create statistics table in two steps
  2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
@ 2020-04-08  4:19 ` Zhang Rui
  2020-04-08  4:19 ` [RFC PATCH 3/5] thermal: support statistics table resizing at runtime Zhang Rui
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-08  4:19 UTC (permalink / raw)
  To: linux-pm; +Cc: rui.zhang, daniel.lezcano, tiwai, viresh.kumar

Part of cooling device stats table is created based on the number of
cooling states supported, and this may be changed at runtime.
Introduce cooling_device_stats_resize() to handle this piece of the
statistics table.

Plus, check the existence of the statistics table before access because
the table may fail to be created during cooling device registration.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sysfs.c | 56 ++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 00caa7787b71..45cfc2746874 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -770,6 +770,9 @@ void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
 {
 	struct cooling_dev_stats *stats = cdev->stats;
 
+	if (!stats)
+		return;
+
 	spin_lock(&stats->lock);
 
 	if (stats->state == new_state)
@@ -904,33 +907,52 @@ static const struct attribute_group cooling_device_stats_attr_group = {
 	.name = "stats"
 };
 
-static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+static int cooling_device_stats_resize(struct thermal_cooling_device *cdev)
 {
-	struct cooling_dev_stats *stats;
+	struct cooling_dev_stats *stats = cdev->stats;
 	unsigned long states;
-	int var;
+	int ret;
 
-	if (cdev->ops->get_max_state(cdev, &states))
-		return;
+	ret = cdev->ops->get_max_state(cdev, &states);
+	if (ret)
+		return ret;
 
 	states++; /* Total number of states is highest state + 1 */
 
-	var = sizeof(*stats);
-	var += sizeof(*stats->time_in_state) * states;
-	var += sizeof(*stats->trans_table) * states * states;
+	stats->time_in_state = kcalloc(states, sizeof(*stats->time_in_state), GFP_KERNEL);
+	if (!stats->time_in_state)
+		return -ENOMEM;
 
-	stats = kzalloc(var, GFP_KERNEL);
-	if (!stats)
-		return;
+	stats->trans_table = kcalloc(states, sizeof(*stats->trans_table) * states, GFP_KERNEL);
+	if (!stats->trans_table) {
+		kfree(stats->time_in_state);
+		return -ENOMEM;
+	}
 
-	stats->time_in_state = (ktime_t *)(stats + 1);
-	stats->trans_table = (unsigned int *)(stats->time_in_state + states);
-	cdev->stats = stats;
 	stats->last_time = ktime_get();
 	stats->max_states = states;
 
+	return 0;
+}
+static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+{
+	struct cooling_dev_stats *stats;
+	int var, ret;
+
+	stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return;
+
+	cdev->stats = stats;
 	spin_lock_init(&stats->lock);
 
+	ret = cooling_device_stats_resize(cdev);
+	if (ret) {
+		kfree(cdev->stats);
+		cdev->stats = NULL;
+		return;
+	}
+
 	/* Fill the empty slot left in cooling_device_attr_groups */
 	var = ARRAY_SIZE(cooling_device_attr_groups) - 2;
 	cooling_device_attr_groups[var] = &cooling_device_stats_attr_group;
@@ -938,6 +960,12 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
 
 static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
 {
+	struct cooling_dev_stats *stats = cdev->stats;
+
+	if (!stats)
+		return;
+	kfree(stats->time_in_state);
+	kfree(stats->trans_table);
 	kfree(cdev->stats);
 	cdev->stats = NULL;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [RFC PATCH 3/5] thermal: support statistics table resizing at runtime
  2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
  2020-04-08  4:19 ` [RFC PATCH 2/5] thermal: create statistics table in two steps Zhang Rui
@ 2020-04-08  4:19 ` Zhang Rui
  2020-04-08  9:45   ` Takashi Iwai
  2020-04-08  4:19 ` [RFC PATCH 4/5] thermal: Add a sanity check for invalid state at stats update Zhang Rui
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 26+ messages in thread
From: Zhang Rui @ 2020-04-08  4:19 UTC (permalink / raw)
  To: linux-pm; +Cc: rui.zhang, daniel.lezcano, tiwai, viresh.kumar

Introduce thermal_cdev_stats_update_max() which can be used to update
the cooling device statistics table when maximum cooling state of a
cooling device is changed.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sysfs.c | 65 ++++++++++++++++++++++++++-------
 include/linux/thermal.h         |  7 ++++
 2 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 45cfc2746874..96e4a445952f 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -745,6 +745,10 @@ static const struct attribute_group *cooling_device_attr_groups[] = {
 };
 
 #ifdef CONFIG_THERMAL_STATISTICS
+static int cooling_device_stats_resize(struct thermal_cooling_device *cdev,
+					unsigned long cur_state,
+					unsigned long max_state);
+
 struct cooling_dev_stats {
 	spinlock_t lock;
 	unsigned int total_trans;
@@ -787,6 +791,23 @@ void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
 	spin_unlock(&stats->lock);
 }
 
+void thermal_cdev_stats_update_max(struct thermal_cooling_device *cdev)
+{
+	struct cooling_dev_stats *stats = cdev->stats;
+	unsigned long cur_state, max_state;
+
+	if (!stats)
+		return;
+
+	if (cdev->ops->get_max_state(cdev, &max_state))
+		return;
+
+	if (cdev->ops->get_cur_state(cdev, &cur_state))
+		return;
+
+	cooling_device_stats_resize(cdev, cur_state, max_state);
+}
+
 static ssize_t total_trans_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -907,36 +928,44 @@ static const struct attribute_group cooling_device_stats_attr_group = {
 	.name = "stats"
 };
 
-static int cooling_device_stats_resize(struct thermal_cooling_device *cdev)
+static int cooling_device_stats_resize(struct thermal_cooling_device *cdev,
+					unsigned long cur_state,
+					unsigned long max_state)
 {
 	struct cooling_dev_stats *stats = cdev->stats;
-	unsigned long states;
-	int ret;
-
-	ret = cdev->ops->get_max_state(cdev, &states);
-	if (ret)
-		return ret;
+	unsigned long states = max_state + 1;
+	void *time_in_state, *trans_table;
 
-	states++; /* Total number of states is highest state + 1 */
+	if (stats->max_states == states)
+		return 0;
 
-	stats->time_in_state = kcalloc(states, sizeof(*stats->time_in_state), GFP_KERNEL);
-	if (!stats->time_in_state)
+	time_in_state = kcalloc(states, sizeof(*stats->time_in_state), GFP_KERNEL);
+	if (!time_in_state)
 		return -ENOMEM;
 
-	stats->trans_table = kcalloc(states, sizeof(*stats->trans_table) * states, GFP_KERNEL);
-	if (!stats->trans_table) {
-		kfree(stats->time_in_state);
+	trans_table = kcalloc(states, sizeof(*stats->trans_table) * states, GFP_KERNEL);
+	if (!trans_table) {
+		kfree(time_in_state);
 		return -ENOMEM;
 	}
 
+	spin_lock(&stats->lock);
+	kfree(stats->time_in_state);
+	kfree(stats->trans_table);
+	stats->time_in_state = time_in_state;
+	stats->trans_table = trans_table;
 	stats->last_time = ktime_get();
 	stats->max_states = states;
+	stats->state = cur_state;
+	stats->total_trans = 0;
+	spin_unlock(&stats->lock);
 
 	return 0;
 }
 static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
 {
 	struct cooling_dev_stats *stats;
+	unsigned long max_state;
 	int var, ret;
 
 	stats = kzalloc(sizeof(*stats), GFP_KERNEL);
@@ -946,7 +975,15 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
 	cdev->stats = stats;
 	spin_lock_init(&stats->lock);
 
-	ret = cooling_device_stats_resize(cdev);
+	ret = cdev->ops->get_max_state(cdev, &max_state);
+	if (ret)
+		return;
+
+	/**
+	 *  cooling device current state will be updated soon
+	 *  during registration
+	 **/
+	ret = cooling_device_stats_resize(cdev, 0, max_state);
 	if (ret) {
 		kfree(cdev->stats);
 		cdev->stats = NULL;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 126913c6a53b..cf3fad92f473 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -533,4 +533,11 @@ static inline void thermal_notify_framework(struct thermal_zone_device *tz,
 { }
 #endif /* CONFIG_THERMAL */
 
+#ifdef CONFIG_THERMAL_STATISTICS
+void thermal_cdev_stats_update_max(struct thermal_cooling_device *cdev);
+#else
+static inline void
+thermal_cdev_stats_update_max(struct thermal_cooling_device *cdev) {}
+#endif /* CONFIG_THERMAL_STATISTICS */
+
 #endif /* __THERMAL_H__ */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [RFC PATCH 4/5] thermal: Add a sanity check for invalid state at stats update
  2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
  2020-04-08  4:19 ` [RFC PATCH 2/5] thermal: create statistics table in two steps Zhang Rui
  2020-04-08  4:19 ` [RFC PATCH 3/5] thermal: support statistics table resizing at runtime Zhang Rui
@ 2020-04-08  4:19 ` Zhang Rui
  2020-04-08  4:19 ` [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed Zhang Rui
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-08  4:19 UTC (permalink / raw)
  To: linux-pm; +Cc: rui.zhang, daniel.lezcano, tiwai, viresh.kumar

From: Takashi Iwai <tiwai@suse.de>

This is from the origin changelog
"The thermal sysfs handler keeps the statistics table with the fixed
size that was determined from the initial max_states() call, and the
table entry is updated at each sysfs cur_state write call.  And, when
the driver's set_cur_state() ops accepts the value given from
user-space, the thermal sysfs core blindly applies it to the
statistics table entry, which may overflow and cause an Oops.
Although it's rather a bug in the driver's ops implementations, we
shouldn't crash but rather give a proper warning instead.

This patch adds a sanity check for avoiding such an OOB access and
warns with a stack trace to show the suspicious device in question."

Part of the problem described above is gone, but I'd like to keep this
patch so that we can detect the other max_state changes from other
drivers, and fix the drivers by invoking
thermal_cdev_stats_update_max() when necessary.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_sysfs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 96e4a445952f..e0e21c67e78a 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -779,6 +779,11 @@ void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
 
 	spin_lock(&stats->lock);
 
+	if (dev_WARN_ONCE(&cdev->device, new_state >= stats->max_states,
+			  "new state %ld exceeds max_state %ld",
+			  new_state, stats->max_states))
+		goto unlock;
+
 	if (stats->state == new_state)
 		goto unlock;
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
                   ` (2 preceding siblings ...)
  2020-04-08  4:19 ` [RFC PATCH 4/5] thermal: Add a sanity check for invalid state at stats update Zhang Rui
@ 2020-04-08  4:19 ` Zhang Rui
  2020-04-09 13:34   ` Daniel Lezcano
                     ` (2 more replies)
  2020-04-08  9:47 ` [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Takashi Iwai
  2020-05-06 12:07 ` Amit Kucheria
  5 siblings, 3 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-08  4:19 UTC (permalink / raw)
  To: linux-pm; +Cc: rui.zhang, daniel.lezcano, tiwai, viresh.kumar

ACPI processor cooling device supports 1 cooling state before cpufreq
driver probed, and 4 cooling states after cpufreq driver probed.
Thus update the statistics table when the cpufeq driver is
probed/unprobed.

This fixes an OOB issue when updating the statistics of the processor
cooling device.

Fixes: 8ea229511e06 ("thermal: Add cooling device's statistics in sysfs")
Reported-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/acpi/processor_thermal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 41feb88ee92d..179d1b50ee2b 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -142,6 +142,7 @@ void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
 		if (ret < 0)
 			pr_err("Failed to add freq constraint for CPU%d (%d)\n",
 			       cpu, ret);
+		thermal_cdev_stats_update_max(pr->cdev);
 	}
 }
 
@@ -154,6 +155,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
 
 		if (pr)
 			freq_qos_remove_request(&pr->thermal_req);
+		thermal_cdev_stats_update_max(pr->cdev);
 	}
 }
 #else				/* ! CONFIG_CPU_FREQ */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 3/5] thermal: support statistics table resizing at runtime
  2020-04-08  4:19 ` [RFC PATCH 3/5] thermal: support statistics table resizing at runtime Zhang Rui
@ 2020-04-08  9:45   ` Takashi Iwai
  2020-04-09  2:57     ` Zhang Rui
  0 siblings, 1 reply; 26+ messages in thread
From: Takashi Iwai @ 2020-04-08  9:45 UTC (permalink / raw)
  To: Zhang Rui; +Cc: linux-pm, daniel.lezcano, viresh.kumar

On Wed, 08 Apr 2020 06:19:15 +0200,
Zhang Rui wrote:
> 
> Introduce thermal_cdev_stats_update_max() which can be used to update
> the cooling device statistics table when maximum cooling state of a
> cooling device is changed.
> 
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>

Just a couple of small nitpicking:

> @@ -787,6 +791,23 @@ void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
>  	spin_unlock(&stats->lock);
>  }
>  
> +void thermal_cdev_stats_update_max(struct thermal_cooling_device *cdev)
> +{
> +	struct cooling_dev_stats *stats = cdev->stats;
> +	unsigned long cur_state, max_state;
> +
> +	if (!stats)
> +		return;
> +
> +	if (cdev->ops->get_max_state(cdev, &max_state))
> +		return;
> +
> +	if (cdev->ops->get_cur_state(cdev, &cur_state))
> +		return;
> +
> +	cooling_device_stats_resize(cdev, cur_state, max_state);
> +}

Don't we need to export this?

> @@ -946,7 +975,15 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
>  	cdev->stats = stats;
>  	spin_lock_init(&stats->lock);
>  
> -	ret = cooling_device_stats_resize(cdev);
> +	ret = cdev->ops->get_max_state(cdev, &max_state);
> +	if (ret)
> +		return;
> +
> +	/**
> +	 *  cooling device current state will be updated soon
> +	 *  during registration
> +	 **/

This comment style is confusing as if a kernel-doc.


thanks,

Takashi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update()
  2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
                   ` (3 preceding siblings ...)
  2020-04-08  4:19 ` [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed Zhang Rui
@ 2020-04-08  9:47 ` Takashi Iwai
  2020-04-09  2:59   ` Zhang Rui
  2020-05-06 12:07 ` Amit Kucheria
  5 siblings, 1 reply; 26+ messages in thread
From: Takashi Iwai @ 2020-04-08  9:47 UTC (permalink / raw)
  To: Zhang Rui; +Cc: linux-pm, daniel.lezcano, viresh.kumar

On Wed, 08 Apr 2020 06:19:13 +0200,
Zhang Rui wrote:
> 
> Rename thermal_cooling_device_stats_update() to
> thermal_cdev_stats_update_cur()
> 
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>

Thanks for the patch set!  I did a quick test and it seems working
fine.  Also through a glance, the patches are good, just a few trivial
issues as posted in another mail.

So, feel free to take my tags for the whole series:
  Tested-by: Takashi Iwai <tiwai@suse.de>
  Reviewed-by: Takashi Iwai <tiwai@suse.de>


Takashi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 3/5] thermal: support statistics table resizing at runtime
  2020-04-08  9:45   ` Takashi Iwai
@ 2020-04-09  2:57     ` Zhang Rui
  0 siblings, 0 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-09  2:57 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: linux-pm, daniel.lezcano, viresh.kumar

On Wed, 2020-04-08 at 11:45 +0200, Takashi Iwai wrote:
> On Wed, 08 Apr 2020 06:19:15 +0200,
> Zhang Rui wrote:
> > 
> > Introduce thermal_cdev_stats_update_max() which can be used to
> > update
> > the cooling device statistics table when maximum cooling state of a
> > cooling device is changed.
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> 
> Just a couple of small nitpicking:
> 
> > @@ -787,6 +791,23 @@ void thermal_cdev_stats_update_cur(struct
> > thermal_cooling_device *cdev,
> >  	spin_unlock(&stats->lock);
> >  }
> >  
> > +void thermal_cdev_stats_update_max(struct thermal_cooling_device
> > *cdev)
> > +{
> > +	struct cooling_dev_stats *stats = cdev->stats;
> > +	unsigned long cur_state, max_state;
> > +
> > +	if (!stats)
> > +		return;
> > +
> > +	if (cdev->ops->get_max_state(cdev, &max_state))
> > +		return;
> > +
> > +	if (cdev->ops->get_cur_state(cdev, &cur_state))
> > +		return;
> > +
> > +	cooling_device_stats_resize(cdev, cur_state, max_state);
> > +}
> 
> Don't we need to export this?

Oh, right, will fix this.
> 
> > @@ -946,7 +975,15 @@ static void cooling_device_stats_setup(struct
> > thermal_cooling_device *cdev)
> >  	cdev->stats = stats;
> >  	spin_lock_init(&stats->lock);
> >  
> > -	ret = cooling_device_stats_resize(cdev);
> > +	ret = cdev->ops->get_max_state(cdev, &max_state);
> > +	if (ret)
> > +		return;
> > +
> > +	/**
> > +	 *  cooling device current state will be updated soon
> > +	 *  during registration
> > +	 **/
> 
> This comment style is confusing as if a kernel-doc.
> 
will fix it in next version.

thanks,
rui
> 
> thanks,
> 
> Takashi


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update()
  2020-04-08  9:47 ` [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Takashi Iwai
@ 2020-04-09  2:59   ` Zhang Rui
  0 siblings, 0 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-09  2:59 UTC (permalink / raw)
  To: Takashi Iwai; +Cc: linux-pm, daniel.lezcano, viresh.kumar

On Wed, 2020-04-08 at 11:47 +0200, Takashi Iwai wrote:
> On Wed, 08 Apr 2020 06:19:13 +0200,
> Zhang Rui wrote:
> > 
> > Rename thermal_cooling_device_stats_update() to
> > thermal_cdev_stats_update_cur()
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> 
> Thanks for the patch set!  I did a quick test and it seems working
> fine.  Also through a glance, the patches are good, just a few
> trivial
> issues as posted in another mail.
> 
> So, feel free to take my tags for the whole series:
>   Tested-by: Takashi Iwai <tiwai@suse.de>
>   Reviewed-by: Takashi Iwai <tiwai@suse.de>
> 
Thanks for the test and review.

I will refresh the patch set with your comments addressed and also with
the changelog of your patch rephrased.

thanks,
rui



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-08  4:19 ` [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed Zhang Rui
@ 2020-04-09 13:34   ` Daniel Lezcano
  2020-04-10  8:02     ` Zhang Rui
  2020-04-13 16:16   ` kbuild test robot
  2020-04-14 12:37     ` Dan Carpenter
  2 siblings, 1 reply; 26+ messages in thread
From: Daniel Lezcano @ 2020-04-09 13:34 UTC (permalink / raw)
  To: Zhang Rui, linux-pm; +Cc: tiwai, viresh.kumar


Hi Rui,


On 08/04/2020 06:19, Zhang Rui wrote:
> ACPI processor cooling device supports 1 cooling state before cpufreq
> driver probed, and 4 cooling states after cpufreq driver probed.

What is this one state ?

> Thus update the statistics table when the cpufeq driver is
> probed/unprobed.

To be honest, the series seems to skirt a problem in the acpi processor.

If there is a new policy, then there is a new cooling device. Why not
unregister the old one and register the new one ?


> This fixes an OOB issue when updating the statistics of the processor
> cooling device.
> 
> Fixes: 8ea229511e06 ("thermal: Add cooling device's statistics in sysfs")
> Reported-by: Takashi Iwai <tiwai@suse.de>
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/acpi/processor_thermal.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
> index 41feb88ee92d..179d1b50ee2b 100644
> --- a/drivers/acpi/processor_thermal.c
> +++ b/drivers/acpi/processor_thermal.c
> @@ -142,6 +142,7 @@ void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
>  		if (ret < 0)
>  			pr_err("Failed to add freq constraint for CPU%d (%d)\n",
>  			       cpu, ret);
> +		thermal_cdev_stats_update_max(pr->cdev);
>  	}
>  }
>  
> @@ -154,6 +155,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
>  
>  		if (pr)
>  			freq_qos_remove_request(&pr->thermal_req);
> +		thermal_cdev_stats_update_max(pr->cdev);
>  	}
>  }
>  #else				/* ! CONFIG_CPU_FREQ */
> 


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-09 13:34   ` Daniel Lezcano
@ 2020-04-10  8:02     ` Zhang Rui
  2020-04-10 12:10       ` Daniel Lezcano
  2020-04-10 14:10       ` Rafael J. Wysocki
  0 siblings, 2 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-10  8:02 UTC (permalink / raw)
  To: Daniel Lezcano, linux-pm; +Cc: tiwai, viresh.kumar

On Thu, 2020-04-09 at 15:34 +0200, Daniel Lezcano wrote:
> Hi Rui,
> 
> 
> On 08/04/2020 06:19, Zhang Rui wrote:
> > ACPI processor cooling device supports 1 cooling state before
> > cpufreq
> > driver probed, and 4 cooling states after cpufreq driver probed.
> 
> What is this one state ?

One state means its original state and we can not set it to others.

ACPI processor cooling states are combined of p-state cooling states
and a couple of optional t-state cooling states.

Say, if we have a processor device supports 7 throttling states, it
actually supports 8 cooling states with cpufreq driver unprobed, and 11
cooling states with cpufreq driver probed.

> 
> > Thus update the statistics table when the cpufeq driver is
> > probed/unprobed.
> 
> To be honest, the series seems to skirt a problem in the acpi
> processor.
> 
> If there is a new policy, then there is a new cooling device. Why not
> unregister the old one and register the new one ?
> 
Good point.
IMO, the real problem is that do we support dynamic max_cooling_state
or not in the thermal framework.
Previously, I thought we don't have a hard rule of static
max_cooling_state because we always invoke .get_max_state() callback
when we need it. But after a second thought, actually we do have this
limitation. For example, when binding cooling devices to thermal zones,
the upper limit is set based on the return value of .get_max_state() at
the binding time, and we never update the upper limit later.
So this ACPI processor issue is not just about statistics table update
issue, we actually lose some of its cooling states.

Thus, a new max_state means that all the previous setting of the
cooling_device, i.e. all the thermal instances of the cooling device,
needs to get updated.

And to fix this, it's better to
a. unregister and re-register the cooling device as you suggested.
or
b. introduce an API that updates the cooling device entirely instead of
statistics table only.

For either of the above solutions, we'd better to cleanup the code to 
invoke .get_max_state() during registration/max_state_reset phase,
once, and then always use cached value later.
And plus, if we want to follow solution a), it's better to remove
.get_max_state() callback and use an integer instead so that every
driver knows this limitation.
I'd vote for solution a) if there is no soc thermal driver that may
return dynamic max_states.

Do I still miss something?

thanks,
rui
> 
> > This fixes an OOB issue when updating the statistics of the
> > processor
> > cooling device.
> > 
> > Fixes: 8ea229511e06 ("thermal: Add cooling device's statistics in
> > sysfs")
> > Reported-by: Takashi Iwai <tiwai@suse.de>
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >  drivers/acpi/processor_thermal.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/drivers/acpi/processor_thermal.c
> > b/drivers/acpi/processor_thermal.c
> > index 41feb88ee92d..179d1b50ee2b 100644
> > --- a/drivers/acpi/processor_thermal.c
> > +++ b/drivers/acpi/processor_thermal.c
> > @@ -142,6 +142,7 @@ void acpi_thermal_cpufreq_init(struct
> > cpufreq_policy *policy)
> >  		if (ret < 0)
> >  			pr_err("Failed to add freq constraint for CPU%d
> > (%d)\n",
> >  			       cpu, ret);
> > +		thermal_cdev_stats_update_max(pr->cdev);
> >  	}
> >  }
> >  
> > @@ -154,6 +155,7 @@ void acpi_thermal_cpufreq_exit(struct
> > cpufreq_policy *policy)
> >  
> >  		if (pr)
> >  			freq_qos_remove_request(&pr->thermal_req);
> > +		thermal_cdev_stats_update_max(pr->cdev);
> >  	}
> >  }
> >  #else				/* ! CONFIG_CPU_FREQ */
> > 
> 
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-10  8:02     ` Zhang Rui
@ 2020-04-10 12:10       ` Daniel Lezcano
  2020-04-12  6:13         ` Zhang Rui
  2020-04-10 14:10       ` Rafael J. Wysocki
  1 sibling, 1 reply; 26+ messages in thread
From: Daniel Lezcano @ 2020-04-10 12:10 UTC (permalink / raw)
  To: Zhang Rui, linux-pm; +Cc: tiwai, viresh.kumar

On 10/04/2020 10:02, Zhang Rui wrote:
> On Thu, 2020-04-09 at 15:34 +0200, Daniel Lezcano wrote:
>> Hi Rui,
>>
>>
>> On 08/04/2020 06:19, Zhang Rui wrote:
>>> ACPI processor cooling device supports 1 cooling state before
>>> cpufreq
>>> driver probed, and 4 cooling states after cpufreq driver probed.
>>
>> What is this one state ?
> 
> One state means its original state and we can not set it to others.
> 
> ACPI processor cooling states are combined of p-state cooling states
> and a couple of optional t-state cooling states.
> 
> Say, if we have a processor device supports 7 throttling states, it
> actually supports 8 cooling states with cpufreq driver unprobed, and 11
> cooling states with cpufreq driver probed.
> 
>>
>>> Thus update the statistics table when the cpufeq driver is
>>> probed/unprobed.
>>
>> To be honest, the series seems to skirt a problem in the acpi
>> processor.
>>
>> If there is a new policy, then there is a new cooling device. Why not
>> unregister the old one and register the new one ?
>>
> Good point.
> IMO, the real problem is that do we support dynamic max_cooling_state
> or not in the thermal framework.
> Previously, I thought we don't have a hard rule of static
> max_cooling_state because we always invoke .get_max_state() callback
> when we need it. But after a second thought, actually we do have this
> limitation. For example, when binding cooling devices to thermal zones,
> the upper limit is set based on the return value of .get_max_state() at
> the binding time, and we never update the upper limit later.
> So this ACPI processor issue is not just about statistics table update
> issue, we actually lose some of its cooling states.
> 
> Thus, a new max_state means that all the previous setting of the
> cooling_device, i.e. all the thermal instances of the cooling device,
> needs to get updated.
> 
> And to fix this, it's better to
> a. unregister and re-register the cooling device as you suggested.
> or
> b. introduce an API that updates the cooling device entirely instead of
> statistics table only.
> 
> For either of the above solutions, we'd better to cleanup the code to 
> invoke .get_max_state() during registration/max_state_reset phase,
> once, and then always use cached value later.
> And plus, if we want to follow solution a), it's better to remove
> .get_max_state() callback and use an integer instead so that every
> driver knows this limitation.
> I'd vote for solution a) if there is no soc thermal driver that may
> return dynamic max_states.
> 
> Do I still miss something?

I agree for the a) solution too.

But regarding the get_max_state() callback being converted to a integer,
the driver int3406_thermal.c computes the upper and lower limits which
are updated on a INT3406_BRIGHTNESS_LIMITS_CHANGED notification and
get_max_state() does uppper - lower.



-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-10  8:02     ` Zhang Rui
  2020-04-10 12:10       ` Daniel Lezcano
@ 2020-04-10 14:10       ` Rafael J. Wysocki
  2020-04-11  4:41         ` Zhang Rui
  1 sibling, 1 reply; 26+ messages in thread
From: Rafael J. Wysocki @ 2020-04-10 14:10 UTC (permalink / raw)
  To: Zhang Rui; +Cc: Daniel Lezcano, Linux PM, Takashi Iwai, Viresh Kumar

On Fri, Apr 10, 2020 at 10:02 AM Zhang Rui <rui.zhang@intel.com> wrote:
>
> On Thu, 2020-04-09 at 15:34 +0200, Daniel Lezcano wrote:
> > Hi Rui,
> >
> >
> > On 08/04/2020 06:19, Zhang Rui wrote:
> > > ACPI processor cooling device supports 1 cooling state before
> > > cpufreq
> > > driver probed, and 4 cooling states after cpufreq driver probed.
> >
> > What is this one state ?
>
> One state means its original state and we can not set it to others.
>
> ACPI processor cooling states are combined of p-state cooling states
> and a couple of optional t-state cooling states.
>
> Say, if we have a processor device supports 7 throttling states, it
> actually supports 8 cooling states with cpufreq driver unprobed, and 11
> cooling states with cpufreq driver probed.
>
> >
> > > Thus update the statistics table when the cpufeq driver is
> > > probed/unprobed.
> >
> > To be honest, the series seems to skirt a problem in the acpi
> > processor.
> >
> > If there is a new policy, then there is a new cooling device. Why not
> > unregister the old one and register the new one ?
> >
> Good point.
> IMO, the real problem is that do we support dynamic max_cooling_state
> or not in the thermal framework.
> Previously, I thought we don't have a hard rule of static
> max_cooling_state because we always invoke .get_max_state() callback
> when we need it. But after a second thought, actually we do have this
> limitation. For example, when binding cooling devices to thermal zones,
> the upper limit is set based on the return value of .get_max_state() at
> the binding time, and we never update the upper limit later.
> So this ACPI processor issue is not just about statistics table update
> issue, we actually lose some of its cooling states.
>
> Thus, a new max_state means that all the previous setting of the
> cooling_device, i.e. all the thermal instances of the cooling device,
> needs to get updated.
>
> And to fix this, it's better to
> a. unregister and re-register the cooling device as you suggested.
> or
> b. introduce an API that updates the cooling device entirely instead of
> statistics table only.
>
> For either of the above solutions, we'd better to cleanup the code to
> invoke .get_max_state() during registration/max_state_reset phase,
> once, and then always use cached value later.
> And plus, if we want to follow solution a), it's better to remove
> .get_max_state() callback and use an integer instead so that every
> driver knows this limitation.
> I'd vote for solution a) if there is no soc thermal driver that may
> return dynamic max_states.

I believe I mentioned one more option, which would be to introduce an
optional callback into struct thermal_cooling_device_ops to return the
maximum possible return value of .get_max_state(), say
.get_max_state_limit().  That would be used for the allocation of the
stats table and the drivers where the .get_max_state() return value
could not change might set the new callback to NULL.

Then, upon a change of the .get_max_state() return value, the driver
providing it would be responsible for rearranging the stats to reflect
the new set of available states.

Cheers!

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-10 14:10       ` Rafael J. Wysocki
@ 2020-04-11  4:41         ` Zhang Rui
  0 siblings, 0 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-11  4:41 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Daniel Lezcano, Linux PM, Takashi Iwai, Viresh Kumar

On Fri, 2020-04-10 at 16:10 +0200, Rafael J. Wysocki wrote:
> On Fri, Apr 10, 2020 at 10:02 AM Zhang Rui <rui.zhang@intel.com>
> wrote:
> > 
> > On Thu, 2020-04-09 at 15:34 +0200, Daniel Lezcano wrote:
> > > Hi Rui,
> > > 
> > > 
> > > On 08/04/2020 06:19, Zhang Rui wrote:
> > > > ACPI processor cooling device supports 1 cooling state before
> > > > cpufreq
> > > > driver probed, and 4 cooling states after cpufreq driver
> > > > probed.
> > > 
> > > What is this one state ?
> > 
> > One state means its original state and we can not set it to others.
> > 
> > ACPI processor cooling states are combined of p-state cooling
> > states
> > and a couple of optional t-state cooling states.
> > 
> > Say, if we have a processor device supports 7 throttling states, it
> > actually supports 8 cooling states with cpufreq driver unprobed,
> > and 11
> > cooling states with cpufreq driver probed.
> > 
> > > 
> > > > Thus update the statistics table when the cpufeq driver is
> > > > probed/unprobed.
> > > 
> > > To be honest, the series seems to skirt a problem in the acpi
> > > processor.
> > > 
> > > If there is a new policy, then there is a new cooling device. Why
> > > not
> > > unregister the old one and register the new one ?
> > > 
> > 
> > Good point.
> > IMO, the real problem is that do we support dynamic
> > max_cooling_state
> > or not in the thermal framework.
> > Previously, I thought we don't have a hard rule of static
> > max_cooling_state because we always invoke .get_max_state()
> > callback
> > when we need it. But after a second thought, actually we do have
> > this
> > limitation. For example, when binding cooling devices to thermal
> > zones,
> > the upper limit is set based on the return value of
> > .get_max_state() at
> > the binding time, and we never update the upper limit later.
> > So this ACPI processor issue is not just about statistics table
> > update
> > issue, we actually lose some of its cooling states.
> > 
> > Thus, a new max_state means that all the previous setting of the
> > cooling_device, i.e. all the thermal instances of the cooling
> > device,
> > needs to get updated.
> > 
> > And to fix this, it's better to
> > a. unregister and re-register the cooling device as you suggested.
> > or
> > b. introduce an API that updates the cooling device entirely
> > instead of
> > statistics table only.
> > 
> > For either of the above solutions, we'd better to cleanup the code
> > to
> > invoke .get_max_state() during registration/max_state_reset phase,
> > once, and then always use cached value later.
> > And plus, if we want to follow solution a), it's better to remove
> > .get_max_state() callback and use an integer instead so that every
> > driver knows this limitation.
> > I'd vote for solution a) if there is no soc thermal driver that may
> > return dynamic max_states.
> 
> I believe I mentioned one more option, which would be to introduce an
> optional callback into struct thermal_cooling_device_ops to return
> the
> maximum possible return value of .get_max_state(), say
> .get_max_state_limit().  That would be used for the allocation of the
> stats table and the drivers where the .get_max_state() return value
> could not change might set the new callback to NULL.
> 

For a dynamic max_state cooling device, now the problem is not just
about the statistics table.
Take the ACPI processor cooling device for example, we set its internal
limit based on the return value of .get_max_state() during the cooling
device registration, thus if it returns 1, actually we can not use deep
cooling state later at all.

Plus, when a max_state changed, the meaning of each cooling state may
also actually changed. Say, for ACPI processor, cooling state 2 means
60% of max_freq with cpufreq driver, but means ACPI processor T2
throttle state with cpuferq driver unprobed.

That says, if we don't do a full unregistration and registration in
this case, at least we need to keep the device node but re-evaluate
.get_max_state() and update all its thermal instances, as long as the
statistics table change.

thanks,
rui
> Then, upon a change of the .get_max_state() return value, the driver
> providing it would be responsible for rearranging the stats to
> reflect
> the new set of available states.
> 
> Cheers!


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-10 12:10       ` Daniel Lezcano
@ 2020-04-12  6:13         ` Zhang Rui
  2020-04-12 10:07           ` Daniel Lezcano
  0 siblings, 1 reply; 26+ messages in thread
From: Zhang Rui @ 2020-04-12  6:13 UTC (permalink / raw)
  To: Daniel Lezcano, linux-pm; +Cc: tiwai, viresh.kumar

On Fri, 2020-04-10 at 14:10 +0200, Daniel Lezcano wrote:
> On 10/04/2020 10:02, Zhang Rui wrote:
> > On Thu, 2020-04-09 at 15:34 +0200, Daniel Lezcano wrote:
> > > Hi Rui,
> > > 
> > > 
> > > On 08/04/2020 06:19, Zhang Rui wrote:
> > > > ACPI processor cooling device supports 1 cooling state before
> > > > cpufreq
> > > > driver probed, and 4 cooling states after cpufreq driver
> > > > probed.
> > > 
> > > What is this one state ?
> > 
> > One state means its original state and we can not set it to others.
> > 
> > ACPI processor cooling states are combined of p-state cooling
> > states
> > and a couple of optional t-state cooling states.
> > 
> > Say, if we have a processor device supports 7 throttling states, it
> > actually supports 8 cooling states with cpufreq driver unprobed,
> > and 11
> > cooling states with cpufreq driver probed.
> > 
> > > 
> > > > Thus update the statistics table when the cpufeq driver is
> > > > probed/unprobed.
> > > 
> > > To be honest, the series seems to skirt a problem in the acpi
> > > processor.
> > > 
> > > If there is a new policy, then there is a new cooling device. Why
> > > not
> > > unregister the old one and register the new one ?
> > > 
> > 
> > Good point.
> > IMO, the real problem is that do we support dynamic
> > max_cooling_state
> > or not in the thermal framework.
> > Previously, I thought we don't have a hard rule of static
> > max_cooling_state because we always invoke .get_max_state()
> > callback
> > when we need it. But after a second thought, actually we do have
> > this
> > limitation. For example, when binding cooling devices to thermal
> > zones,
> > the upper limit is set based on the return value of
> > .get_max_state() at
> > the binding time, and we never update the upper limit later.
> > So this ACPI processor issue is not just about statistics table
> > update
> > issue, we actually lose some of its cooling states.
> > 
> > Thus, a new max_state means that all the previous setting of the
> > cooling_device, i.e. all the thermal instances of the cooling
> > device,
> > needs to get updated.
> > 
> > And to fix this, it's better to
> > a. unregister and re-register the cooling device as you suggested.
> > or
> > b. introduce an API that updates the cooling device entirely
> > instead of
> > statistics table only.
> > 
> > For either of the above solutions, we'd better to cleanup the code
> > to 
> > invoke .get_max_state() during registration/max_state_reset phase,
> > once, and then always use cached value later.
> > And plus, if we want to follow solution a), it's better to remove
> > .get_max_state() callback and use an integer instead so that every
> > driver knows this limitation.
> > I'd vote for solution a) if there is no soc thermal driver that may
> > return dynamic max_states.
> > 
> > Do I still miss something?
> 
> I agree for the a) solution too.
> 
> But regarding the get_max_state() callback being converted to a
> integer,
> the driver int3406_thermal.c computes the upper and lower limits
> which
> are updated on a INT3406_BRIGHTNESS_LIMITS_CHANGED notification and
> get_max_state() does uppper - lower.

Right, this is another case shows that it's better to support dynamic
max_state.
IMO, this is not difficult to do. We just need to introduce a new API,
which reuses the current cdev device, and reset its every thermal
instance, and update all the thermal zones the cdev is involved.
what do you think?

thanks,
rui


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-12  6:13         ` Zhang Rui
@ 2020-04-12 10:07           ` Daniel Lezcano
  2020-04-13  2:01             ` Zhang Rui
  0 siblings, 1 reply; 26+ messages in thread
From: Daniel Lezcano @ 2020-04-12 10:07 UTC (permalink / raw)
  To: Zhang Rui, linux-pm; +Cc: tiwai, viresh.kumar






Hi Rui,

On 12/04/2020 08:13, Zhang Rui wrote:
> On Fri, 2020-04-10 at 14:10 +0200, Daniel Lezcano wrote:

[ ... ]

>>> And to fix this, it's better to
>>> a. unregister and re-register the cooling device as you suggested.
>>> or
>>> b. introduce an API that updates the cooling device entirely
>>> instead of
>>> statistics table only.
>>>
>>> For either of the above solutions, we'd better to cleanup the code
>>> to 
>>> invoke .get_max_state() during registration/max_state_reset phase,
>>> once, and then always use cached value later.
>>> And plus, if we want to follow solution a), it's better to remove
>>> .get_max_state() callback and use an integer instead so that every
>>> driver knows this limitation.
>>> I'd vote for solution a) if there is no soc thermal driver that may
>>> return dynamic max_states.
>>>
>>> Do I still miss something?
>>
>> I agree for the a) solution too.
>>
>> But regarding the get_max_state() callback being converted to a
>> integer,
>> the driver int3406_thermal.c computes the upper and lower limits
>> which
>> are updated on a INT3406_BRIGHTNESS_LIMITS_CHANGED notification and
>> get_max_state() does uppper - lower.
> 
> Right, this is another case shows that it's better to support dynamic
> max_state.
> IMO, this is not difficult to do. We just need to introduce a new API,
> which reuses the current cdev device, and reset its every thermal
> instance, and update all the thermal zones the cdev is involved.
> what do you think?

I like how the thermal framework is designed but I think there are too
many API for the thermal framework and it deserves a simplification
rather than adding more of them.

There is no place where the get_max_state is cached except in the stats
structure.

In the function thermal_cooling_device_stats_update():

  Is it possible to just compare the 'new_state' parameter with
stats->max_state and if it is greater increase the stats table and
update max_state to the new_state ?



-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-12 10:07           ` Daniel Lezcano
@ 2020-04-13  2:01             ` Zhang Rui
  2020-04-13 18:06               ` Daniel Lezcano
  0 siblings, 1 reply; 26+ messages in thread
From: Zhang Rui @ 2020-04-13  2:01 UTC (permalink / raw)
  To: Daniel Lezcano, linux-pm; +Cc: tiwai, viresh.kumar

On Sun, 2020-04-12 at 12:07 +0200, Daniel Lezcano wrote:
> 
> 
> 
> 
> Hi Rui,
> 
> On 12/04/2020 08:13, Zhang Rui wrote:
> > On Fri, 2020-04-10 at 14:10 +0200, Daniel Lezcano wrote:
> 
> [ ... ]
> 
> > > > And to fix this, it's better to
> > > > a. unregister and re-register the cooling device as you
> > > > suggested.
> > > > or
> > > > b. introduce an API that updates the cooling device entirely
> > > > instead of
> > > > statistics table only.
> > > > 
> > > > For either of the above solutions, we'd better to cleanup the
> > > > code
> > > > to 
> > > > invoke .get_max_state() during registration/max_state_reset
> > > > phase,
> > > > once, and then always use cached value later.
> > > > And plus, if we want to follow solution a), it's better to
> > > > remove
> > > > .get_max_state() callback and use an integer instead so that
> > > > every
> > > > driver knows this limitation.
> > > > I'd vote for solution a) if there is no soc thermal driver that
> > > > may
> > > > return dynamic max_states.
> > > > 
> > > > Do I still miss something?
> > > 
> > > I agree for the a) solution too.
> > > 
> > > But regarding the get_max_state() callback being converted to a
> > > integer,
> > > the driver int3406_thermal.c computes the upper and lower limits
> > > which
> > > are updated on a INT3406_BRIGHTNESS_LIMITS_CHANGED notification
> > > and
> > > get_max_state() does uppper - lower.
> > 
> > Right, this is another case shows that it's better to support
> > dynamic
> > max_state.
> > IMO, this is not difficult to do. We just need to introduce a new
> > API,
> > which reuses the current cdev device, and reset its every thermal
> > instance, and update all the thermal zones the cdev is involved.
> > what do you think?
> 
> I like how the thermal framework is designed but I think there are
> too
> many API for the thermal framework and it deserves a simplification
> rather than adding more of them.

I agree.
> 
> There is no place where the get_max_state is cached except in the
> stats
> structure.

why we can not have a cdev->max_state field, and get it updated right
after .get_max_state().
and .get_max_state()  is only invoked
a) during cooling device registration
b) when cooling device update its max_state via the new API.

> 
> In the function thermal_cooling_device_stats_update():
> 
>   Is it possible to just compare the 'new_state' parameter with
> stats->max_state and if it is greater increase the stats table and
> update max_state to the new_state ?
> 
the problem is that thermal_cooling_device_stats_update() is invoked
only if thermal zone are updated or the cur_state sysfs attribute is
changed.
There is no way for a cooling device driver to tell thermal framework
that it has changed.
Say, for the problem on hand, the statistics table will not be updated
in time when cpufreq driver probed.

thanks,
rui


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-08  4:19 ` [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed Zhang Rui
  2020-04-09 13:34   ` Daniel Lezcano
@ 2020-04-13 16:16   ` kbuild test robot
  2020-04-14 12:37     ` Dan Carpenter
  2 siblings, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2020-04-13 16:16 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2329 bytes --]

Hi Zhang,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on pm/linux-next]
[also build test WARNING on linus/master v5.7-rc1 next-20200413]
[cannot apply to soc-thermal/next]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Zhang-Rui/thermal-rename-thermal_cooling_device_stats_update/20200408-122131
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>


cppcheck warnings: (new ones prefixed by >>)

>> drivers/acpi/processor_thermal.c:158:33: warning: Either the condition 'if(pr)' is redundant or there is possible null pointer dereference: pr. [nullPointerRedundantCheck]
     thermal_cdev_stats_update_max(pr->cdev);
                                   ^
   drivers/acpi/processor_thermal.c:156:6: note: Assuming that condition 'if(pr)' is not redundant
     if (pr)
        ^
   drivers/acpi/processor_thermal.c:154:38: note: Assignment 'pr=per_cpu(processors,policy->cpu)', assigned value is 0
     struct acpi_processor *pr = per_cpu(processors, policy->cpu);
                                        ^
   drivers/acpi/processor_thermal.c:158:33: note: Null pointer dereference
     thermal_cdev_stats_update_max(pr->cdev);
                                   ^

vim +158 drivers/acpi/processor_thermal.c

   148	
   149	void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
   150	{
   151		unsigned int cpu;
   152	
   153		for_each_cpu(cpu, policy->related_cpus) {
   154			struct acpi_processor *pr = per_cpu(processors, policy->cpu);
   155	
   156			if (pr)
   157				freq_qos_remove_request(&pr->thermal_req);
 > 158			thermal_cdev_stats_update_max(pr->cdev);
   159		}
   160	}
   161	#else				/* ! CONFIG_CPU_FREQ */
   162	static int cpufreq_get_max_state(unsigned int cpu)
   163	{
   164		return 0;
   165	}
   166	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-13  2:01             ` Zhang Rui
@ 2020-04-13 18:06               ` Daniel Lezcano
  2020-04-16  4:46                 ` Zhang Rui
  0 siblings, 1 reply; 26+ messages in thread
From: Daniel Lezcano @ 2020-04-13 18:06 UTC (permalink / raw)
  To: Zhang Rui, linux-pm; +Cc: tiwai, viresh.kumar

On 13/04/2020 04:01, Zhang Rui wrote:
> On Sun, 2020-04-12 at 12:07 +0200, Daniel Lezcano wrote:

[ ... ]

> why we can not have a cdev->max_state field, and get it updated right
> after .get_max_state().
> and .get_max_state()  is only invoked
> a) during cooling device registration
> b) when cooling device update its max_state via the new API.
> 
>>
>> In the function thermal_cooling_device_stats_update():
>>
>>   Is it possible to just compare the 'new_state' parameter with
>> stats->max_state and if it is greater increase the stats table and
>> update max_state to the new_state ?
>>
> the problem is that thermal_cooling_device_stats_update() is invoked
> only if thermal zone are updated or the cur_state sysfs attribute is
> changed.
> There is no way for a cooling device driver to tell thermal framework
> that it has changed.
> Say, for the problem on hand, the statistics table will not be updated
> in time when cpufreq driver probed.

Except I'm missing something, the statistics are only read from
userspace via sysfs.

userspace is not notified about a stat change. Is it really a problem
the table is not updated right at the probe time ? Does it really matter
if the user sees the old table until an update happens on a new higher
max state ?

The table is always consistent whenever the userspace reads the content.

A new entry will appear only if it is used, no?






-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-08  4:19 ` [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed Zhang Rui
@ 2020-04-14 12:37     ` Dan Carpenter
  2020-04-13 16:16   ` kbuild test robot
  2020-04-14 12:37     ` Dan Carpenter
  2 siblings, 0 replies; 26+ messages in thread
From: Dan Carpenter @ 2020-04-14 12:37 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 2421 bytes --]

Hi Zhang,

url:    https://github.com/0day-ci/linux/commits/Zhang-Rui/thermal-rename-thermal_cooling_device_stats_update/20200408-122131
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
drivers/acpi/processor_thermal.c:158 acpi_thermal_cpufreq_exit() error: we previously assumed 'pr' could be null (see line 156)

# https://github.com/0day-ci/linux/commit/7d1c8f3879a36c7230c2842c651c4a3ce66b3d61
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 7d1c8f3879a36c7230c2842c651c4a3ce66b3d61
vim +/pr +158 drivers/acpi/processor_thermal.c

3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  149  void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
4be44fcd3bf648 Len Brown         2005-08-05  150  {
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  151  	unsigned int cpu;
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  152  
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  153  	for_each_cpu(cpu, policy->related_cpus) {
3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  154  		struct acpi_processor *pr = per_cpu(processors, policy->cpu);
^1da177e4c3f41 Linus Torvalds    2005-04-16  155  
2d8b39a62a5d38 Rafael J. Wysocki 2019-10-15 @156  		if (pr)
                                                                    ^^
Check

3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  157  			freq_qos_remove_request(&pr->thermal_req);
7d1c8f3879a36c Zhang Rui         2020-04-08 @158  		thermal_cdev_stats_update_max(pr->cdev);
                                                                                              ^^^^^^^^
Unchecked dereference.

^1da177e4c3f41 Linus Torvalds    2005-04-16  159  	}
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  160  }
^1da177e4c3f41 Linus Torvalds    2005-04-16  161  #else				/* ! CONFIG_CPU_FREQ */
d9460fd227ed2c Zhang Rui         2008-01-17  162  static int cpufreq_get_max_state(unsigned int cpu)
d9460fd227ed2c Zhang Rui         2008-01-17  163  {
d9460fd227ed2c Zhang Rui         2008-01-17  164  	return 0;
d9460fd227ed2c Zhang Rui         2008-01-17  165  }

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
@ 2020-04-14 12:37     ` Dan Carpenter
  0 siblings, 0 replies; 26+ messages in thread
From: Dan Carpenter @ 2020-04-14 12:37 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2421 bytes --]

Hi Zhang,

url:    https://github.com/0day-ci/linux/commits/Zhang-Rui/thermal-rename-thermal_cooling_device_stats_update/20200408-122131
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
drivers/acpi/processor_thermal.c:158 acpi_thermal_cpufreq_exit() error: we previously assumed 'pr' could be null (see line 156)

# https://github.com/0day-ci/linux/commit/7d1c8f3879a36c7230c2842c651c4a3ce66b3d61
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 7d1c8f3879a36c7230c2842c651c4a3ce66b3d61
vim +/pr +158 drivers/acpi/processor_thermal.c

3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  149  void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
4be44fcd3bf648 Len Brown         2005-08-05  150  {
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  151  	unsigned int cpu;
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  152  
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  153  	for_each_cpu(cpu, policy->related_cpus) {
3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  154  		struct acpi_processor *pr = per_cpu(processors, policy->cpu);
^1da177e4c3f41 Linus Torvalds    2005-04-16  155  
2d8b39a62a5d38 Rafael J. Wysocki 2019-10-15 @156  		if (pr)
                                                                    ^^
Check

3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  157  			freq_qos_remove_request(&pr->thermal_req);
7d1c8f3879a36c Zhang Rui         2020-04-08 @158  		thermal_cdev_stats_update_max(pr->cdev);
                                                                                              ^^^^^^^^
Unchecked dereference.

^1da177e4c3f41 Linus Torvalds    2005-04-16  159  	}
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  160  }
^1da177e4c3f41 Linus Torvalds    2005-04-16  161  #else				/* ! CONFIG_CPU_FREQ */
d9460fd227ed2c Zhang Rui         2008-01-17  162  static int cpufreq_get_max_state(unsigned int cpu)
d9460fd227ed2c Zhang Rui         2008-01-17  163  {
d9460fd227ed2c Zhang Rui         2008-01-17  164  	return 0;
d9460fd227ed2c Zhang Rui         2008-01-17  165  }

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-13 18:06               ` Daniel Lezcano
@ 2020-04-16  4:46                 ` Zhang Rui
  2020-04-16  7:58                   ` Daniel Lezcano
  0 siblings, 1 reply; 26+ messages in thread
From: Zhang Rui @ 2020-04-16  4:46 UTC (permalink / raw)
  To: Daniel Lezcano, linux-pm; +Cc: tiwai, viresh.kumar

On Mon, 2020-04-13 at 20:06 +0200, Daniel Lezcano wrote:
> On 13/04/2020 04:01, Zhang Rui wrote:
> > On Sun, 2020-04-12 at 12:07 +0200, Daniel Lezcano wrote:
> 
> [ ... ]
> 
> > why we can not have a cdev->max_state field, and get it updated
> > right
> > after .get_max_state().
> > and .get_max_state()  is only invoked
> > a) during cooling device registration
> > b) when cooling device update its max_state via the new API.
> > 
> > > 
> > > In the function thermal_cooling_device_stats_update():
> > > 
> > >   Is it possible to just compare the 'new_state' parameter with
> > > stats->max_state and if it is greater increase the stats table
> > > and
> > > update max_state to the new_state ?
> > > 
> > 
> > the problem is that thermal_cooling_device_stats_update() is
> > invoked
> > only if thermal zone are updated or the cur_state sysfs attribute
> > is
> > changed.
> > There is no way for a cooling device driver to tell thermal
> > framework
> > that it has changed.
> > Say, for the problem on hand, the statistics table will not be
> > updated
> > in time when cpufreq driver probed.
> 
> Except I'm missing something, the statistics are only read from
> userspace via sysfs.

I agree.
> 
> userspace is not notified about a stat change. Is it really a problem
> the table is not updated right at the probe time ?

>  Does it really matter
> if the user sees the old table until an update happens on a new
> higher
> max state ?
> 
> The table is always consistent whenever the userspace reads the
> content.

> 
> A new entry will appear only if it is used, no?
> 
Hmm, IMO, stats table is not the biggest problem here.
The problem is that thermal framework is not aware of the max_state
change, and the thermal instances are never updated according to the
new max_state.
So, we should invoke .get_max_state() in thermal_zone_device_update()
and update the thermal instances accordingly.
And then, what we need to do is just to do stats update right after
.get_max_state() being invoked.

About how to update the stats table, I think adding new entries is not
enough, because the meaning of each cooling state may change when
max_state changes, thus I'd prefer a full reset/resizing of the table.

thanks,
rui
> 
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-16  4:46                 ` Zhang Rui
@ 2020-04-16  7:58                   ` Daniel Lezcano
  2020-04-17  2:09                     ` Zhang Rui
  0 siblings, 1 reply; 26+ messages in thread
From: Daniel Lezcano @ 2020-04-16  7:58 UTC (permalink / raw)
  To: Zhang Rui, linux-pm; +Cc: tiwai, viresh.kumar

On 16/04/2020 06:46, Zhang Rui wrote:
> On Mon, 2020-04-13 at 20:06 +0200, Daniel Lezcano wrote:
>> On 13/04/2020 04:01, Zhang Rui wrote:
>>> On Sun, 2020-04-12 at 12:07 +0200, Daniel Lezcano wrote:
>>
>> [ ... ]
>>
>>> why we can not have a cdev->max_state field, and get it updated
>>> right
>>> after .get_max_state().
>>> and .get_max_state()  is only invoked
>>> a) during cooling device registration
>>> b) when cooling device update its max_state via the new API.
>>>
>>>>
>>>> In the function thermal_cooling_device_stats_update():
>>>>
>>>>   Is it possible to just compare the 'new_state' parameter with
>>>> stats->max_state and if it is greater increase the stats table
>>>> and
>>>> update max_state to the new_state ?
>>>>
>>>
>>> the problem is that thermal_cooling_device_stats_update() is
>>> invoked
>>> only if thermal zone are updated or the cur_state sysfs attribute
>>> is
>>> changed.
>>> There is no way for a cooling device driver to tell thermal
>>> framework
>>> that it has changed.
>>> Say, for the problem on hand, the statistics table will not be
>>> updated
>>> in time when cpufreq driver probed.
>>
>> Except I'm missing something, the statistics are only read from
>> userspace via sysfs.
> 
> I agree.
>>
>> userspace is not notified about a stat change. Is it really a problem
>> the table is not updated right at the probe time ?
> 
>>  Does it really matter
>> if the user sees the old table until an update happens on a new
>> higher
>> max state ?
>>
>> The table is always consistent whenever the userspace reads the
>> content.
> 
>>
>> A new entry will appear only if it is used, no?
>>
> Hmm, IMO, stats table is not the biggest problem here.
> The problem is that thermal framework is not aware of the max_state
> change, and the thermal instances are never updated according to the
> new max_state.
> So, we should invoke .get_max_state() in thermal_zone_device_update()
> and update the thermal instances accordingly.
> And then, what we need to do is just to do stats update right after
> .get_max_state() being invoked.
> 
> About how to update the stats table, I think adding new entries is not
> enough, because the meaning of each cooling state may change when
> max_state changes, thus I'd prefer a full reset/resizing of the table.

Ok, I understand. Are planning to use the existing API:

 thermal_zone_device_update(tz, THERMAL_TABLE_CHANGED);

?


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
  2020-04-16  7:58                   ` Daniel Lezcano
@ 2020-04-17  2:09                     ` Zhang Rui
  0 siblings, 0 replies; 26+ messages in thread
From: Zhang Rui @ 2020-04-17  2:09 UTC (permalink / raw)
  To: Daniel Lezcano, linux-pm; +Cc: tiwai, viresh.kumar

On Thu, 2020-04-16 at 09:58 +0200, Daniel Lezcano wrote:
> On 16/04/2020 06:46, Zhang Rui wrote:
> > On Mon, 2020-04-13 at 20:06 +0200, Daniel Lezcano wrote:
> > > On 13/04/2020 04:01, Zhang Rui wrote:
> > > > On Sun, 2020-04-12 at 12:07 +0200, Daniel Lezcano wrote:
> > > 
> > > [ ... ]
> > > 
> > > > why we can not have a cdev->max_state field, and get it updated
> > > > right
> > > > after .get_max_state().
> > > > and .get_max_state()  is only invoked
> > > > a) during cooling device registration
> > > > b) when cooling device update its max_state via the new API.
> > > > 
> > > > > 
> > > > > In the function thermal_cooling_device_stats_update():
> > > > > 
> > > > >   Is it possible to just compare the 'new_state' parameter
> > > > > with
> > > > > stats->max_state and if it is greater increase the stats
> > > > > table
> > > > > and
> > > > > update max_state to the new_state ?
> > > > > 
> > > > 
> > > > the problem is that thermal_cooling_device_stats_update() is
> > > > invoked
> > > > only if thermal zone are updated or the cur_state sysfs
> > > > attribute
> > > > is
> > > > changed.
> > > > There is no way for a cooling device driver to tell thermal
> > > > framework
> > > > that it has changed.
> > > > Say, for the problem on hand, the statistics table will not be
> > > > updated
> > > > in time when cpufreq driver probed.
> > > 
> > > Except I'm missing something, the statistics are only read from
> > > userspace via sysfs.
> > 
> > I agree.
> > > 
> > > userspace is not notified about a stat change. Is it really a
> > > problem
> > > the table is not updated right at the probe time ?
> > >  Does it really matter
> > > if the user sees the old table until an update happens on a new
> > > higher
> > > max state ?
> > > 
> > > The table is always consistent whenever the userspace reads the
> > > content.
> > > 
> > > A new entry will appear only if it is used, no?
> > > 
> > 
> > Hmm, IMO, stats table is not the biggest problem here.
> > The problem is that thermal framework is not aware of the max_state
> > change, and the thermal instances are never updated according to
> > the
> > new max_state.
> > So, we should invoke .get_max_state() in
> > thermal_zone_device_update()
> > and update the thermal instances accordingly.
> > And then, what we need to do is just to do stats update right after
> > .get_max_state() being invoked.
> > 
> > About how to update the stats table, I think adding new entries is
> > not
> > enough, because the meaning of each cooling state may change when
> > max_state changes, thus I'd prefer a full reset/resizing of the
> > table.
> 
> Ok, I understand. Are planning to use the existing API:
> 
>  thermal_zone_device_update(tz, THERMAL_TABLE_CHANGED);

we can not use THERMAL_TABLE_CHANGED because cooling device driver can
not invoke thermal_zone_device_update().

So, the cooling device max_state is changed, but the thermal framework
will not know this until the cooling device being used for a
temperature change.
For the cooling devices that are not used by any thermal zones, the
stats files are probably out of date, but maybe nobody cares. Or we can
fix this by updating stats table for every stats sysfs attribute
access.

thanks,
rui


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update()
  2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
                   ` (4 preceding siblings ...)
  2020-04-08  9:47 ` [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Takashi Iwai
@ 2020-05-06 12:07 ` Amit Kucheria
  5 siblings, 0 replies; 26+ messages in thread
From: Amit Kucheria @ 2020-05-06 12:07 UTC (permalink / raw)
  To: Zhang Rui; +Cc: Linux PM list, Daniel Lezcano, Takashi Iwai, Viresh Kumar

On Wed, Apr 8, 2020 at 9:49 AM Zhang Rui <rui.zhang@intel.com> wrote:
>
> Rename thermal_cooling_device_stats_update() to
> thermal_cdev_stats_update_cur()

Missing a reason for this change.

>
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>

> ---
>  drivers/thermal/thermal_core.h    | 4 ++--
>  drivers/thermal/thermal_helpers.c | 2 +-
>  drivers/thermal/thermal_sysfs.c   | 4 ++--
>  3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index a9bf00e91d64..722902d5724a 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -81,11 +81,11 @@ ssize_t weight_store(struct device *, struct device_attribute *, const char *,
>                      size_t);
>
>  #ifdef CONFIG_THERMAL_STATISTICS
> -void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
> +void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
>                                          unsigned long new_state);
>  #else
>  static inline void
> -thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
> +thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
>                                     unsigned long new_state) {}
>  #endif /* CONFIG_THERMAL_STATISTICS */
>
> diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
> index 2ba756af76b7..3af895e5dfce 100644
> --- a/drivers/thermal/thermal_helpers.c
> +++ b/drivers/thermal/thermal_helpers.c
> @@ -186,7 +186,7 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
>         }
>
>         if (!cdev->ops->set_cur_state(cdev, target))
> -               thermal_cooling_device_stats_update(cdev, target);
> +               thermal_cdev_stats_update_cur(cdev, target);
>
>         cdev->updated = true;
>         mutex_unlock(&cdev->lock);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index aa99edb4dff7..00caa7787b71 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -716,7 +716,7 @@ cur_state_store(struct device *dev, struct device_attribute *attr,
>
>         result = cdev->ops->set_cur_state(cdev, state);
>         if (!result)
> -               thermal_cooling_device_stats_update(cdev, state);
> +               thermal_cdev_stats_update_cur(cdev, state);
>
>         mutex_unlock(&cdev->lock);
>         return result ? result : count;
> @@ -765,7 +765,7 @@ static void update_time_in_state(struct cooling_dev_stats *stats)
>         stats->last_time = now;
>  }
>
> -void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
> +void thermal_cdev_stats_update_cur(struct thermal_cooling_device *cdev,
>                                          unsigned long new_state)
>  {
>         struct cooling_dev_stats *stats = cdev->stats;
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed
@ 2020-04-13  5:07 kbuild test robot
  0 siblings, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2020-04-13  5:07 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 3007 bytes --]

CC: kbuild-all(a)lists.01.org
In-Reply-To: <20200408041917.2329-5-rui.zhang@intel.com>
References: <20200408041917.2329-5-rui.zhang@intel.com>
TO: Zhang Rui <rui.zhang@intel.com>

Hi Zhang,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on pm/linux-next]
[also build test WARNING on linus/master v5.7-rc1 next-20200412]
[cannot apply to soc-thermal/next]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Zhang-Rui/thermal-rename-thermal_cooling_device_stats_update/20200408-122131
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
:::::: branch date: 5 days ago
:::::: commit date: 5 days ago

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
drivers/acpi/processor_thermal.c:158 acpi_thermal_cpufreq_exit() error: we previously assumed 'pr' could be null (see line 156)

# https://github.com/0day-ci/linux/commit/7d1c8f3879a36c7230c2842c651c4a3ce66b3d61
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 7d1c8f3879a36c7230c2842c651c4a3ce66b3d61
vim +/pr +158 drivers/acpi/processor_thermal.c

^1da177e4c3f41 Linus Torvalds    2005-04-16  148  
3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  149  void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
4be44fcd3bf648 Len Brown         2005-08-05  150  {
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  151  	unsigned int cpu;
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  152  
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  153  	for_each_cpu(cpu, policy->related_cpus) {
3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  154  		struct acpi_processor *pr = per_cpu(processors, policy->cpu);
^1da177e4c3f41 Linus Torvalds    2005-04-16  155  
2d8b39a62a5d38 Rafael J. Wysocki 2019-10-15 @156  		if (pr)
3000ce3c52f8b8 Rafael J. Wysocki 2019-10-16  157  			freq_qos_remove_request(&pr->thermal_req);
7d1c8f3879a36c Zhang Rui         2020-04-08 @158  		thermal_cdev_stats_update_max(pr->cdev);
^1da177e4c3f41 Linus Torvalds    2005-04-16  159  	}
a1bb46c36ce389 Rafael J. Wysocki 2019-10-25  160  }
^1da177e4c3f41 Linus Torvalds    2005-04-16  161  #else				/* ! CONFIG_CPU_FREQ */
d9460fd227ed2c Zhang Rui         2008-01-17  162  static int cpufreq_get_max_state(unsigned int cpu)
d9460fd227ed2c Zhang Rui         2008-01-17  163  {
d9460fd227ed2c Zhang Rui         2008-01-17  164  	return 0;
d9460fd227ed2c Zhang Rui         2008-01-17  165  }
d9460fd227ed2c Zhang Rui         2008-01-17  166  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2020-05-06 12:08 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-08  4:19 [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Zhang Rui
2020-04-08  4:19 ` [RFC PATCH 2/5] thermal: create statistics table in two steps Zhang Rui
2020-04-08  4:19 ` [RFC PATCH 3/5] thermal: support statistics table resizing at runtime Zhang Rui
2020-04-08  9:45   ` Takashi Iwai
2020-04-09  2:57     ` Zhang Rui
2020-04-08  4:19 ` [RFC PATCH 4/5] thermal: Add a sanity check for invalid state at stats update Zhang Rui
2020-04-08  4:19 ` [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed Zhang Rui
2020-04-09 13:34   ` Daniel Lezcano
2020-04-10  8:02     ` Zhang Rui
2020-04-10 12:10       ` Daniel Lezcano
2020-04-12  6:13         ` Zhang Rui
2020-04-12 10:07           ` Daniel Lezcano
2020-04-13  2:01             ` Zhang Rui
2020-04-13 18:06               ` Daniel Lezcano
2020-04-16  4:46                 ` Zhang Rui
2020-04-16  7:58                   ` Daniel Lezcano
2020-04-17  2:09                     ` Zhang Rui
2020-04-10 14:10       ` Rafael J. Wysocki
2020-04-11  4:41         ` Zhang Rui
2020-04-13 16:16   ` kbuild test robot
2020-04-14 12:37   ` Dan Carpenter
2020-04-14 12:37     ` Dan Carpenter
2020-04-08  9:47 ` [RFC PATCH 1/5] thermal: rename thermal_cooling_device_stats_update() Takashi Iwai
2020-04-09  2:59   ` Zhang Rui
2020-05-06 12:07 ` Amit Kucheria
2020-04-13  5:07 [RFC PATCH 5/5] ACPI: processor: do update when maximum cooling state changed kbuild test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.