All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3][v3] Fix thermal problems during suspend/bootup
@ 2015-10-30  8:31 Chen Yu
  2015-10-30  8:31   ` Chen Yu
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:31 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, Chen Yu

This patch set fixes two problems that occur when the system
suspends or boots up:
1. After the system is woken up from suspend, the thermal framework uses
   the stale 'cached' thermal variables from before suspend, which might
   cause unexpected behavior.
2. If a cooling device is registered after the thermal zone's registration,
   the current thermal framework forgets to update the thermal zone's status,
   which might cause unexpected behavior in special cases.

Chen Yu (3):
  Thermal: initialize thermal zone device correctly
  Thermal: handle thermal zone device properly during system sleep
  Thermal: do thermal zone update after a cooling device registered

 drivers/thermal/step_wise.c    | 17 ++++++++--
 drivers/thermal/thermal_core.c | 75 ++++++++++++++++++++++++++++++++++++++++--
 drivers/thermal/thermal_core.h |  1 +
 include/linux/thermal.h        |  5 +++
 4 files changed, 93 insertions(+), 5 deletions(-)

-- 
1.8.4.2


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
  2015-10-30  8:31 [PATCH 0/3][v3] Fix thermal problems during suspend/bootup Chen Yu
@ 2015-10-30  8:31   ` Chen Yu
  2015-10-30  8:31   ` Chen Yu
  2015-10-30  8:32   ` Chen Yu
  2 siblings, 0 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:31 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, stable, Chen Yu

From: Zhang Rui <rui.zhang@intel.com>

After a thermal zone device is registered, no temperature has been
read yet, so tz->temperature should not be 0 (which actually means
0C), and the thermal trend is not available.
In this case, we need special handling for the first
thermal_zone_device_update().

Both the thermal core framework and the step_wise governor are
enhanced to handle this. Since the step_wise governor is the
only one that uses trends, it is the only thermal governor
that needs to be updated.

CC: <stable@vger.kernel.org> #3.18+
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: szegad <szegadlo@poczta.onet.pl>
Tested-by: prash <prash.n.rao@gmail.com>
Tested-by: amish <ammdispose-arch@yahoo.com>
Tested-by: Matthias <morpheusxyz123@yahoo.de>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/thermal/step_wise.c    | 17 +++++++++++++++--
 drivers/thermal/thermal_core.c | 19 +++++++++++++++++--
 drivers/thermal/thermal_core.h |  1 +
 include/linux/thermal.h        |  3 +++
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 2f9f708..ea9366a 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
 	next_target = instance->target;
 	dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
 
+	if (!instance->initialized) {
+		if (throttle) {
+			next_target = (cur_state + 1) >= instance->upper ?
+					instance->upper :
+					((cur_state + 1) < instance->lower ?
+					instance->lower : (cur_state + 1));
+		} else {
+			next_target = THERMAL_NO_TARGET;
+		}
+
+		return next_target;
+	}
+
 	switch (trend) {
 	case THERMAL_TREND_RAISING:
 		if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 		dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
 					old_target, (int)instance->target);
 
-		if (old_target == instance->target)
+		if (instance->initialized && old_target == instance->target)
 			continue;
 
 		/* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 			instance->target == THERMAL_NO_TARGET)
 			update_passive_instance(tz, trip_type, -1);
 
-
+		instance->initialized = true;
 		instance->cdev->updated = false; /* cdev needs update */
 	}
 
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d9e525c..682bc1e 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -532,8 +532,22 @@ static void update_temperature(struct thermal_zone_device *tz)
 	mutex_unlock(&tz->lock);
 
 	trace_thermal_temperature(tz);
-	dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
-				tz->last_temperature, tz->temperature);
+	if (tz->last_temperature == THERMAL_TEMP_INVALID)
+		dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+			tz->temperature);
+	else
+		dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+			tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+	struct thermal_instance *pos;
+
+	tz->temperature = THERMAL_TEMP_INVALID;
+	tz->passive = 0;
+	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+		pos->initialized = false;
 }
 
 void thermal_zone_device_update(struct thermal_zone_device *tz)
@@ -1900,6 +1914,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
 	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
+	thermal_zone_device_reset(tz);
 	thermal_zone_device_update(tz);
 
 	return tz;
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d7ac1fc..749d41a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -41,6 +41,7 @@ struct thermal_instance {
 	struct thermal_zone_device *tz;
 	struct thermal_cooling_device *cdev;
 	int trip;
+	bool initialized;
 	unsigned long upper;	/* Highest cooling state for this trip point */
 	unsigned long lower;	/* Lowest cooling state for this trip point */
 	unsigned long target;	/* expected cooling state */
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 157d366..5bcabc7 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -43,6 +43,9 @@
 /* Default weight of a bound cooling device */
 #define THERMAL_WEIGHT_DEFAULT 0
 
+/* use value, which < 0K, to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID	-274000
+
 /* Unit conversion macros */
 #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
-- 
1.8.4.2


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
@ 2015-10-30  8:31   ` Chen Yu
  0 siblings, 0 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:31 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, stable, Chen Yu

From: Zhang Rui <rui.zhang@intel.com>

After a thermal zone device is registered, no temperature has been
read yet, so tz->temperature should not be 0 (which actually means
0C), and the thermal trend is not available.
In this case, we need special handling for the first
thermal_zone_device_update().

Both the thermal core framework and the step_wise governor are
enhanced to handle this. Since the step_wise governor is the
only one that uses trends, it is the only thermal governor
that needs to be updated.

CC: <stable@vger.kernel.org> #3.18+
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: szegad <szegadlo@poczta.onet.pl>
Tested-by: prash <prash.n.rao@gmail.com>
Tested-by: amish <ammdispose-arch@yahoo.com>
Tested-by: Matthias <morpheusxyz123@yahoo.de>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/thermal/step_wise.c    | 17 +++++++++++++++--
 drivers/thermal/thermal_core.c | 19 +++++++++++++++++--
 drivers/thermal/thermal_core.h |  1 +
 include/linux/thermal.h        |  3 +++
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 2f9f708..ea9366a 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
 	next_target = instance->target;
 	dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
 
+	if (!instance->initialized) {
+		if (throttle) {
+			next_target = (cur_state + 1) >= instance->upper ?
+					instance->upper :
+					((cur_state + 1) < instance->lower ?
+					instance->lower : (cur_state + 1));
+		} else {
+			next_target = THERMAL_NO_TARGET;
+		}
+
+		return next_target;
+	}
+
 	switch (trend) {
 	case THERMAL_TREND_RAISING:
 		if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 		dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
 					old_target, (int)instance->target);
 
-		if (old_target == instance->target)
+		if (instance->initialized && old_target == instance->target)
 			continue;
 
 		/* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 			instance->target == THERMAL_NO_TARGET)
 			update_passive_instance(tz, trip_type, -1);
 
-
+		instance->initialized = true;
 		instance->cdev->updated = false; /* cdev needs update */
 	}
 
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d9e525c..682bc1e 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -532,8 +532,22 @@ static void update_temperature(struct thermal_zone_device *tz)
 	mutex_unlock(&tz->lock);
 
 	trace_thermal_temperature(tz);
-	dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
-				tz->last_temperature, tz->temperature);
+	if (tz->last_temperature == THERMAL_TEMP_INVALID)
+		dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+			tz->temperature);
+	else
+		dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+			tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+	struct thermal_instance *pos;
+
+	tz->temperature = THERMAL_TEMP_INVALID;
+	tz->passive = 0;
+	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+		pos->initialized = false;
 }
 
 void thermal_zone_device_update(struct thermal_zone_device *tz)
@@ -1900,6 +1914,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
 	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
+	thermal_zone_device_reset(tz);
 	thermal_zone_device_update(tz);
 
 	return tz;
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d7ac1fc..749d41a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -41,6 +41,7 @@ struct thermal_instance {
 	struct thermal_zone_device *tz;
 	struct thermal_cooling_device *cdev;
 	int trip;
+	bool initialized;
 	unsigned long upper;	/* Highest cooling state for this trip point */
 	unsigned long lower;	/* Lowest cooling state for this trip point */
 	unsigned long target;	/* expected cooling state */
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 157d366..5bcabc7 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -43,6 +43,9 @@
 /* Default weight of a bound cooling device */
 #define THERMAL_WEIGHT_DEFAULT 0
 
+/* use value, which < 0K, to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID	-274000
+
 /* Unit conversion macros */
 #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/3][v3] Thermal: handle thermal zone device properly during system sleep
  2015-10-30  8:31 [PATCH 0/3][v3] Fix thermal problems during suspend/bootup Chen Yu
@ 2015-10-30  8:31   ` Chen Yu
  2015-10-30  8:31   ` Chen Yu
  2015-10-30  8:32   ` Chen Yu
  2 siblings, 0 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:31 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, stable, Chen Yu

From: Zhang Rui <rui.zhang@intel.com>

Current thermal code does not handle system sleep well because
1. the cooling state of a cooling device may be changed during suspend
2. the previous temperature reading becomes invalid after resume because
   it was taken before system sleep
3. updating a thermal zone device during suspend/resume
   is wrong because some devices may have already been suspended
   or may not have been resumed yet.

Thus, the proper way to do this is to cancel all thermal zone
device update requests during suspend/resume, and after all
the devices have been resumed, reset and update every registered
thermal zone device.

This also fixes a regression introduced by:
Commit 19593a1fb1f6 ("ACPI / fan: convert to platform driver")
Because, with above commit applied, all the fan devices are attached
to the acpi_general_pm_domain, and they are turned on by the pm_domain
automatically after resume, without the awareness of thermal core.

CC: <stable@vger.kernel.org> #3.18+
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=78201
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=91411
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: szegad <szegadlo@poczta.onet.pl>
Tested-by: prash <prash.n.rao@gmail.com>
Tested-by: amish <ammdispose-arch@yahoo.com>
Tested-by: Matthias <morpheusxyz123@yahoo.de>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/thermal/thermal_core.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 682bc1e..9aae767 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -37,6 +37,7 @@
 #include <linux/of.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <linux/suspend.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
 
+static atomic_t in_suspend;
+
 static struct thermal_governor *def_governor;
 
 static struct thermal_governor *__find_governor(const char *name)
@@ -554,6 +557,9 @@ void thermal_zone_device_update(struct thermal_zone_device *tz)
 {
 	int count;
 
+	if (atomic_read(&in_suspend))
+		return;
+
 	if (!tz->ops->get_temp)
 		return;
 
@@ -2155,6 +2161,36 @@ static void thermal_unregister_governors(void)
 	thermal_gov_power_allocator_unregister();
 }
 
+static int thermal_pm_notify(struct notifier_block *nb,
+				unsigned long mode, void *_unused)
+{
+	struct thermal_zone_device *tz;
+
+	switch (mode) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_RESTORE_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		atomic_set(&in_suspend, 1);
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+	case PM_POST_SUSPEND:
+		atomic_set(&in_suspend, 0);
+		list_for_each_entry(tz, &thermal_tz_list, node) {
+			thermal_zone_device_reset(tz);
+			thermal_zone_device_update(tz);
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+	.notifier_call = thermal_pm_notify,
+};
+
 static int __init thermal_init(void)
 {
 	int result;
@@ -2175,6 +2211,11 @@ static int __init thermal_init(void)
 	if (result)
 		goto exit_netlink;
 
+	result = register_pm_notifier(&thermal_pm_nb);
+	if (result)
+		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+			result);
+
 	return 0;
 
 exit_netlink:
@@ -2194,6 +2235,7 @@ error:
 
 static void __exit thermal_exit(void)
 {
+	unregister_pm_notifier(&thermal_pm_nb);
 	of_thermal_destroy_zones();
 	genetlink_exit();
 	class_unregister(&thermal_class);
-- 
1.8.4.2


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/3][v3] Thermal: handle thermal zone device properly during system sleep
@ 2015-10-30  8:31   ` Chen Yu
  0 siblings, 0 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:31 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, stable, Chen Yu

From: Zhang Rui <rui.zhang@intel.com>

Current thermal code does not handle system sleep well because
1. the cooling state of a cooling device may be changed during suspend
2. the previous temperature reading becomes invalid after resume because
   it was taken before system sleep
3. updating a thermal zone device during suspend/resume
   is wrong because some devices may have already been suspended
   or may not have been resumed yet.

Thus, the proper way to do this is to cancel all thermal zone
device update requests during suspend/resume, and after all
the devices have been resumed, reset and update every registered
thermal zone device.

This also fixes a regression introduced by:
Commit 19593a1fb1f6 ("ACPI / fan: convert to platform driver")
Because, with above commit applied, all the fan devices are attached
to the acpi_general_pm_domain, and they are turned on by the pm_domain
automatically after resume, without the awareness of thermal core.

CC: <stable@vger.kernel.org> #3.18+
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=78201
Reference: https://bugzilla.kernel.org/show_bug.cgi?id=91411
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: szegad <szegadlo@poczta.onet.pl>
Tested-by: prash <prash.n.rao@gmail.com>
Tested-by: amish <ammdispose-arch@yahoo.com>
Tested-by: Matthias <morpheusxyz123@yahoo.de>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/thermal/thermal_core.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 682bc1e..9aae767 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -37,6 +37,7 @@
 #include <linux/of.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
+#include <linux/suspend.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
 
+static atomic_t in_suspend;
+
 static struct thermal_governor *def_governor;
 
 static struct thermal_governor *__find_governor(const char *name)
@@ -554,6 +557,9 @@ void thermal_zone_device_update(struct thermal_zone_device *tz)
 {
 	int count;
 
+	if (atomic_read(&in_suspend))
+		return;
+
 	if (!tz->ops->get_temp)
 		return;
 
@@ -2155,6 +2161,36 @@ static void thermal_unregister_governors(void)
 	thermal_gov_power_allocator_unregister();
 }
 
+static int thermal_pm_notify(struct notifier_block *nb,
+				unsigned long mode, void *_unused)
+{
+	struct thermal_zone_device *tz;
+
+	switch (mode) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_RESTORE_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		atomic_set(&in_suspend, 1);
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+	case PM_POST_SUSPEND:
+		atomic_set(&in_suspend, 0);
+		list_for_each_entry(tz, &thermal_tz_list, node) {
+			thermal_zone_device_reset(tz);
+			thermal_zone_device_update(tz);
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+	.notifier_call = thermal_pm_notify,
+};
+
 static int __init thermal_init(void)
 {
 	int result;
@@ -2175,6 +2211,11 @@ static int __init thermal_init(void)
 	if (result)
 		goto exit_netlink;
 
+	result = register_pm_notifier(&thermal_pm_nb);
+	if (result)
+		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+			result);
+
 	return 0;
 
 exit_netlink:
@@ -2194,6 +2235,7 @@ error:
 
 static void __exit thermal_exit(void)
 {
+	unregister_pm_notifier(&thermal_pm_nb);
 	of_thermal_destroy_zones();
 	genetlink_exit();
 	class_unregister(&thermal_class);
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/3][v3] Thermal: do thermal zone update after a cooling device registered
  2015-10-30  8:31 [PATCH 0/3][v3] Fix thermal problems during suspend/bootup Chen Yu
@ 2015-10-30  8:32   ` Chen Yu
  2015-10-30  8:31   ` Chen Yu
  2015-10-30  8:32   ` Chen Yu
  2 siblings, 0 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:32 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, Chen Yu, stable

When a new cooling device is registered, we need to update the
thermal zone to set the newly registered cooling device to a proper
state.

This fixes a problem where the system is cool but the fan devices
are left running at full speed after boot, if the fan device is
registered after the thermal zone device.

Here is the history of why the current patch looks like this:
https://patchwork.kernel.org/patch/7273041/

CC: <stable@vger.kernel.org> #3.18+
Reference:https://bugzilla.kernel.org/show_bug.cgi?id=92431
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: szegad <szegadlo@poczta.onet.pl>
Tested-by: prash <prash.n.rao@gmail.com>
Tested-by: amish <ammdispose-arch@yahoo.com>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/thermal/thermal_core.c | 14 +++++++++++++-
 include/linux/thermal.h        |  2 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 9aae767..ba08b55 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1341,6 +1341,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	if (!result) {
 		list_add_tail(&dev->tz_node, &tz->thermal_instances);
 		list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+		atomic_set(&tz->need_update, 1);
 	}
 	mutex_unlock(&cdev->lock);
 	mutex_unlock(&tz->lock);
@@ -1450,6 +1451,7 @@ __thermal_cooling_device_register(struct device_node *np,
 				  const struct thermal_cooling_device_ops *ops)
 {
 	struct thermal_cooling_device *cdev;
+	struct thermal_zone_device *pos = NULL;
 	int result;
 
 	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1494,6 +1496,12 @@ __thermal_cooling_device_register(struct device_node *np,
 	/* Update binding information for 'this' new cdev */
 	bind_cdev(cdev);
 
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(pos, &thermal_tz_list, node)
+		if (atomic_cmpxchg(&pos->need_update, 1, 0))
+			thermal_zone_device_update(pos);
+	mutex_unlock(&thermal_list_lock);
+
 	return cdev;
 }
 
@@ -1826,6 +1834,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	tz->trips = trips;
 	tz->passive_delay = passive_delay;
 	tz->polling_delay = polling_delay;
+	/* A new thermal zone needs to be updated anyway. */
+	atomic_set(&tz->need_update, 1);
 
 	dev_set_name(&tz->device, "thermal_zone%d", tz->id);
 	result = device_register(&tz->device);
@@ -1921,7 +1931,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
 	thermal_zone_device_reset(tz);
-	thermal_zone_device_update(tz);
+	/* Update the new thermal zone and mark it as already updated. */
+	if (atomic_cmpxchg(&tz->need_update, 1, 0))
+		thermal_zone_device_update(tz);
 
 	return tz;
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 5bcabc7..385d411 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -168,6 +168,7 @@ struct thermal_attr {
  * @forced_passive:	If > 0, temperature at which to switch on all ACPI
  *			processor cooling devices.  Currently only used by the
  *			step-wise governor.
+ * @need_update:	if equals 1, thermal_zone_device_update needs to be invoked.
  * @ops:	operations this &thermal_zone_device supports
  * @tzp:	thermal zone parameters
  * @governor:	pointer to the governor for this thermal zone
@@ -195,6 +196,7 @@ struct thermal_zone_device {
 	int emul_temperature;
 	int passive;
 	unsigned int forced_passive;
+	atomic_t need_update;
 	struct thermal_zone_device_ops *ops;
 	struct thermal_zone_params *tzp;
 	struct thermal_governor *governor;
-- 
1.8.4.2


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/3][v3] Thermal: do thermal zone update after a cooling device registered
@ 2015-10-30  8:32   ` Chen Yu
  0 siblings, 0 replies; 13+ messages in thread
From: Chen Yu @ 2015-10-30  8:32 UTC (permalink / raw)
  To: rui.zhang, edubezval; +Cc: javi.merino, linux-pm, linux-kernel, Chen Yu, stable

When a new cooling device is registered, we need to update the
thermal zone to set the newly registered cooling device to a proper
state.

This fixes a problem where the system is cool but the fan devices
are left running at full speed after boot, if the fan device is
registered after the thermal zone device.

Here is the history of why the current patch looks like this:
https://patchwork.kernel.org/patch/7273041/

CC: <stable@vger.kernel.org> #3.18+
Reference:https://bugzilla.kernel.org/show_bug.cgi?id=92431
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: szegad <szegadlo@poczta.onet.pl>
Tested-by: prash <prash.n.rao@gmail.com>
Tested-by: amish <ammdispose-arch@yahoo.com>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 drivers/thermal/thermal_core.c | 14 +++++++++++++-
 include/linux/thermal.h        |  2 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 9aae767..ba08b55 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1341,6 +1341,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	if (!result) {
 		list_add_tail(&dev->tz_node, &tz->thermal_instances);
 		list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+		atomic_set(&tz->need_update, 1);
 	}
 	mutex_unlock(&cdev->lock);
 	mutex_unlock(&tz->lock);
@@ -1450,6 +1451,7 @@ __thermal_cooling_device_register(struct device_node *np,
 				  const struct thermal_cooling_device_ops *ops)
 {
 	struct thermal_cooling_device *cdev;
+	struct thermal_zone_device *pos = NULL;
 	int result;
 
 	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1494,6 +1496,12 @@ __thermal_cooling_device_register(struct device_node *np,
 	/* Update binding information for 'this' new cdev */
 	bind_cdev(cdev);
 
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(pos, &thermal_tz_list, node)
+		if (atomic_cmpxchg(&pos->need_update, 1, 0))
+			thermal_zone_device_update(pos);
+	mutex_unlock(&thermal_list_lock);
+
 	return cdev;
 }
 
@@ -1826,6 +1834,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	tz->trips = trips;
 	tz->passive_delay = passive_delay;
 	tz->polling_delay = polling_delay;
+	/* A new thermal zone needs to be updated anyway. */
+	atomic_set(&tz->need_update, 1);
 
 	dev_set_name(&tz->device, "thermal_zone%d", tz->id);
 	result = device_register(&tz->device);
@@ -1921,7 +1931,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
 
 	thermal_zone_device_reset(tz);
-	thermal_zone_device_update(tz);
+	/* Update the new thermal zone and mark it as already updated. */
+	if (atomic_cmpxchg(&tz->need_update, 1, 0))
+		thermal_zone_device_update(tz);
 
 	return tz;
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 5bcabc7..385d411 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -168,6 +168,7 @@ struct thermal_attr {
  * @forced_passive:	If > 0, temperature at which to switch on all ACPI
  *			processor cooling devices.  Currently only used by the
  *			step-wise governor.
+ * @need_update:	if equals 1, thermal_zone_device_update needs to be invoked.
  * @ops:	operations this &thermal_zone_device supports
  * @tzp:	thermal zone parameters
  * @governor:	pointer to the governor for this thermal zone
@@ -195,6 +196,7 @@ struct thermal_zone_device {
 	int emul_temperature;
 	int passive;
 	unsigned int forced_passive;
+	atomic_t need_update;
 	struct thermal_zone_device_ops *ops;
 	struct thermal_zone_params *tzp;
 	struct thermal_governor *governor;
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
  2015-10-30  8:31   ` Chen Yu
  (?)
@ 2015-12-31 18:43   ` Eduardo Valentin
  2016-01-12  6:42     ` Chen, Yu C
  -1 siblings, 1 reply; 13+ messages in thread
From: Eduardo Valentin @ 2015-12-31 18:43 UTC (permalink / raw)
  To: Chen Yu; +Cc: rui.zhang, javi.merino, linux-pm, linux-kernel, stable

For some reason, I thought Rui had picked this already. 

Anyways, here are a couple of comments:

On Fri, Oct 30, 2015 at 04:31:47PM +0800, Chen Yu wrote:
> From: Zhang Rui <rui.zhang@intel.com>
> 
> After thermal zone device registered, as we have not read any
> temperature before, thus tz->temperature should not be 0,
> which actually means 0C, and thermal trend is not available.
> In this case, we need specially handling for the first
> thermal_zone_device_update().
> 
> Both thermal core framework and step_wise governor is
> enhanced to handle this. And since the step_wise governor
> is the only one that uses trends, so it's the only thermal
> governor that needs to be updated.
> 
> CC: <stable@vger.kernel.org> #3.18+
> Tested-by: Manuel Krause <manuelkrause@netscape.net>
> Tested-by: szegad <szegadlo@poczta.onet.pl>
> Tested-by: prash <prash.n.rao@gmail.com>
> Tested-by: amish <ammdispose-arch@yahoo.com>
> Tested-by: Matthias <morpheusxyz123@yahoo.de>
> Reviewed-by: Javi Merino <javi.merino@arm.com>
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> ---
>  drivers/thermal/step_wise.c    | 17 +++++++++++++++--
>  drivers/thermal/thermal_core.c | 19 +++++++++++++++++--
>  drivers/thermal/thermal_core.h |  1 +

I would prefer if you could split this patch in two: one for thermal
core and another one for step wise.

>  include/linux/thermal.h        |  3 +++
>  4 files changed, 36 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
> index 2f9f708..ea9366a 100644
> --- a/drivers/thermal/step_wise.c
> +++ b/drivers/thermal/step_wise.c
> @@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
>  	next_target = instance->target;
>  	dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
>  
> +	if (!instance->initialized) {
> +		if (throttle) {
> +			next_target = (cur_state + 1) >= instance->upper ?
> +					instance->upper :
> +					((cur_state + 1) < instance->lower ?
> +					instance->lower : (cur_state + 1));
> +		} else {
> +			next_target = THERMAL_NO_TARGET;
> +		}
> +
> +		return next_target;
> +	}
> +
>  	switch (trend) {
>  	case THERMAL_TREND_RAISING:
>  		if (throttle) {
> @@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
>  		dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
>  					old_target, (int)instance->target);
>  
> -		if (old_target == instance->target)
> +		if (instance->initialized && old_target == instance->target)
>  			continue;
>  
>  		/* Activate a passive thermal instance */
> @@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
>  			instance->target == THERMAL_NO_TARGET)
>  			update_passive_instance(tz, trip_type, -1);
>  
> -
> +		instance->initialized = true;
>  		instance->cdev->updated = false; /* cdev needs update */
>  	}
>  

Considering that I understood the problem and your proposal well, I
would say these changes on step wise are the perfect case for setting up
a step_wise.bind_to_tz(). bind_to_tz() is already designed as an
opportunity for governor to check the thermal zone status at the time of
binding. Remember that moving to bind_to_tz() covers not only
registration time, but governor switching too (say, user chooses
user_space, then step_wise).

The above code seems to be correct, but after reviewing the code
of step_wise.throttle(), I would say it is already complicated and
deserves simplification, when possible.



> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index d9e525c..682bc1e 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -532,8 +532,22 @@ static void update_temperature(struct thermal_zone_device *tz)
>  	mutex_unlock(&tz->lock);
>  
>  	trace_thermal_temperature(tz);
> -	dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
> -				tz->last_temperature, tz->temperature);
> +	if (tz->last_temperature == THERMAL_TEMP_INVALID)
> +		dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
> +			tz->temperature);
> +	else
> +		dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
> +			tz->last_temperature, tz->temperature);
> +}
> +
> +static void thermal_zone_device_reset(struct thermal_zone_device *tz)
> +{
> +	struct thermal_instance *pos;
> +
> +	tz->temperature = THERMAL_TEMP_INVALID;
> +	tz->passive = 0;
> +	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
> +		pos->initialized = false;
>  }
>  
>  void thermal_zone_device_update(struct thermal_zone_device *tz)
> @@ -1900,6 +1914,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
>  
>  	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
>  
> +	thermal_zone_device_reset(tz);
>  	thermal_zone_device_update(tz);
>  
>  	return tz;
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index d7ac1fc..749d41a 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -41,6 +41,7 @@ struct thermal_instance {
>  	struct thermal_zone_device *tz;
>  	struct thermal_cooling_device *cdev;
>  	int trip;
> +	bool initialized;
>  	unsigned long upper;	/* Highest cooling state for this trip point */
>  	unsigned long lower;	/* Lowest cooling state for this trip point */
>  	unsigned long target;	/* expected cooling state */
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 157d366..5bcabc7 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -43,6 +43,9 @@
>  /* Default weight of a bound cooling device */
>  #define THERMAL_WEIGHT_DEFAULT 0
>  
> +/* use value, which < 0K, to indicate an invalid/uninitialized temperature */
> +#define THERMAL_TEMP_INVALID	-274000
> +
>  /* Unit conversion macros */
>  #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
>  				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
> -- 
> 1.8.4.2
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3][v3] Thermal: handle thermal zone device properly during system sleep
  2015-10-30  8:31   ` Chen Yu
  (?)
@ 2015-12-31 18:48   ` Eduardo Valentin
  -1 siblings, 0 replies; 13+ messages in thread
From: Eduardo Valentin @ 2015-12-31 18:48 UTC (permalink / raw)
  To: Chen Yu; +Cc: rui.zhang, javi.merino, linux-pm, linux-kernel, stable

On Fri, Oct 30, 2015 at 04:31:58PM +0800, Chen Yu wrote:
> From: Zhang Rui <rui.zhang@intel.com>
> 
> Current thermal code does not handle system sleep well because
> 1. the cooling device cooling state may be changed during suspend
> 2. the previous temperature reading becomes invalid after resumed because
>    it is got before system sleep
> 3. updating thermal zone device during suspending/resuming
>    is wrong because some devices may have already been suspended
>    or may have not been resumed.
> 
> Thus, the proper way to do this is to cancel all thermal zone
> device update requirements during suspend/resume, and after all
> the devices have been resumed, reset and update every registered
> thermal zone devices.
> 
> This also fixes a regression introduced by:
> Commit 19593a1fb1f6 ("ACPI / fan: convert to platform driver")
> Because, with above commit applied, all the fan devices are attached
> to the acpi_general_pm_domain, and they are turned on by the pm_domain
> automatically after resume, without the awareness of thermal core.
> 
> CC: <stable@vger.kernel.org> #3.18+
> Reference: https://bugzilla.kernel.org/show_bug.cgi?id=78201
> Reference: https://bugzilla.kernel.org/show_bug.cgi?id=91411
> Tested-by: Manuel Krause <manuelkrause@netscape.net>
> Tested-by: szegad <szegadlo@poczta.onet.pl>
> Tested-by: prash <prash.n.rao@gmail.com>
> Tested-by: amish <ammdispose-arch@yahoo.com>
> Tested-by: Matthias <morpheusxyz123@yahoo.de>
> Reviewed-by: Javi Merino <javi.merino@arm.com>
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> Signed-off-by: Chen Yu <yu.c.chen@intel.com>

Acked-by: Eduardo Valentin <edubezval@gmail.com>


> ---
>  drivers/thermal/thermal_core.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 42 insertions(+)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 682bc1e..9aae767 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -37,6 +37,7 @@
>  #include <linux/of.h>
>  #include <net/netlink.h>
>  #include <net/genetlink.h>
> +#include <linux/suspend.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/thermal.h>
> @@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
>  static DEFINE_MUTEX(thermal_list_lock);
>  static DEFINE_MUTEX(thermal_governor_lock);
>  
> +static atomic_t in_suspend;
> +
>  static struct thermal_governor *def_governor;
>  
>  static struct thermal_governor *__find_governor(const char *name)
> @@ -554,6 +557,9 @@ void thermal_zone_device_update(struct thermal_zone_device *tz)
>  {
>  	int count;
>  
> +	if (atomic_read(&in_suspend))
> +		return;
> +
>  	if (!tz->ops->get_temp)
>  		return;
>  
> @@ -2155,6 +2161,36 @@ static void thermal_unregister_governors(void)
>  	thermal_gov_power_allocator_unregister();
>  }
>  
> +static int thermal_pm_notify(struct notifier_block *nb,
> +				unsigned long mode, void *_unused)
> +{
> +	struct thermal_zone_device *tz;
> +
> +	switch (mode) {
> +	case PM_HIBERNATION_PREPARE:
> +	case PM_RESTORE_PREPARE:
> +	case PM_SUSPEND_PREPARE:
> +		atomic_set(&in_suspend, 1);
> +		break;
> +	case PM_POST_HIBERNATION:
> +	case PM_POST_RESTORE:
> +	case PM_POST_SUSPEND:
> +		atomic_set(&in_suspend, 0);
> +		list_for_each_entry(tz, &thermal_tz_list, node) {
> +			thermal_zone_device_reset(tz);
> +			thermal_zone_device_update(tz);
> +		}
> +		break;
> +	default:
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static struct notifier_block thermal_pm_nb = {
> +	.notifier_call = thermal_pm_notify,
> +};
> +
>  static int __init thermal_init(void)
>  {
>  	int result;
> @@ -2175,6 +2211,11 @@ static int __init thermal_init(void)
>  	if (result)
>  		goto exit_netlink;
>  
> +	result = register_pm_notifier(&thermal_pm_nb);
> +	if (result)
> +		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
> +			result);
> +
>  	return 0;
>  
>  exit_netlink:
> @@ -2194,6 +2235,7 @@ error:
>  
>  static void __exit thermal_exit(void)
>  {
> +	unregister_pm_notifier(&thermal_pm_nb);
>  	of_thermal_destroy_zones();
>  	genetlink_exit();
>  	class_unregister(&thermal_class);
> -- 
> 1.8.4.2
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3][v3] Thermal: do thermal zone update after a cooling device registered
  2015-10-30  8:32   ` Chen Yu
  (?)
@ 2015-12-31 19:03   ` Eduardo Valentin
  2016-01-01 15:38     ` Chen, Yu C
  -1 siblings, 1 reply; 13+ messages in thread
From: Eduardo Valentin @ 2015-12-31 19:03 UTC (permalink / raw)
  To: Chen Yu; +Cc: rui.zhang, javi.merino, linux-pm, linux-kernel, stable

Hello Chen,

On Fri, Oct 30, 2015 at 04:32:10PM +0800, Chen Yu wrote:
> When a new cooling device is registered, we need to update the
> thermal zone to set the new registered cooling device to a proper
> state.
> 
> This fixes a problem that the system is cool, while the fan devices
> are left running on full speed after boot, if fan device is registered
> after thermal zone device.
> 
> Here is the history of why current patch looks like this:
> https://patchwork.kernel.org/patch/7273041/
> 
> CC: <stable@vger.kernel.org> #3.18+
> Reference:https://bugzilla.kernel.org/show_bug.cgi?id=92431
> Tested-by: Manuel Krause <manuelkrause@netscape.net>
> Tested-by: szegad <szegadlo@poczta.onet.pl>
> Tested-by: prash <prash.n.rao@gmail.com>
> Tested-by: amish <ammdispose-arch@yahoo.com>
> Reviewed-by: Javi Merino <javi.merino@arm.com>
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> ---
>  drivers/thermal/thermal_core.c | 14 +++++++++++++-
>  include/linux/thermal.h        |  2 ++
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 9aae767..ba08b55 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1341,6 +1341,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
>  	if (!result) {
>  		list_add_tail(&dev->tz_node, &tz->thermal_instances);
>  		list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
> +		atomic_set(&tz->need_update, 1);
>  	}
>  	mutex_unlock(&cdev->lock);
>  	mutex_unlock(&tz->lock);
> @@ -1450,6 +1451,7 @@ __thermal_cooling_device_register(struct device_node *np,
>  				  const struct thermal_cooling_device_ops *ops)
>  {
>  	struct thermal_cooling_device *cdev;
> +	struct thermal_zone_device *pos = NULL;
>  	int result;
>  
>  	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
> @@ -1494,6 +1496,12 @@ __thermal_cooling_device_register(struct device_node *np,
>  	/* Update binding information for 'this' new cdev */
>  	bind_cdev(cdev);
>  
> +	mutex_lock(&thermal_list_lock);
> +	list_for_each_entry(pos, &thermal_tz_list, node)
> +		if (atomic_cmpxchg(&pos->need_update, 1, 0))
> +			thermal_zone_device_update(pos);
> +	mutex_unlock(&thermal_list_lock);
> +
>  	return cdev;
>  }
>  
> @@ -1826,6 +1834,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
>  	tz->trips = trips;
>  	tz->passive_delay = passive_delay;
>  	tz->polling_delay = polling_delay;
> +	/* A new thermal zone needs to be updated anyway. */
> +	atomic_set(&tz->need_update, 1);
>  
>  	dev_set_name(&tz->device, "thermal_zone%d", tz->id);
>  	result = device_register(&tz->device);
> @@ -1921,7 +1931,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
>  	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
>  
>  	thermal_zone_device_reset(tz);
> -	thermal_zone_device_update(tz);
> +	/* Update the new thermal zone and mark it as already updated. */
> +	if (atomic_cmpxchg(&tz->need_update, 1, 0))
> +		thermal_zone_device_update(tz);
>  
>  	return tz;
>  
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 5bcabc7..385d411 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -168,6 +168,7 @@ struct thermal_attr {
>   * @forced_passive:	If > 0, temperature at which to switch on all ACPI
>   *			processor cooling devices.  Currently only used by the
>   *			step-wise governor.
> + * @need_update:	if equals 1, thermal_zone_device_update needs to be invoked.
>   * @ops:	operations this &thermal_zone_device supports
>   * @tzp:	thermal zone parameters
>   * @governor:	pointer to the governor for this thermal zone
> @@ -195,6 +196,7 @@ struct thermal_zone_device {
>  	int emul_temperature;
>  	int passive;
>  	unsigned int forced_passive;
> +	atomic_t need_update;

The only problem I have with the above change is the fact that it does
not touch thermal_zone_device_update() in any place. Please, remember
that thermal_zone_device_update() is an exported function. That means
that anyone can actually call it. And that is what happens today. If you
git grep for it you will see that there are occurrences inside thermal
core and inside thermal drivers. And this change does not take care of
them. Are you sure you don't need to revisit all occurrences?

Shouldn't thermal_zone_device_update() unset the need_update bit, given
it has just updated the zone?

Also, what happens to the logic when external changes happen? Say, we
have writable trip points enabled, and the user changes a trip point value?


BR,

>  	struct thermal_zone_device_ops *ops;
>  	struct thermal_zone_params *tzp;
>  	struct thermal_governor *governor;
> -- 
> 1.8.4.2
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 3/3][v3] Thermal: do thermal zone update after a cooling device registered
  2015-12-31 19:03   ` Eduardo Valentin
@ 2016-01-01 15:38     ` Chen, Yu C
  0 siblings, 0 replies; 13+ messages in thread
From: Chen, Yu C @ 2016-01-01 15:38 UTC (permalink / raw)
  To: Eduardo Valentin; +Cc: Zhang, Rui, javi.merino, linux-pm, linux-kernel, stable

Hi Eduardo,
thanks for your review,

> -----Original Message-----
> From: Eduardo Valentin [mailto:edubezval@gmail.com]
> Sent: Friday, January 01, 2016 3:04 AM
> To: Chen, Yu C
> Cc: Zhang, Rui; javi.merino@arm.com; linux-pm@vger.kernel.org; linux-
> kernel@vger.kernel.org; stable@vger.kernel.org
> Subject: Re: [PATCH 3/3][v3] Thermal: do thermal zone update after a cooling
> device registered
> 
> Hello Chen,
> 
> On Fri, Oct 30, 2015 at 04:32:10PM +0800, Chen Yu wrote:
> > When a new cooling device is registered, we need to update the thermal
> > zone to set the new registered cooling device to a proper state.
> >
> > This fixes a problem that the system is cool, while the fan devices
> > are left running on full speed after boot, if fan device is registered
> > after thermal zone device.
> >
> > Here is the history of why current patch looks like this:
> > https://patchwork.kernel.org/patch/7273041/
> >
> > CC: <stable@vger.kernel.org> #3.18+
> > Reference:https://bugzilla.kernel.org/show_bug.cgi?id=92431
> > Tested-by: Manuel Krause <manuelkrause@netscape.net>
> > Tested-by: szegad <szegadlo@poczta.onet.pl>
> > Tested-by: prash <prash.n.rao@gmail.com>
> > Tested-by: amish <ammdispose-arch@yahoo.com>
> > Reviewed-by: Javi Merino <javi.merino@arm.com>
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> > ---
> >  drivers/thermal/thermal_core.c | 14 +++++++++++++-
> >  include/linux/thermal.h        |  2 ++
> >  2 files changed, 15 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c index 9aae767..ba08b55 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -1341,6 +1341,7 @@ int thermal_zone_bind_cooling_device(struct
> thermal_zone_device *tz,
> >  	if (!result) {
> >  		list_add_tail(&dev->tz_node, &tz->thermal_instances);
> >  		list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
> > +		atomic_set(&tz->need_update, 1);
> >  	}
> >  	mutex_unlock(&cdev->lock);
> >  	mutex_unlock(&tz->lock);
> > @@ -1450,6 +1451,7 @@ __thermal_cooling_device_register(struct
> device_node *np,
> >  				  const struct thermal_cooling_device_ops
> *ops)  {
> >  	struct thermal_cooling_device *cdev;
> > +	struct thermal_zone_device *pos = NULL;
> >  	int result;
> >
> >  	if (type && strlen(type) >= THERMAL_NAME_LENGTH) @@ -1494,6
> +1496,12
> > @@ __thermal_cooling_device_register(struct device_node *np,
> >  	/* Update binding information for 'this' new cdev */
> >  	bind_cdev(cdev);
> >
> > +	mutex_lock(&thermal_list_lock);
> > +	list_for_each_entry(pos, &thermal_tz_list, node)
> > +		if (atomic_cmpxchg(&pos->need_update, 1, 0))
> > +			thermal_zone_device_update(pos);
> > +	mutex_unlock(&thermal_list_lock);
> > +
> >  	return cdev;
> >  }
> >
> > @@ -1826,6 +1834,8 @@ struct thermal_zone_device
> *thermal_zone_device_register(const char *type,
> >  	tz->trips = trips;
> >  	tz->passive_delay = passive_delay;
> >  	tz->polling_delay = polling_delay;
> > +	/* A new thermal zone needs to be updated anyway. */
> > +	atomic_set(&tz->need_update, 1);
> >
> >  	dev_set_name(&tz->device, "thermal_zone%d", tz->id);
> >  	result = device_register(&tz->device); @@ -1921,7 +1931,9 @@
> struct
> > thermal_zone_device *thermal_zone_device_register(const char *type,
> >  	INIT_DELAYED_WORK(&(tz->poll_queue),
> thermal_zone_device_check);
> >
> >  	thermal_zone_device_reset(tz);
> > -	thermal_zone_device_update(tz);
> > +	/* Update the new thermal zone and mark it as already updated. */
> > +	if (atomic_cmpxchg(&tz->need_update, 1, 0))
> > +		thermal_zone_device_update(tz);
> >
> >  	return tz;
> >
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h index
> > 5bcabc7..385d411 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -168,6 +168,7 @@ struct thermal_attr {
> >   * @forced_passive:	If > 0, temperature at which to switch on all ACPI
> >   *			processor cooling devices.  Currently only used by the
> >   *			step-wise governor.
> > + * @need_update:	if equals 1, thermal_zone_device_update needs to
> be invoked.
> >   * @ops:	operations this &thermal_zone_device supports
> >   * @tzp:	thermal zone parameters
> >   * @governor:	pointer to the governor for this thermal zone
> > @@ -195,6 +196,7 @@ struct thermal_zone_device {
> >  	int emul_temperature;
> >  	int passive;
> >  	unsigned int forced_passive;
> > +	atomic_t need_update;
> 
> The only problem I have with the above change is the fact that it does not
> touch thermal_zone_device_update() in any place. Please, remember that
> thermal_zone_device_update() is an exported function. That means that
> anyone can actually call it. And that is what happens today. If you git grep for
> it you will see that there are occurrences inside thermal core and inside
> thermal drivers. And this change do not take care of them. Are you sure you
> don't need to revisit all occurrences?
[Yu] Do you mean this patch should be aware of other places that have already
'updated' the thermal zone for the new device, so that we don't need to
update it a second time? Yes, the current implementation seems
to be redundant when others have updated the zone for us, but it is there to make
sure the update of this thermal zone is invoked at least once if a new
cooling device is added to it.
> 
> Shouldn't thermal_zone_device_update() unset the need_update bit, given
> it has just updated the zone?
[Yu] If thermal_zone_device_update takes care of this flag, we might need to make
thermal_zone_device_update an atomic function first. Maybe need_update
should be renamed to new_device_added.
> 
> Also, what happens to the logic when external changes happens? Say, we
> have writable trip points enabled, and user changes a trip point value?
[Yu] If the user changes a trip point, since thermal_zone_device_update is currently
not atomic, this might cause problems when thermal_zone_device_update is executing in parallel,
and we might need to make thermal_zone_device_update an atomic operation in the future.
> 
> 
> BR,
> 
> >  	struct thermal_zone_device_ops *ops;
> >  	struct thermal_zone_params *tzp;
> >  	struct thermal_governor *governor;
> > --
> > 1.8.4.2
> >
thanks and happy new year,
Yu

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
  2015-12-31 18:43   ` Eduardo Valentin
@ 2016-01-12  6:42     ` Chen, Yu C
  2016-01-12 13:35       ` Zhang, Rui
  0 siblings, 1 reply; 13+ messages in thread
From: Chen, Yu C @ 2016-01-12  6:42 UTC (permalink / raw)
  To: Eduardo Valentin, Zhang, Rui; +Cc: javi.merino, linux-pm, linux-kernel, stable

Hi Eduardo,
Thanks for your review, and sorry that I missed your email.

> -----Original Message-----
> From: linux-pm-owner@vger.kernel.org [mailto:linux-pm-
> owner@vger.kernel.org] On Behalf Of Eduardo Valentin
> Sent: Friday, January 01, 2016 2:44 AM
> To: Chen, Yu C
> Cc: Zhang, Rui; javi.merino@arm.com; linux-pm@vger.kernel.org; linux-
> kernel@vger.kernel.org; stable@vger.kernel.org
> Subject: Re: [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
> 
> For some reason, I thought Rui had picked this already.
> 
> Anyways, here are a couple of comments:
> 
> On Fri, Oct 30, 2015 at 04:31:47PM +0800, Chen Yu wrote:
> > From: Zhang Rui <rui.zhang@intel.com>
> >
> > After thermal zone device registered, as we have not read any
> > temperature before, thus tz->temperature should not be 0, which
> > actually means 0C, and thermal trend is not available.
> > In this case, we need specially handling for the first
> > thermal_zone_device_update().
> >
> > Both thermal core framework and step_wise governor is enhanced to
> > handle this. And since the step_wise governor is the only one that
> > uses trends, so it's the only thermal governor that needs to be
> > updated.
> >
> > CC: <stable@vger.kernel.org> #3.18+
> > Tested-by: Manuel Krause <manuelkrause@netscape.net>
> > Tested-by: szegad <szegadlo@poczta.onet.pl>
> > Tested-by: prash <prash.n.rao@gmail.com>
> > Tested-by: amish <ammdispose-arch@yahoo.com>
> > Tested-by: Matthias <morpheusxyz123@yahoo.de>
> > Reviewed-by: Javi Merino <javi.merino@arm.com>
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> > ---
> >  drivers/thermal/step_wise.c    | 17 +++++++++++++++--
> >  drivers/thermal/thermal_core.c | 19 +++++++++++++++++--
> > drivers/thermal/thermal_core.h |  1 +
> 
> I would prefer if you could split this patch in two. One for thermal core
> another one for step wise.
[Yu]  It would be better if we can split this patch into two, for stable material.
What do you think, Rui?
> 
> >  include/linux/thermal.h        |  3 +++
> >  4 files changed, 36 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
> > index 2f9f708..ea9366a 100644
> > --- a/drivers/thermal/step_wise.c
> > +++ b/drivers/thermal/step_wise.c
> > @@ -63,6 +63,19 @@ static unsigned long get_target_state(struct
> thermal_instance *instance,
> >  	next_target = instance->target;
> >  	dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
> >
> > +	if (!instance->initialized) {
> > +		if (throttle) {
> > +			next_target = (cur_state + 1) >= instance->upper ?
> > +					instance->upper :
> > +					((cur_state + 1) < instance->lower ?
> > +					instance->lower : (cur_state + 1));
> > +		} else {
> > +			next_target = THERMAL_NO_TARGET;
> > +		}
> > +
> > +		return next_target;
> > +	}
> > +
> >  	switch (trend) {
> >  	case THERMAL_TREND_RAISING:
> >  		if (throttle) {
> > @@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct
> thermal_zone_device *tz, int trip)
> >  		dev_dbg(&instance->cdev->device, "old_target=%d,
> target=%d\n",
> >  					old_target, (int)instance->target);
> >
> > -		if (old_target == instance->target)
> > +		if (instance->initialized && old_target == instance->target)
> >  			continue;
> >
> >  		/* Activate a passive thermal instance */ @@ -161,7 +174,7
> @@
> > static void thermal_zone_trip_update(struct thermal_zone_device *tz, int
> trip)
> >  			instance->target == THERMAL_NO_TARGET)
> >  			update_passive_instance(tz, trip_type, -1);
> >
> > -
> > +		instance->initialized = true;
> >  		instance->cdev->updated = false; /* cdev needs update */
> >  	}
> >
> 
> Considering that I understood the problem and your proposal well, I would
> say these changes on step wise are the perfect case for setting up a
> step_wise.bind_to_tz(). bind_to_tz() is already designed as an opportunity
> for governor to check the thermal zone status at the time of binding.
> Remember that moving to bind_to_tz() covers not only registration time, but
> governor switching too (say, user chooses user_space, then step_wise).

[Yu] The code change in step_wise.get_target_state is mainly for the
suspend/resume scenario, which is not related to thermal zone/governor binding IMO.
> 
> The above code seams to be correct, but after reviewing the code of
> step_wise.throttle(), I would say it is already complicated and deserves
> simplification, when possible.
> 
> 
> 
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c index d9e525c..682bc1e 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -532,8 +532,22 @@ static void update_temperature(struct
> thermal_zone_device *tz)
> >  	mutex_unlock(&tz->lock);
> >
> >  	trace_thermal_temperature(tz);
> > -	dev_dbg(&tz->device, "last_temperature=%d,
> current_temperature=%d\n",
> > -				tz->last_temperature, tz->temperature);
> > +	if (tz->last_temperature == THERMAL_TEMP_INVALID)
> > +		dev_dbg(&tz->device, "last_temperature N/A,
> current_temperature=%d\n",
> > +			tz->temperature);
> > +	else
> > +		dev_dbg(&tz->device, "last_temperature=%d,
> current_temperature=%d\n",
> > +			tz->last_temperature, tz->temperature); }
> > +
> > +static void thermal_zone_device_reset(struct thermal_zone_device *tz)
> > +{
> > +	struct thermal_instance *pos;
> > +
> > +	tz->temperature = THERMAL_TEMP_INVALID;
> > +	tz->passive = 0;
> > +	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
> > +		pos->initialized = false;
> >  }
> >
> >  void thermal_zone_device_update(struct thermal_zone_device *tz) @@
> > -1900,6 +1914,7 @@ struct thermal_zone_device
> > *thermal_zone_device_register(const char *type,
> >
> >  	INIT_DELAYED_WORK(&(tz->poll_queue),
> thermal_zone_device_check);
> >
> > +	thermal_zone_device_reset(tz);
> >  	thermal_zone_device_update(tz);
> >
> >  	return tz;
> > diff --git a/drivers/thermal/thermal_core.h
> > b/drivers/thermal/thermal_core.h index d7ac1fc..749d41a 100644
> > --- a/drivers/thermal/thermal_core.h
> > +++ b/drivers/thermal/thermal_core.h
> > @@ -41,6 +41,7 @@ struct thermal_instance {
> >  	struct thermal_zone_device *tz;
> >  	struct thermal_cooling_device *cdev;
> >  	int trip;
> > +	bool initialized;
> >  	unsigned long upper;	/* Highest cooling state for this trip point */
> >  	unsigned long lower;	/* Lowest cooling state for this trip point */
> >  	unsigned long target;	/* expected cooling state */
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h index
> > 157d366..5bcabc7 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -43,6 +43,9 @@
> >  /* Default weight of a bound cooling device */  #define
> > THERMAL_WEIGHT_DEFAULT 0
> >
> > +/* use value, which < 0K, to indicate an invalid/uninitialized temperature
> */
> > +#define THERMAL_TEMP_INVALID	-274000
> > +
> >  /* Unit conversion macros */
> >  #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
> >  				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
> > --
> > 1.8.4.2

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
  2016-01-12  6:42     ` Chen, Yu C
@ 2016-01-12 13:35       ` Zhang, Rui
  0 siblings, 0 replies; 13+ messages in thread
From: Zhang, Rui @ 2016-01-12 13:35 UTC (permalink / raw)
  To: Chen, Yu C, Eduardo Valentin; +Cc: javi.merino, linux-pm, linux-kernel, stable



> -----Original Message-----
> From: Chen, Yu C
> Sent: Tuesday, January 12, 2016 2:42 PM
> To: Eduardo Valentin <edubezval@gmail.com>; Zhang, Rui
> <rui.zhang@intel.com>
> Cc: javi.merino@arm.com; linux-pm@vger.kernel.org; linux-
> kernel@vger.kernel.org; stable@vger.kernel.org
> Subject: RE: [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly
> Importance: High
> 
> Hi Eduardo,
> Thanks for your review, and sorry that I missed your email.
> 
> > -----Original Message-----
> > From: linux-pm-owner@vger.kernel.org [mailto:linux-pm-
> > owner@vger.kernel.org] On Behalf Of Eduardo Valentin
> > Sent: Friday, January 01, 2016 2:44 AM
> > To: Chen, Yu C
> > Cc: Zhang, Rui; javi.merino@arm.com; linux-pm@vger.kernel.org; linux-
> > kernel@vger.kernel.org; stable@vger.kernel.org
> > Subject: Re: [PATCH 1/3][v3] Thermal: initialize thermal zone device
> > correctly
> >
> > For some reason, I thought Rui had picked this already.
> >
> > Anyways, here are a couple of comments:
> >
> > On Fri, Oct 30, 2015 at 04:31:47PM +0800, Chen Yu wrote:
> > > From: Zhang Rui <rui.zhang@intel.com>
> > >
> > > After thermal zone device registered, as we have not read any
> > > temperature before, thus tz->temperature should not be 0, which
> > > actually means 0C, and thermal trend is not available.
> > > In this case, we need specially handling for the first
> > > thermal_zone_device_update().
> > >
> > > Both thermal core framework and step_wise governor is enhanced to
> > > handle this. And since the step_wise governor is the only one that
> > > uses trends, so it's the only thermal governor that needs to be
> > > updated.
> > >
> > > CC: <stable@vger.kernel.org> #3.18+
> > > Tested-by: Manuel Krause <manuelkrause@netscape.net>
> > > Tested-by: szegad <szegadlo@poczta.onet.pl>
> > > Tested-by: prash <prash.n.rao@gmail.com>
> > > Tested-by: amish <ammdispose-arch@yahoo.com>
> > > Tested-by: Matthias <morpheusxyz123@yahoo.de>
> > > Reviewed-by: Javi Merino <javi.merino@arm.com>
> > > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > > Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> > > ---
> > >  drivers/thermal/step_wise.c    | 17 +++++++++++++++--
> > >  drivers/thermal/thermal_core.c | 19 +++++++++++++++++--
> > > drivers/thermal/thermal_core.h |  1 +
> >
> > I would prefer if you could split this patch in two. One for thermal
> > core another one for step wise.
> [Yu]  It would be better if we can split this patch into two, for stable material.
> What do you think, Rui?

[Zhang, Rui] No. First of all, we cannot take one patch for upstream and then split it into two for stable. Second, I'm okay if the code is organized in two patches as Eduardo described, but at the same time, I don't think it's a problem if it's sent as one patch.

> >
> > >  include/linux/thermal.h        |  3 +++
> > >  4 files changed, 36 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/thermal/step_wise.c
> > > b/drivers/thermal/step_wise.c index 2f9f708..ea9366a 100644
> > > --- a/drivers/thermal/step_wise.c
> > > +++ b/drivers/thermal/step_wise.c
> > > @@ -63,6 +63,19 @@ static unsigned long get_target_state(struct
> > thermal_instance *instance,
> > >  	next_target = instance->target;
> > >  	dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
> > >
> > > +	if (!instance->initialized) {
> > > +		if (throttle) {
> > > +			next_target = (cur_state + 1) >= instance->upper ?
> > > +					instance->upper :
> > > +					((cur_state + 1) < instance->lower ?
> > > +					instance->lower : (cur_state + 1));
> > > +		} else {
> > > +			next_target = THERMAL_NO_TARGET;
> > > +		}
> > > +
> > > +		return next_target;
> > > +	}
> > > +
> > >  	switch (trend) {
> > >  	case THERMAL_TREND_RAISING:
> > >  		if (throttle) {
> > > @@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct
> > thermal_zone_device *tz, int trip)
> > >  		dev_dbg(&instance->cdev->device, "old_target=%d,
> > target=%d\n",
> > >  					old_target, (int)instance->target);
> > >
> > > -		if (old_target == instance->target)
> > > +		if (instance->initialized && old_target == instance->target)
> > >  			continue;
> > >
> > >  		/* Activate a passive thermal instance */ @@ -161,7 +174,7
> > @@
> > > static void thermal_zone_trip_update(struct thermal_zone_device *tz,
> > > int
> > trip)
> > >  			instance->target == THERMAL_NO_TARGET)
> > >  			update_passive_instance(tz, trip_type, -1);
> > >
> > > -
> > > +		instance->initialized = true;
> > >  		instance->cdev->updated = false; /* cdev needs update */
> > >  	}
> > >
> >
> > Considering that I understood the problem and your proposal well, I
> > would say these changes on step wise are the perfect case for setting
> > up a step_wise.bind_to_tz(). bind_to_tz() is already designed as an
> > opportunity for governor to check the thermal zone status at the time of
> binding.
> > Remember that moving to bind_to_tz() covers not only registration
> > time, but governor switching too (say, user chooses user_space, then
> step_wise).
> 
> [Yu] The code change in step_wise.get_target_state is mainly for the
> suspend/resume scenario, which is not involved with thermal
> zone/governor bindings IMO.

Agreed.

Thanks,
rui
> >
> > The above code seems to be correct, but after reviewing the code of
> > step_wise.throttle(), I would say it is already complicated and
> > deserves simplification, when possible.
> >
> >
> >
> > > diff --git a/drivers/thermal/thermal_core.c
> > > b/drivers/thermal/thermal_core.c index d9e525c..682bc1e 100644
> > > --- a/drivers/thermal/thermal_core.c
> > > +++ b/drivers/thermal/thermal_core.c
> > > @@ -532,8 +532,22 @@ static void update_temperature(struct
> > thermal_zone_device *tz)
> > >  	mutex_unlock(&tz->lock);
> > >
> > >  	trace_thermal_temperature(tz);
> > > -	dev_dbg(&tz->device, "last_temperature=%d,
> > current_temperature=%d\n",
> > > -				tz->last_temperature, tz->temperature);
> > > +	if (tz->last_temperature == THERMAL_TEMP_INVALID)
> > > +		dev_dbg(&tz->device, "last_temperature N/A,
> > current_temperature=%d\n",
> > > +			tz->temperature);
> > > +	else
> > > +		dev_dbg(&tz->device, "last_temperature=%d,
> > current_temperature=%d\n",
> > > +			tz->last_temperature, tz->temperature); }
> > > +
> > > +static void thermal_zone_device_reset(struct thermal_zone_device
> > > +*tz) {
> > > +	struct thermal_instance *pos;
> > > +
> > > +	tz->temperature = THERMAL_TEMP_INVALID;
> > > +	tz->passive = 0;
> > > +	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
> > > +		pos->initialized = false;
> > >  }
> > >
> > >  void thermal_zone_device_update(struct thermal_zone_device *tz)
> @@
> > > -1900,6 +1914,7 @@ struct thermal_zone_device
> > > *thermal_zone_device_register(const char *type,
> > >
> > >  	INIT_DELAYED_WORK(&(tz->poll_queue),
> > thermal_zone_device_check);
> > >
> > > +	thermal_zone_device_reset(tz);
> > >  	thermal_zone_device_update(tz);
> > >
> > >  	return tz;
> > > diff --git a/drivers/thermal/thermal_core.h
> > > b/drivers/thermal/thermal_core.h index d7ac1fc..749d41a 100644
> > > --- a/drivers/thermal/thermal_core.h
> > > +++ b/drivers/thermal/thermal_core.h
> > > @@ -41,6 +41,7 @@ struct thermal_instance {
> > >  	struct thermal_zone_device *tz;
> > >  	struct thermal_cooling_device *cdev;
> > >  	int trip;
> > > +	bool initialized;
> > >  	unsigned long upper;	/* Highest cooling state for this trip point */
> > >  	unsigned long lower;	/* Lowest cooling state for this trip point */
> > >  	unsigned long target;	/* expected cooling state */
> > > diff --git a/include/linux/thermal.h b/include/linux/thermal.h index
> > > 157d366..5bcabc7 100644
> > > --- a/include/linux/thermal.h
> > > +++ b/include/linux/thermal.h
> > > @@ -43,6 +43,9 @@
> > >  /* Default weight of a bound cooling device */  #define
> > > THERMAL_WEIGHT_DEFAULT 0
> > >
> > > +/* use value, which < 0K, to indicate an invalid/uninitialized
> > > +temperature
> > */
> > > +#define THERMAL_TEMP_INVALID	-274000
> > > +
> > >  /* Unit conversion macros */
> > >  #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
> > >  				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
> > > --
> > > 1.8.4.2

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-01-12 13:35 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-30  8:31 [PATCH 0/3][v3] Fix thermal problems during suspend/bootup Chen Yu
2015-10-30  8:31 ` [PATCH 1/3][v3] Thermal: initialize thermal zone device correctly Chen Yu
2015-10-30  8:31   ` Chen Yu
2015-12-31 18:43   ` Eduardo Valentin
2016-01-12  6:42     ` Chen, Yu C
2016-01-12 13:35       ` Zhang, Rui
2015-10-30  8:31 ` [PATCH 2/3][v3] Thermal: handle thermal zone device properly during system sleep Chen Yu
2015-10-30  8:31   ` Chen Yu
2015-12-31 18:48   ` Eduardo Valentin
2015-10-30  8:32 ` [PATCH 3/3][v3] Thermal: do thermal zone update after a cooling device registered Chen Yu
2015-10-30  8:32   ` Chen Yu
2015-12-31 19:03   ` Eduardo Valentin
2016-01-01 15:38     ` Chen, Yu C

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.