linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eduardo Valentin <evalenti@kernel.org>
To: eduval@amazon.com, linux-pm@vger.kernel.org
Cc: "Rafael J. Wysocki" <rafael@kernel.org>,
	Daniel Lezcano <daniel.lezcano@linaro.org>,
	Amit Kucheria <amitk@kernel.org>, Zhang Rui <rui.zhang@intel.com>,
	Jonathan Corbet <corbet@lwn.net>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 7/7] thermal: stats: add error accounting to thermal zone
Date: Thu, 18 May 2023 20:27:19 -0700	[thread overview]
Message-ID: <20230519032719.2581689-8-evalenti@kernel.org> (raw)
In-Reply-To: <20230519032719.2581689-1-evalenti@kernel.org>

From: Eduardo Valentin <eduval@amazon.com>

This patch adds an extra stat to report how many
temperature update failures were detected.
The error count is increased whenever the thermal
driver returns an actual error or when the temperature
is non-positive.

Sample:

$ cat /sys/class/thermal/thermal_zone0/stats/error_count
0
$ echo -1 > /sys/class/thermal/thermal_zone0/emul_temp
$ cat /sys/class/thermal/thermal_zone0/stats/error_count
3

Cc: "Rafael J. Wysocki" <rafael@kernel.org> (supporter:THERMAL)
Cc: Daniel Lezcano <daniel.lezcano@linaro.org> (supporter:THERMAL)
Cc: Amit Kucheria <amitk@kernel.org> (reviewer:THERMAL)
Cc: Zhang Rui <rui.zhang@intel.com> (reviewer:THERMAL)
Cc: Jonathan Corbet <corbet@lwn.net> (maintainer:DOCUMENTATION)
Cc: linux-pm@vger.kernel.org (open list:THERMAL)
Cc: linux-doc@vger.kernel.org (open list:DOCUMENTATION)
Cc: linux-kernel@vger.kernel.org (open list)

Signed-off-by: Eduardo Valentin <eduval@amazon.com>
---
 drivers/thermal/thermal_core.c  |  3 ++
 drivers/thermal/thermal_core.h  |  7 ++++
 drivers/thermal/thermal_sysfs.c | 64 +++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 2ff7d9c7c973..359e7b2ff0e3 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -389,6 +389,9 @@ static void update_temperature(struct thermal_zone_device *tz)
 		/* tell the governor its source is hosed */
 		handle_error_temperature(tz, ret);
 
+		/* book keeping */
+		thermal_zone_device_error_stats_update(tz, ret);
+
 		return;
 	}
 
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index ef37b92bbb7c..612f93e6c257 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -141,12 +141,19 @@ ssize_t weight_store(struct device *, struct device_attribute *, const char *,
 
 #ifdef CONFIG_THERMAL_STATISTICS
 void thermal_zone_device_stats_update(struct thermal_zone_device *tz);
+void thermal_zone_device_error_stats_update(struct thermal_zone_device *tz,
+					    int error);
 void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
 					 struct thermal_instance *instance,
 					 unsigned long new_state);
 #else
 static inline
 void thermal_zone_device_stats_update(struct thermal_zone_device *tz) {}
+static inline
+void thermal_zone_device_error_stats_update(struct thermal_zone_device *tz,
+					    int error)
+{
+}
 static inline void
 thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
 				    struct thermal_instance *instance,
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 25851fe073c3..e511042e9dab 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -541,12 +541,21 @@ static void destroy_trip_attrs(struct thermal_zone_device *tz)
 /* thermal zone device statistics handling */
 struct thermal_zone_device_stats {
 	spinlock_t lock; /* protects this struct */
+	unsigned int error_count; /* just account them */
+	int max_temperature;
 	s64 max_gradient;
 	s64 min_gradient;
 	ktime_t last_time;
 	ktime_t *time_in_trip;
 };
 
+static void error_stats_update(struct thermal_zone_device *tz, int error)
+{
+	struct thermal_zone_device_stats *stats = tz->stats;
+
+	stats->error_count++;
+}
+
 #define DELTA_MILLI_C_TO_MICRO_C(t0, t1)		(((t0) - (t1)) * 1000)
 static void temperature_stats_update(struct thermal_zone_device *tz)
 {
@@ -555,6 +564,15 @@ static void temperature_stats_update(struct thermal_zone_device *tz)
 	s64 cur_gradient, delta_temp;
 	int i, trip_id = -1;
 
+	if (tz->temperature <= 0) {
+		/* probably a wrong reading */
+		error_stats_update(tz, tz->temperature);
+		return;
+	}
+
+	if (tz->temperature > stats->max_temperature)
+		stats->max_temperature = tz->temperature;
+
 	delta = ktime_sub(now, stats->last_time);
 	stats->last_time = now;
 
@@ -610,6 +628,31 @@ void thermal_zone_device_stats_update(struct thermal_zone_device *tz)
 	spin_unlock(&stats->lock);
 }
 
+void thermal_zone_device_error_stats_update(struct thermal_zone_device *tz,
+					    int error)
+{
+	struct thermal_zone_device_stats *stats = tz->stats;
+
+	spin_lock(&stats->lock);
+	error_stats_update(tz, error);
+	spin_unlock(&stats->lock);
+}
+
+static ssize_t max_temperature_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	struct thermal_zone_device_stats *stats = tz->stats;
+	int ret;
+
+	spin_lock(&stats->lock);
+	temperature_stats_update(tz);
+	ret = snprintf(buf, PAGE_SIZE, "%d\n", stats->max_temperature);
+	spin_unlock(&stats->lock);
+
+	return ret;
+}
+
 static ssize_t max_gradient_show(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
@@ -640,6 +683,21 @@ static ssize_t min_gradient_show(struct device *dev,
 	return ret;
 }
 
+static ssize_t error_count_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	struct thermal_zone_device_stats *stats = tz->stats;
+	int ret;
+
+	spin_lock(&stats->lock);
+	temperature_stats_update(tz);
+	ret = snprintf(buf, PAGE_SIZE, "%u\n", stats->error_count);
+	spin_unlock(&stats->lock);
+
+	return ret;
+}
+
 static ssize_t
 time_in_trip_ms_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
@@ -705,6 +763,8 @@ reset_tz_stats_store(struct device *dev, struct device_attribute *attr,
 
 	stats->min_gradient = 0;
 	stats->max_gradient = 0;
+	stats->max_temperature = 0;
+	stats->error_count = 0;
 	stats->last_time = ktime_get();
 
 	for (i = 0; i <= tz->num_trips; i++)
@@ -717,13 +777,17 @@ reset_tz_stats_store(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(min_gradient);
 static DEVICE_ATTR_RO(max_gradient);
+static DEVICE_ATTR_RO(max_temperature);
 static DEVICE_ATTR_RO(time_in_trip_ms);
+static DEVICE_ATTR_RO(error_count);
 static DEVICE_ATTR_WO(reset_tz_stats);
 
 static struct attribute *thermal_zone_device_stats_attrs[] = {
 	&dev_attr_min_gradient.attr,
 	&dev_attr_max_gradient.attr,
+	&dev_attr_max_temperature.attr,
 	&dev_attr_time_in_trip_ms.attr,
+	&dev_attr_error_count.attr,
 	&dev_attr_reset_tz_stats.attr,
 	NULL
 };
-- 
2.34.1


  parent reply	other threads:[~2023-05-19  3:28 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-19  3:27 [PATCH 0/7] thermal: enhancements on thermal stats Eduardo Valentin
2023-05-19  3:27 ` [PATCH 1/7] thermal: stats: track time each dev changes due to tz Eduardo Valentin
2023-06-20 13:43   ` Rafael J. Wysocki
2023-06-21  4:37     ` Eduardo Valentin
2023-05-19  3:27 ` [PATCH 2/7] thermal: stats: track number of change requests " Eduardo Valentin
2023-06-20 17:12   ` Rafael J. Wysocki
2023-06-21  4:40     ` Eduardo Valentin
2023-05-19  3:27 ` [PATCH 3/7] thermal: stats: introduce thermal zone stats/ directory Eduardo Valentin
2023-05-19  3:27 ` [PATCH 4/7] thermal: stats: introduce thermal zone stats/min_gradient Eduardo Valentin
2023-06-20 17:17   ` Rafael J. Wysocki
2023-05-19  3:27 ` [PATCH 5/7] thermal: stats: introduce tz time in trip Eduardo Valentin
2023-06-20 17:27   ` Rafael J. Wysocki
2023-06-21  4:45     ` Eduardo Valentin
2023-06-23 16:40       ` Rafael J. Wysocki
2023-06-28 20:00         ` Eduardo Valentin
2023-05-19  3:27 ` [PATCH 6/7] ythermal: core: report errors to governors Eduardo Valentin
2023-06-20 17:29   ` Rafael J. Wysocki
2023-06-21  4:49     ` Eduardo Valentin
2023-05-19  3:27 ` Eduardo Valentin [this message]
2023-06-20 17:32   ` [PATCH 7/7] thermal: stats: add error accounting to thermal zone Rafael J. Wysocki
2023-06-21  4:50     ` Eduardo Valentin
2023-05-24 18:22 ` [PATCH 0/7] thermal: enhancements on thermal stats Rafael J. Wysocki
2023-06-05 23:28   ` Eduardo Valentin
2023-06-20 19:05 ` Daniel Lezcano
2023-06-21  4:24   ` Eduardo Valentin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230519032719.2581689-8-evalenti@kernel.org \
    --to=evalenti@kernel.org \
    --cc=amitk@kernel.org \
    --cc=corbet@lwn.net \
    --cc=daniel.lezcano@linaro.org \
    --cc=eduval@amazon.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=rafael@kernel.org \
    --cc=rui.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).