linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] thermal: qoriq: Only enable sites that actually exist
@ 2021-11-29 11:02 Sebastian Krzyszkowiak
  2021-11-30 15:13 ` Daniel Lezcano
  0 siblings, 1 reply; 3+ messages in thread
From: Sebastian Krzyszkowiak @ 2021-11-29 11:02 UTC (permalink / raw)
  To: linux-pm
  Cc: linux-kernel, Andrey Smirnov, Zhang Rui, Amit Kucheria,
	Daniel Lezcano, Rafael J. Wysocki, kernel,
	Sebastian Krzyszkowiak

On i.MX8MQ, enabling monitoring sites that aren't connected to anything
can cause unwanted side effects on some units. This seems to happen
once some of these sites report out-of-range readings and results in
sensor misbehavior, such as thermal zone readings getting stuck or even
suddenly reporting an impossibly high value, triggering emergency
shutdowns.

The datasheet lists all non-existent sites as "reserved" and doesn't
make any guarantees about being able to enable them at all, so let's
not do that.

Fixes: 45038e03d633 ("thermal: qoriq: Enable all sensors before registering them")

Signed-off-by: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
---
 drivers/thermal/qoriq_thermal.c | 63 ++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
index 73049f9bea25..ef0848849ee2 100644
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -32,7 +32,6 @@
 #define TMR_DISABLE	0x0
 #define TMR_ME		0x80000000
 #define TMR_ALPF	0x0c000000
-#define TMR_MSITE_ALL	GENMASK(15, 0)
 
 #define REGS_TMTMIR	0x008	/* Temperature measurement interval Register */
 #define TMTMIR_DEFAULT	0x0000000f
@@ -129,33 +128,51 @@ static const struct thermal_zone_of_device_ops tmu_tz_ops = {
 static int qoriq_tmu_register_tmu_zone(struct device *dev,
 				       struct qoriq_tmu_data *qdata)
 {
-	int id;
+	int ret = 0;
+	struct device_node *np, *child, *sensor_np;
 
-	if (qdata->ver == TMU_VER1) {
-		regmap_write(qdata->regmap, REGS_TMR,
-			     TMR_MSITE_ALL | TMR_ME | TMR_ALPF);
-	} else {
-		regmap_write(qdata->regmap, REGS_V2_TMSR, TMR_MSITE_ALL);
-		regmap_write(qdata->regmap, REGS_TMR, TMR_ME | TMR_ALPF_V2);
-	}
+	np = of_find_node_by_name(NULL, "thermal-zones");
+	if (!np)
+		return -ENODEV;
+
+	sensor_np = of_node_get(dev->of_node);
 
-	for (id = 0; id < SITES_MAX; id++) {
+	for_each_available_child_of_node(np, child) {
 		struct thermal_zone_device *tzd;
-		struct qoriq_sensor *sensor = &qdata->sensor[id];
-		int ret;
+		struct qoriq_sensor *sensor;
+		int id, site;
+
+		ret = thermal_zone_of_get_sensor_id(child, sensor_np, &id);
+
+		if (ret < 0) {
+			dev_err(dev, "failed to get valid sensor id: %d\n", ret);
+			of_node_put(child);
+			break;
+		}
 
+		sensor = &qdata->sensor[id];
 		sensor->id = id;
 
+		/* Enable monitoring */
+		if (qdata->ver == TMU_VER1) {
+			site = 0x1 << (15 - id);
+			regmap_update_bits(qdata->regmap, REGS_TMR,
+					   site | TMR_ME | TMR_ALPF,
+					   site | TMR_ME | TMR_ALPF);
+		} else {
+			site = 0x1 << id;
+			regmap_update_bits(qdata->regmap, REGS_V2_TMSR, site, site);
+			regmap_write(qdata->regmap, REGS_TMR, TMR_ME | TMR_ALPF_V2);
+		}
+
 		tzd = devm_thermal_zone_of_sensor_register(dev, id,
 							   sensor,
 							   &tmu_tz_ops);
-		ret = PTR_ERR_OR_ZERO(tzd);
-		if (ret) {
-			if (ret == -ENODEV)
-				continue;
-
-			regmap_write(qdata->regmap, REGS_TMR, TMR_DISABLE);
-			return ret;
+		if (IS_ERR(tzd)) {
+			ret = PTR_ERR(tzd);
+			dev_err(dev, "failed to register thermal zone: %d\n", ret);
+			of_node_put(child);
+			break;
 		}
 
 		if (devm_thermal_add_hwmon_sysfs(tzd))
@@ -164,7 +181,13 @@ static int qoriq_tmu_register_tmu_zone(struct device *dev,
 
 	}
 
-	return 0;
+	of_node_put(sensor_np);
+	of_node_put(np);
+
+	if (ret)
+		regmap_write(qdata->regmap, REGS_TMR, TMR_DISABLE);
+
+	return ret;
 }
 
 static int qoriq_tmu_calibration(struct device *dev,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] thermal: qoriq: Only enable sites that actually exist
  2021-11-29 11:02 [PATCH] thermal: qoriq: Only enable sites that actually exist Sebastian Krzyszkowiak
@ 2021-11-30 15:13 ` Daniel Lezcano
  2022-01-15 21:44   ` Sebastian Krzyszkowiak
  0 siblings, 1 reply; 3+ messages in thread
From: Daniel Lezcano @ 2021-11-30 15:13 UTC (permalink / raw)
  To: Sebastian Krzyszkowiak, linux-pm
  Cc: linux-kernel, Andrey Smirnov, Zhang Rui, Amit Kucheria,
	Rafael J. Wysocki, kernel


Hi Sebastian,

thanks for the fix.

On 29/11/2021 12:02, Sebastian Krzyszkowiak wrote:
> On i.MX8MQ, enabling monitoring sites that aren't connected to anything
> can cause unwanted side effects on some units. This seems to happen
> once some of these sites report out-of-range readings and results in
> sensor misbehavior, such as thermal zone readings getting stuck or even
> suddenly reporting an impossibly high value, triggering emergency
> shutdowns.
> 
> The datasheet lists all non-existent sites as "reserved" and doesn't
> make any guarantees about being able to enable them at all, so let's
> not do that.

A description of what the patch actually does is missing here.

> Fixes: 45038e03d633 ("thermal: qoriq: Enable all sensors before registering them")
> 
> Signed-off-by: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
> ---
>  drivers/thermal/qoriq_thermal.c | 63 ++++++++++++++++++++++-----------
>  1 file changed, 43 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
> index 73049f9bea25..ef0848849ee2 100644
> --- a/drivers/thermal/qoriq_thermal.c
> +++ b/drivers/thermal/qoriq_thermal.c
> @@ -32,7 +32,6 @@
>  #define TMR_DISABLE	0x0
>  #define TMR_ME		0x80000000
>  #define TMR_ALPF	0x0c000000
> -#define TMR_MSITE_ALL	GENMASK(15, 0)
>  
>  #define REGS_TMTMIR	0x008	/* Temperature measurement interval Register */
>  #define TMTMIR_DEFAULT	0x0000000f
> @@ -129,33 +128,51 @@ static const struct thermal_zone_of_device_ops tmu_tz_ops = {
>  static int qoriq_tmu_register_tmu_zone(struct device *dev,
>  				       struct qoriq_tmu_data *qdata)
>  {
> -	int id;
> +	int ret = 0;
> +	struct device_node *np, *child, *sensor_np;
>  
> -	if (qdata->ver == TMU_VER1) {
> -		regmap_write(qdata->regmap, REGS_TMR,
> -			     TMR_MSITE_ALL | TMR_ME | TMR_ALPF);
> -	} else {
> -		regmap_write(qdata->regmap, REGS_V2_TMSR, TMR_MSITE_ALL);
> -		regmap_write(qdata->regmap, REGS_TMR, TMR_ME | TMR_ALPF_V2);
> -	}
> +	np = of_find_node_by_name(NULL, "thermal-zones");
> +	if (!np)
> +		return -ENODEV;
> +
> +	sensor_np = of_node_get(dev->of_node);
>  
> -	for (id = 0; id < SITES_MAX; id++) {
> +	for_each_available_child_of_node(np, child) {
>  		struct thermal_zone_device *tzd;
> -		struct qoriq_sensor *sensor = &qdata->sensor[id];
> -		int ret;
> +		struct qoriq_sensor *sensor;
> +		int id, site;
> +
> +		ret = thermal_zone_of_get_sensor_id(child, sensor_np, &id);
> +
> +		if (ret < 0) {
> +			dev_err(dev, "failed to get valid sensor id: %d\n", ret);
> +			of_node_put(child);
> +			break;
> +		}
>  
> +		sensor = &qdata->sensor[id];
>  		sensor->id = id;
>  
> +		/* Enable monitoring */
> +		if (qdata->ver == TMU_VER1) {
> +			site = 0x1 << (15 - id);
> +			regmap_update_bits(qdata->regmap, REGS_TMR,
> +					   site | TMR_ME | TMR_ALPF,
> +					   site | TMR_ME | TMR_ALPF);
> +		} else {
> +			site = 0x1 << id;
> +			regmap_update_bits(qdata->regmap, REGS_V2_TMSR, site, site);
> +			regmap_write(qdata->regmap, REGS_TMR, TMR_ME | TMR_ALPF_V2);
> +		}

Why not build up the site mask inside the loop and then execute the block
above just once, outside of the loop?

>  		tzd = devm_thermal_zone_of_sensor_register(dev, id,
>  							   sensor,
>  							   &tmu_tz_ops);
> -		ret = PTR_ERR_OR_ZERO(tzd);
> -		if (ret) {
> -			if (ret == -ENODEV)
> -				continue;
> -
> -			regmap_write(qdata->regmap, REGS_TMR, TMR_DISABLE);
> -			return ret;
> +		if (IS_ERR(tzd)) {
> +			ret = PTR_ERR(tzd);
> +			dev_err(dev, "failed to register thermal zone: %d\n", ret);
> +			of_node_put(child);
> +			break;
>  		}
>  
>  		if (devm_thermal_add_hwmon_sysfs(tzd))
> @@ -164,7 +181,13 @@ static int qoriq_tmu_register_tmu_zone(struct device *dev,
>  
>  	}
>  
> -	return 0;
> +	of_node_put(sensor_np);
> +	of_node_put(np);
> +
> +	if (ret)
> +		regmap_write(qdata->regmap, REGS_TMR, TMR_DISABLE);
> +
> +	return ret;
>  }
>  
>  static int qoriq_tmu_calibration(struct device *dev,
> 


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] thermal: qoriq: Only enable sites that actually exist
  2021-11-30 15:13 ` Daniel Lezcano
@ 2022-01-15 21:44   ` Sebastian Krzyszkowiak
  0 siblings, 0 replies; 3+ messages in thread
From: Sebastian Krzyszkowiak @ 2022-01-15 21:44 UTC (permalink / raw)
  To: linux-pm, Daniel Lezcano
  Cc: linux-kernel, Andrey Smirnov, Zhang Rui, Amit Kucheria,
	Rafael J. Wysocki, kernel

[-- Attachment #1: Type: text/plain, Size: 4824 bytes --]

Hi Daniel,

On Tuesday, 30 November 2021 16:13:02 CET Daniel Lezcano wrote:
> Hi Sebastian,
> 
> thanks for the fix.
> 
> On 29/11/2021 12:02, Sebastian Krzyszkowiak wrote:
> > On i.MX8MQ, enabling monitoring sites that aren't connected to anything
> > can cause unwanted side effects on some units. This seems to happen
> > once some of these sites report out-of-range readings and results in
> > sensor misbehavior, such as thermal zone readings getting stuck or even
> > suddenly reporting an impossibly high value, triggering emergency
> > shutdowns.
> > 
> > The datasheet lists all non-existent sites as "reserved" and doesn't
> > make any guarantees about being able to enable them at all, so let's
> > not do that.
> 
> A description of what the patch actually does is missing here.

Does the following, appended to the commit description, sound good?

"Instead, iterate over sensor DT nodes and only enable monitoring sites that 
are specified there prior to registering their thermal zones."

> > Fixes: 45038e03d633 ("thermal: qoriq: Enable all sensors before
> > registering them")
> > 
> > Signed-off-by: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
> > ---
> > 
> >  drivers/thermal/qoriq_thermal.c | 63 ++++++++++++++++++++++-----------
> >  1 file changed, 43 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/thermal/qoriq_thermal.c
> > b/drivers/thermal/qoriq_thermal.c index 73049f9bea25..ef0848849ee2 100644
> > --- a/drivers/thermal/qoriq_thermal.c
> > +++ b/drivers/thermal/qoriq_thermal.c
> > @@ -32,7 +32,6 @@
> > 
> >  #define TMR_DISABLE	0x0
> >  #define TMR_ME		0x80000000
> >  #define TMR_ALPF	0x0c000000
> > 
> > -#define TMR_MSITE_ALL	GENMASK(15, 0)
> > 
> >  #define REGS_TMTMIR	0x008	/* Temperature measurement 
interval Register */
> >  #define TMTMIR_DEFAULT	0x0000000f
> > 
> > @@ -129,33 +128,51 @@ static const struct thermal_zone_of_device_ops
> > tmu_tz_ops = {> 
> >  static int qoriq_tmu_register_tmu_zone(struct device *dev,
> >  
> >  				       struct qoriq_tmu_data 
*qdata)
> >  
> >  {
> > 
> > -	int id;
> > +	int ret = 0;
> > +	struct device_node *np, *child, *sensor_np;
> > 
> > -	if (qdata->ver == TMU_VER1) {
> > -		regmap_write(qdata->regmap, REGS_TMR,
> > -			     TMR_MSITE_ALL | TMR_ME | TMR_ALPF);
> > -	} else {
> > -		regmap_write(qdata->regmap, REGS_V2_TMSR, 
TMR_MSITE_ALL);
> > -		regmap_write(qdata->regmap, REGS_TMR, TMR_ME | 
TMR_ALPF_V2);
> > -	}
> > +	np = of_find_node_by_name(NULL, "thermal-zones");
> > +	if (!np)
> > +		return -ENODEV;
> > +
> > +	sensor_np = of_node_get(dev->of_node);
> > 
> > -	for (id = 0; id < SITES_MAX; id++) {
> > +	for_each_available_child_of_node(np, child) {
> > 
> >  		struct thermal_zone_device *tzd;
> > 
> > -		struct qoriq_sensor *sensor = &qdata->sensor[id];
> > -		int ret;
> > +		struct qoriq_sensor *sensor;
> > +		int id, site;
> > +
> > +		ret = thermal_zone_of_get_sensor_id(child, sensor_np, 
&id);
> > +
> > +		if (ret < 0) {
> > +			dev_err(dev, "failed to get valid sensor id: 
%d\n", ret);
> > +			of_node_put(child);
> > +			break;
> > +		}
> > 
> > +		sensor = &qdata->sensor[id];
> > 
> >  		sensor->id = id;
> > 
> > +		/* Enable monitoring */
> > +		if (qdata->ver == TMU_VER1) {
> > +			site = 0x1 << (15 - id);
> > +			regmap_update_bits(qdata->regmap, REGS_TMR,
> > +					   site | TMR_ME | 
TMR_ALPF,
> > +					   site | TMR_ME | 
TMR_ALPF);
> > +		} else {
> > +			site = 0x1 << id;
> > +			regmap_update_bits(qdata->regmap, 
REGS_V2_TMSR, site, site);
> > +			regmap_write(qdata->regmap, REGS_TMR, TMR_ME 
| TMR_ALPF_V2);
> > +		}
> 
> Why not build up the site mask inside the loop and then execute the block
> above just once, outside of the loop?

That's how it worked in the past. The entire point of this patch is to not do 
that - otherwise we could simply revert 45038e03d633 and bring back the issue 
it was attempting to fix (bogus data being reported prior to the first reading).

> 
> >  		tzd = devm_thermal_zone_of_sensor_register(dev, id,
> >  		
> >  							   
sensor,
> >  							   
&tmu_tz_ops);
> > 
> > -		ret = PTR_ERR_OR_ZERO(tzd);
> > -		if (ret) {
> > -			if (ret == -ENODEV)
> > -				continue;
> > -
> > -			regmap_write(qdata->regmap, REGS_TMR, 
TMR_DISABLE);
> > -			return ret;
> > +		if (IS_ERR(tzd)) {
> > +			ret = PTR_ERR(tzd);
> > +			dev_err(dev, "failed to register thermal 
zone: %d\n", ret);
> > +			of_node_put(child);
> > +			break;
> > 
> >  		}
> >  		
> >  		if (devm_thermal_add_hwmon_sysfs(tzd))
> > 
> > @@ -164,7 +181,13 @@ static int qoriq_tmu_register_tmu_zone(struct device
> > *dev,> 
> >  	}
> > 
> > -	return 0;
> > +	of_node_put(sensor_np);
> > +	of_node_put(np);
> > +
> > +	if (ret)
> > +		regmap_write(qdata->regmap, REGS_TMR, TMR_DISABLE);
> > +
> > +	return ret;
> > 
> >  }
> >  
> >  static int qoriq_tmu_calibration(struct device *dev,

Cheers,
Sebastian

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-01-15 21:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-29 11:02 [PATCH] thermal: qoriq: Only enable sites that actually exist Sebastian Krzyszkowiak
2021-11-30 15:13 ` Daniel Lezcano
2022-01-15 21:44   ` Sebastian Krzyszkowiak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).