All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-08  8:11 ` Michal Vokáč
  0 siblings, 0 replies; 22+ messages in thread
From: Michal Vokáč @ 2021-10-08  8:11 UTC (permalink / raw)
  To: Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo
  Cc: Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš, petrben, stable, Michal Vokáč

From: Petr Beneš <petr.benes@ysoft.com>

SoC temperature readout may not work after thermal alarm fires interrupt.
This harms userspace as well as CPU cooling device.

Two issues with the logic involved. First, there is no protection against
concurent measurements, hence one can switch the sensor off while
the other one tries to read temperature later. Second, the interrupt path
usually fails. At the end the sensor is powered off and thermal IRQ is
disabled. One has to reenable the thermal zone by the sysfs interface.

Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
driver's local data to decide whether to run a measurement")

It uses data->irq_enabled as the "local data". Indeed, its value is
related to the state of the sensor loosely under normal operation and,
frankly, gets unleashed when the thermal interrupt arrives.

Current patch adds the "local data" (new member sensor_on in
imx_thermal_data) and sets its value in controlled manner.

Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
Cc: petrben@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
---
 drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 2c7473d86a59..df5658e21828 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -209,6 +209,8 @@ struct imx_thermal_data {
 	struct clk *thermal_clk;
 	const struct thermal_soc_data *socdata;
 	const char *temp_grade;
+	struct mutex sensor_lock;
+	bool sensor_on;
 };
 
 static void imx_set_panic_temp(struct imx_thermal_data *data,
@@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
 	const struct thermal_soc_data *soc_data = data->socdata;
 	struct regmap *map = data->tempmon;
 	unsigned int n_meas;
-	bool wait, run_measurement;
+	bool wait;
 	u32 val;
 
-	run_measurement = !data->irq_enabled;
-	if (!run_measurement) {
+	mutex_lock(&data->sensor_lock);
+
+	if (data->sensor_on) {
 		/* Check if a measurement is currently in progress */
 		regmap_read(map, soc_data->temp_data, &val);
 		wait = !(val & soc_data->temp_valid_mask);
@@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
 
 	regmap_read(map, soc_data->temp_data, &val);
 
-	if (run_measurement) {
+	if (!data->sensor_on) {
 		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
 			     soc_data->measure_temp_mask);
 		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
 			     soc_data->power_down_mask);
 	}
 
+	mutex_unlock(&data->sensor_lock);
+
 	if ((val & soc_data->temp_valid_mask) == 0) {
 		dev_dbg(&tz->device, "temp measurement never finished\n");
 		return -EAGAIN;
@@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
 	const struct thermal_soc_data *soc_data = data->socdata;
 
 	if (mode == THERMAL_DEVICE_ENABLED) {
+		mutex_lock(&data->sensor_lock);
 		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
 			     soc_data->power_down_mask);
 		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
 			     soc_data->measure_temp_mask);
+		data->sensor_on = true;
+		mutex_unlock(&data->sensor_lock);
 
 		if (!data->irq_enabled) {
 			data->irq_enabled = true;
 			enable_irq(data->irq);
 		}
 	} else {
+		mutex_lock(&data->sensor_lock);
 		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
 			     soc_data->measure_temp_mask);
 		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
 			     soc_data->power_down_mask);
+		data->sensor_on = false;
+		mutex_unlock(&data->sensor_lock);
 
 		if (data->irq_enabled) {
 			disable_irq(data->irq);
@@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	}
 
 	/* Make sure sensor is in known good state for measurements */
+	mutex_init(&data->sensor_lock);
+	mutex_lock(&data->sensor_lock);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
 		     data->socdata->power_down_mask);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
@@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			IMX6_MISC0_REFTOP_SELBIASOFF);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
 		     data->socdata->power_down_mask);
+	data->sensor_on = false;
+	mutex_unlock(&data->sensor_lock);
 
 	ret = imx_thermal_register_legacy_cooling(data);
 	if (ret)
@@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	if (data->socdata->version == TEMPMON_IMX6SX)
 		imx_set_panic_temp(data, data->temp_critical);
 
+	mutex_lock(&data->sensor_lock);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
 		     data->socdata->power_down_mask);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
 		     data->socdata->measure_temp_mask);
+	data->sensor_on = true;
+	mutex_unlock(&data->sensor_lock);
 
 	data->irq_enabled = true;
 	ret = thermal_zone_device_enable(data->tz);
@@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
 	struct regmap *map = data->tempmon;
 
 	/* Disable measurements */
+	mutex_lock(&data->sensor_lock);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
 		     data->socdata->power_down_mask);
+	data->sensor_on = false;
+	mutex_unlock(&data->sensor_lock);
+
 	if (!IS_ERR(data->thermal_clk))
 		clk_disable_unprepare(data->thermal_clk);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-08  8:11 ` Michal Vokáč
  0 siblings, 0 replies; 22+ messages in thread
From: Michal Vokáč @ 2021-10-08  8:11 UTC (permalink / raw)
  To: Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo
  Cc: Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš, petrben, stable, Michal Vokáč

From: Petr Beneš <petr.benes@ysoft.com>

SoC temperature readout may not work after thermal alarm fires interrupt.
This harms userspace as well as CPU cooling device.

Two issues with the logic involved. First, there is no protection against
concurent measurements, hence one can switch the sensor off while
the other one tries to read temperature later. Second, the interrupt path
usually fails. At the end the sensor is powered off and thermal IRQ is
disabled. One has to reenable the thermal zone by the sysfs interface.

Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
driver's local data to decide whether to run a measurement")

It uses data->irq_enabled as the "local data". Indeed, its value is
related to the state of the sensor loosely under normal operation and,
frankly, gets unleashed when the thermal interrupt arrives.

Current patch adds the "local data" (new member sensor_on in
imx_thermal_data) and sets its value in controlled manner.

Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
Cc: petrben@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
---
 drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 2c7473d86a59..df5658e21828 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -209,6 +209,8 @@ struct imx_thermal_data {
 	struct clk *thermal_clk;
 	const struct thermal_soc_data *socdata;
 	const char *temp_grade;
+	struct mutex sensor_lock;
+	bool sensor_on;
 };
 
 static void imx_set_panic_temp(struct imx_thermal_data *data,
@@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
 	const struct thermal_soc_data *soc_data = data->socdata;
 	struct regmap *map = data->tempmon;
 	unsigned int n_meas;
-	bool wait, run_measurement;
+	bool wait;
 	u32 val;
 
-	run_measurement = !data->irq_enabled;
-	if (!run_measurement) {
+	mutex_lock(&data->sensor_lock);
+
+	if (data->sensor_on) {
 		/* Check if a measurement is currently in progress */
 		regmap_read(map, soc_data->temp_data, &val);
 		wait = !(val & soc_data->temp_valid_mask);
@@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
 
 	regmap_read(map, soc_data->temp_data, &val);
 
-	if (run_measurement) {
+	if (!data->sensor_on) {
 		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
 			     soc_data->measure_temp_mask);
 		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
 			     soc_data->power_down_mask);
 	}
 
+	mutex_unlock(&data->sensor_lock);
+
 	if ((val & soc_data->temp_valid_mask) == 0) {
 		dev_dbg(&tz->device, "temp measurement never finished\n");
 		return -EAGAIN;
@@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
 	const struct thermal_soc_data *soc_data = data->socdata;
 
 	if (mode == THERMAL_DEVICE_ENABLED) {
+		mutex_lock(&data->sensor_lock);
 		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
 			     soc_data->power_down_mask);
 		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
 			     soc_data->measure_temp_mask);
+		data->sensor_on = true;
+		mutex_unlock(&data->sensor_lock);
 
 		if (!data->irq_enabled) {
 			data->irq_enabled = true;
 			enable_irq(data->irq);
 		}
 	} else {
+		mutex_lock(&data->sensor_lock);
 		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
 			     soc_data->measure_temp_mask);
 		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
 			     soc_data->power_down_mask);
+		data->sensor_on = false;
+		mutex_unlock(&data->sensor_lock);
 
 		if (data->irq_enabled) {
 			disable_irq(data->irq);
@@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	}
 
 	/* Make sure sensor is in known good state for measurements */
+	mutex_init(&data->sensor_lock);
+	mutex_lock(&data->sensor_lock);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
 		     data->socdata->power_down_mask);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
@@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			IMX6_MISC0_REFTOP_SELBIASOFF);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
 		     data->socdata->power_down_mask);
+	data->sensor_on = false;
+	mutex_unlock(&data->sensor_lock);
 
 	ret = imx_thermal_register_legacy_cooling(data);
 	if (ret)
@@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	if (data->socdata->version == TEMPMON_IMX6SX)
 		imx_set_panic_temp(data, data->temp_critical);
 
+	mutex_lock(&data->sensor_lock);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
 		     data->socdata->power_down_mask);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
 		     data->socdata->measure_temp_mask);
+	data->sensor_on = true;
+	mutex_unlock(&data->sensor_lock);
 
 	data->irq_enabled = true;
 	ret = thermal_zone_device_enable(data->tz);
@@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
 	struct regmap *map = data->tempmon;
 
 	/* Disable measurements */
+	mutex_lock(&data->sensor_lock);
 	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
 		     data->socdata->power_down_mask);
+	data->sensor_on = false;
+	mutex_unlock(&data->sensor_lock);
+
 	if (!IS_ERR(data->thermal_clk))
 		clk_disable_unprepare(data->thermal_clk);
 
-- 
2.25.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-08  8:11 ` Michal Vokáč
@ 2021-10-16 22:23   ` Daniel Lezcano
  -1 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-16 22:23 UTC (permalink / raw)
  To: Michal Vokáč, Andrzej Pietrasiewicz, linux-pm, Shawn Guo
  Cc: Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	petrben, stable

On 08/10/2021 10:11, Michal Vokáč wrote:
> From: Petr Beneš <petr.benes@ysoft.com>
> 
> SoC temperature readout may not work after thermal alarm fires interrupt.
> This harms userspace as well as CPU cooling device.
> 
> Two issues with the logic involved. First, there is no protection against
> concurent measurements, hence one can switch the sensor off while
> the other one tries to read temperature later. Second, the interrupt path
> usually fails. At the end the sensor is powered off and thermal IRQ is
> disabled. One has to reenable the thermal zone by the sysfs interface.
> 
> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> driver's local data to decide whether to run a measurement")

Are these troubles observed and reproduced ? Or is it your understanding
from reading the code ?

get_temp() and tz enable/disable are protected against races in the core
code via the tz mutex

> It uses data->irq_enabled as the "local data". Indeed, its value is
> related to the state of the sensor loosely under normal operation and,
> frankly, gets unleashed when the thermal interrupt arrives.
> 
> Current patch adds the "local data" (new member sensor_on in
> imx_thermal_data) and sets its value in controlled manner.>
> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> Cc: petrben@gmail.com
> Cc: stable@vger.kernel.org
> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> ---
>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> index 2c7473d86a59..df5658e21828 100644
> --- a/drivers/thermal/imx_thermal.c
> +++ b/drivers/thermal/imx_thermal.c
> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>  	struct clk *thermal_clk;
>  	const struct thermal_soc_data *socdata;
>  	const char *temp_grade;
> +	struct mutex sensor_lock;
> +	bool sensor_on;
>  };
>  
>  static void imx_set_panic_temp(struct imx_thermal_data *data,
> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  	struct regmap *map = data->tempmon;
>  	unsigned int n_meas;
> -	bool wait, run_measurement;
> +	bool wait;
>  	u32 val;
>  
> -	run_measurement = !data->irq_enabled;
> -	if (!run_measurement) {
> +	mutex_lock(&data->sensor_lock);
> +
> +	if (data->sensor_on) {
>  		/* Check if a measurement is currently in progress */
>  		regmap_read(map, soc_data->temp_data, &val);
>  		wait = !(val & soc_data->temp_valid_mask);
> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  
>  	regmap_read(map, soc_data->temp_data, &val);
>  
> -	if (run_measurement) {
> +	if (!data->sensor_on) {
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
>  	}
>  
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if ((val & soc_data->temp_valid_mask) == 0) {
>  		dev_dbg(&tz->device, "temp measurement never finished\n");
>  		return -EAGAIN;
> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  
>  	if (mode == THERMAL_DEVICE_ENABLED) {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->power_down_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->measure_temp_mask);
> +		data->sensor_on = true;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (!data->irq_enabled) {
>  			data->irq_enabled = true;
>  			enable_irq(data->irq);
>  		}
>  	} else {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
> +		data->sensor_on = false;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (data->irq_enabled) {
>  			disable_irq(data->irq);
> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	}
>  
>  	/* Make sure sensor is in known good state for measurements */
> +	mutex_init(&data->sensor_lock);
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  			IMX6_MISC0_REFTOP_SELBIASOFF);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	ret = imx_thermal_register_legacy_cooling(data);
>  	if (ret)
> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	if (data->socdata->version == TEMPMON_IMX6SX)
>  		imx_set_panic_temp(data, data->temp_critical);
>  
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->measure_temp_mask);
> +	data->sensor_on = true;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	data->irq_enabled = true;
>  	ret = thermal_zone_device_enable(data->tz);
> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>  	struct regmap *map = data->tempmon;
>  
>  	/* Disable measurements */
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if (!IS_ERR(data->thermal_clk))
>  		clk_disable_unprepare(data->thermal_clk);
>  
> 


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-16 22:23   ` Daniel Lezcano
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-16 22:23 UTC (permalink / raw)
  To: Michal Vokáč, Andrzej Pietrasiewicz, linux-pm, Shawn Guo
  Cc: Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	petrben, stable

On 08/10/2021 10:11, Michal Vokáč wrote:
> From: Petr Beneš <petr.benes@ysoft.com>
> 
> SoC temperature readout may not work after thermal alarm fires interrupt.
> This harms userspace as well as CPU cooling device.
> 
> Two issues with the logic involved. First, there is no protection against
> concurent measurements, hence one can switch the sensor off while
> the other one tries to read temperature later. Second, the interrupt path
> usually fails. At the end the sensor is powered off and thermal IRQ is
> disabled. One has to reenable the thermal zone by the sysfs interface.
> 
> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> driver's local data to decide whether to run a measurement")

Are these troubles observed and reproduced ? Or is it your understanding
from reading the code ?

get_temp() and tz enable/disable are protected against races in the core
code via the tz mutex

> It uses data->irq_enabled as the "local data". Indeed, its value is
> related to the state of the sensor loosely under normal operation and,
> frankly, gets unleashed when the thermal interrupt arrives.
> 
> Current patch adds the "local data" (new member sensor_on in
> imx_thermal_data) and sets its value in controlled manner.>
> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> Cc: petrben@gmail.com
> Cc: stable@vger.kernel.org
> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> ---
>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> index 2c7473d86a59..df5658e21828 100644
> --- a/drivers/thermal/imx_thermal.c
> +++ b/drivers/thermal/imx_thermal.c
> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>  	struct clk *thermal_clk;
>  	const struct thermal_soc_data *socdata;
>  	const char *temp_grade;
> +	struct mutex sensor_lock;
> +	bool sensor_on;
>  };
>  
>  static void imx_set_panic_temp(struct imx_thermal_data *data,
> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  	struct regmap *map = data->tempmon;
>  	unsigned int n_meas;
> -	bool wait, run_measurement;
> +	bool wait;
>  	u32 val;
>  
> -	run_measurement = !data->irq_enabled;
> -	if (!run_measurement) {
> +	mutex_lock(&data->sensor_lock);
> +
> +	if (data->sensor_on) {
>  		/* Check if a measurement is currently in progress */
>  		regmap_read(map, soc_data->temp_data, &val);
>  		wait = !(val & soc_data->temp_valid_mask);
> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  
>  	regmap_read(map, soc_data->temp_data, &val);
>  
> -	if (run_measurement) {
> +	if (!data->sensor_on) {
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
>  	}
>  
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if ((val & soc_data->temp_valid_mask) == 0) {
>  		dev_dbg(&tz->device, "temp measurement never finished\n");
>  		return -EAGAIN;
> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  
>  	if (mode == THERMAL_DEVICE_ENABLED) {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->power_down_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->measure_temp_mask);
> +		data->sensor_on = true;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (!data->irq_enabled) {
>  			data->irq_enabled = true;
>  			enable_irq(data->irq);
>  		}
>  	} else {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
> +		data->sensor_on = false;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (data->irq_enabled) {
>  			disable_irq(data->irq);
> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	}
>  
>  	/* Make sure sensor is in known good state for measurements */
> +	mutex_init(&data->sensor_lock);
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  			IMX6_MISC0_REFTOP_SELBIASOFF);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	ret = imx_thermal_register_legacy_cooling(data);
>  	if (ret)
> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	if (data->socdata->version == TEMPMON_IMX6SX)
>  		imx_set_panic_temp(data, data->temp_critical);
>  
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->measure_temp_mask);
> +	data->sensor_on = true;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	data->irq_enabled = true;
>  	ret = thermal_zone_device_enable(data->tz);
> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>  	struct regmap *map = data->tempmon;
>  
>  	/* Disable measurements */
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if (!IS_ERR(data->thermal_clk))
>  		clk_disable_unprepare(data->thermal_clk);
>  
> 


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-16 22:23   ` Daniel Lezcano
@ 2021-10-18  8:36     ` Petr Benes
  -1 siblings, 0 replies; 22+ messages in thread
From: Petr Benes @ 2021-10-18  8:36 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

On Sun, 17 Oct 2021 at 00:23, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 08/10/2021 10:11, Michal Vokáč wrote:
> > From: Petr Beneš <petr.benes@ysoft.com>
> >
> > SoC temperature readout may not work after thermal alarm fires interrupt.
> > This harms userspace as well as CPU cooling device.
> >
> > Two issues with the logic involved. First, there is no protection against
> > concurent measurements, hence one can switch the sensor off while
> > the other one tries to read temperature later. Second, the interrupt path
> > usually fails. At the end the sensor is powered off and thermal IRQ is
> > disabled. One has to reenable the thermal zone by the sysfs interface.
> >
> > Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> > driver's local data to decide whether to run a measurement")
>
> Are these troubles observed and reproduced ? Or is it your understanding
> from reading the code ?

Yes, it is observed. We are using:
CONFIG_CPU_THERMAL=y
CONFIG_CPU_FREQ_THERMAL=y
If the SoC temperature oscillates  around the passive trip point, it is not
possible to read out /sys/class/thermal/thermal_zone0/temp after
a while (minutes). For reproduction either heat the device up a bit or set
the passive trip point lower.

>
> get_temp() and tz enable/disable are protected against races in the core
> code via the tz mutex

imx_get_temp() itself doesn't handle concurrent invocations properly, despite
it may seem it does. The sensor is switched on/off without control.
That is a flaw in imx_get_temp().
No evidence the core locking is wrong.

data->irq_enabled is set to false in imx_thermal_alarm_irq() each time
the interrupt arrives. imx_get_temp() does wrong decision based
on the value later.

>
> > It uses data->irq_enabled as the "local data". Indeed, its value is
> > related to the state of the sensor loosely under normal operation and,
> > frankly, gets unleashed when the thermal interrupt arrives.
> >
> > Current patch adds the "local data" (new member sensor_on in
> > imx_thermal_data) and sets its value in controlled manner.>
> > Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> > Cc: petrben@gmail.com
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> > Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> > ---
> >  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
> >  1 file changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> > index 2c7473d86a59..df5658e21828 100644
> > --- a/drivers/thermal/imx_thermal.c
> > +++ b/drivers/thermal/imx_thermal.c
> > @@ -209,6 +209,8 @@ struct imx_thermal_data {
> >       struct clk *thermal_clk;
> >       const struct thermal_soc_data *socdata;
> >       const char *temp_grade;
> > +     struct mutex sensor_lock;
> > +     bool sensor_on;
> >  };
> >
> >  static void imx_set_panic_temp(struct imx_thermal_data *data,
> > @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >       struct regmap *map = data->tempmon;
> >       unsigned int n_meas;
> > -     bool wait, run_measurement;
> > +     bool wait;
> >       u32 val;
> >
> > -     run_measurement = !data->irq_enabled;
> > -     if (!run_measurement) {
> > +     mutex_lock(&data->sensor_lock);
> > +
> > +     if (data->sensor_on) {
> >               /* Check if a measurement is currently in progress */
> >               regmap_read(map, soc_data->temp_data, &val);
> >               wait = !(val & soc_data->temp_valid_mask);
> > @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >
> >       regmap_read(map, soc_data->temp_data, &val);
> >
> > -     if (run_measurement) {
> > +     if (!data->sensor_on) {
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> >       }
> >
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if ((val & soc_data->temp_valid_mask) == 0) {
> >               dev_dbg(&tz->device, "temp measurement never finished\n");
> >               return -EAGAIN;
> > @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >
> >       if (mode == THERMAL_DEVICE_ENABLED) {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->power_down_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->measure_temp_mask);
> > +             data->sensor_on = true;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (!data->irq_enabled) {
> >                       data->irq_enabled = true;
> >                       enable_irq(data->irq);
> >               }
> >       } else {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> > +             data->sensor_on = false;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (data->irq_enabled) {
> >                       disable_irq(data->irq);
> > @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       }
> >
> >       /* Make sure sensor is in known good state for measurements */
> > +     mutex_init(&data->sensor_lock);
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> > @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >                       IMX6_MISC0_REFTOP_SELBIASOFF);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       ret = imx_thermal_register_legacy_cooling(data);
> >       if (ret)
> > @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       if (data->socdata->version == TEMPMON_IMX6SX)
> >               imx_set_panic_temp(data, data->temp_critical);
> >
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->measure_temp_mask);
> > +     data->sensor_on = true;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       data->irq_enabled = true;
> >       ret = thermal_zone_device_enable(data->tz);
> > @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
> >       struct regmap *map = data->tempmon;
> >
> >       /* Disable measurements */
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if (!IS_ERR(data->thermal_clk))
> >               clk_disable_unprepare(data->thermal_clk);
> >
> >
>
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18  8:36     ` Petr Benes
  0 siblings, 0 replies; 22+ messages in thread
From: Petr Benes @ 2021-10-18  8:36 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

On Sun, 17 Oct 2021 at 00:23, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 08/10/2021 10:11, Michal Vokáč wrote:
> > From: Petr Beneš <petr.benes@ysoft.com>
> >
> > SoC temperature readout may not work after thermal alarm fires interrupt.
> > This harms userspace as well as CPU cooling device.
> >
> > Two issues with the logic involved. First, there is no protection against
> > concurent measurements, hence one can switch the sensor off while
> > the other one tries to read temperature later. Second, the interrupt path
> > usually fails. At the end the sensor is powered off and thermal IRQ is
> > disabled. One has to reenable the thermal zone by the sysfs interface.
> >
> > Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> > driver's local data to decide whether to run a measurement")
>
> Are these troubles observed and reproduced ? Or is it your understanding
> from reading the code ?

Yes, it is observed. We are using:
CONFIG_CPU_THERMAL=y
CONFIG_CPU_FREQ_THERMAL=y
If the SoC temperature oscillates  around the passive trip point, it is not
possible to read out /sys/class/thermal/thermal_zone0/temp after
a while (minutes). For reproduction either heat the device up a bit or set
the passive trip point lower.

>
> get_temp() and tz enable/disable are protected against races in the core
> code via the tz mutex

imx_get_temp() itself doesn't handle concurrent invocations properly, despite
it may seem it does. The sensor is switched on/off without control.
That is a flaw in imx_get_temp().
No evidence the core locking is wrong.

data->irq_enabled is set to false in imx_thermal_alarm_irq() each time
the interrupt arrives. imx_get_temp() does wrong decision based
on the value later.

>
> > It uses data->irq_enabled as the "local data". Indeed, its value is
> > related to the state of the sensor loosely under normal operation and,
> > frankly, gets unleashed when the thermal interrupt arrives.
> >
> > Current patch adds the "local data" (new member sensor_on in
> > imx_thermal_data) and sets its value in controlled manner.>
> > Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> > Cc: petrben@gmail.com
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> > Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> > ---
> >  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
> >  1 file changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> > index 2c7473d86a59..df5658e21828 100644
> > --- a/drivers/thermal/imx_thermal.c
> > +++ b/drivers/thermal/imx_thermal.c
> > @@ -209,6 +209,8 @@ struct imx_thermal_data {
> >       struct clk *thermal_clk;
> >       const struct thermal_soc_data *socdata;
> >       const char *temp_grade;
> > +     struct mutex sensor_lock;
> > +     bool sensor_on;
> >  };
> >
> >  static void imx_set_panic_temp(struct imx_thermal_data *data,
> > @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >       struct regmap *map = data->tempmon;
> >       unsigned int n_meas;
> > -     bool wait, run_measurement;
> > +     bool wait;
> >       u32 val;
> >
> > -     run_measurement = !data->irq_enabled;
> > -     if (!run_measurement) {
> > +     mutex_lock(&data->sensor_lock);
> > +
> > +     if (data->sensor_on) {
> >               /* Check if a measurement is currently in progress */
> >               regmap_read(map, soc_data->temp_data, &val);
> >               wait = !(val & soc_data->temp_valid_mask);
> > @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >
> >       regmap_read(map, soc_data->temp_data, &val);
> >
> > -     if (run_measurement) {
> > +     if (!data->sensor_on) {
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> >       }
> >
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if ((val & soc_data->temp_valid_mask) == 0) {
> >               dev_dbg(&tz->device, "temp measurement never finished\n");
> >               return -EAGAIN;
> > @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >
> >       if (mode == THERMAL_DEVICE_ENABLED) {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->power_down_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->measure_temp_mask);
> > +             data->sensor_on = true;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (!data->irq_enabled) {
> >                       data->irq_enabled = true;
> >                       enable_irq(data->irq);
> >               }
> >       } else {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> > +             data->sensor_on = false;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (data->irq_enabled) {
> >                       disable_irq(data->irq);
> > @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       }
> >
> >       /* Make sure sensor is in known good state for measurements */
> > +     mutex_init(&data->sensor_lock);
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> > @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >                       IMX6_MISC0_REFTOP_SELBIASOFF);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       ret = imx_thermal_register_legacy_cooling(data);
> >       if (ret)
> > @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       if (data->socdata->version == TEMPMON_IMX6SX)
> >               imx_set_panic_temp(data, data->temp_critical);
> >
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->measure_temp_mask);
> > +     data->sensor_on = true;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       data->irq_enabled = true;
> >       ret = thermal_zone_device_enable(data->tz);
> > @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
> >       struct regmap *map = data->tempmon;
> >
> >       /* Disable measurements */
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if (!IS_ERR(data->thermal_clk))
> >               clk_disable_unprepare(data->thermal_clk);
> >
> >
>
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-18  8:36     ` Petr Benes
@ 2021-10-18  8:43       ` Daniel Lezcano
  -1 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-18  8:43 UTC (permalink / raw)
  To: Petr Benes
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

On 18/10/2021 10:36, Petr Benes wrote:
> On Sun, 17 Oct 2021 at 00:23, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 08/10/2021 10:11, Michal Vokáč wrote:
>>> From: Petr Beneš <petr.benes@ysoft.com>
>>>
>>> SoC temperature readout may not work after thermal alarm fires interrupt.
>>> This harms userspace as well as CPU cooling device.
>>>
>>> Two issues with the logic involved. First, there is no protection against
>>> concurent measurements, hence one can switch the sensor off while
>>> the other one tries to read temperature later. Second, the interrupt path
>>> usually fails. At the end the sensor is powered off and thermal IRQ is
>>> disabled. One has to reenable the thermal zone by the sysfs interface.
>>>
>>> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
>>> driver's local data to decide whether to run a measurement")
>>
>> Are these troubles observed and reproduced ? Or is it your understanding
>> from reading the code ?
> 
> Yes, it is observed. We are using:
> CONFIG_CPU_THERMAL=y
> CONFIG_CPU_FREQ_THERMAL=y
> If the SoC temperature oscillates  around the passive trip point, it is not
> possible to read out /sys/class/thermal/thermal_zone0/temp after
> a while (minutes). For reproduction either heat the device up a bit or set
> the passive trip point lower.

Ok, I have an i.MX7 here to resurrect. I'll give a try.


>> get_temp() and tz enable/disable are protected against races in the core
>> code via the tz mutex
> 
> imx_get_temp() itself doesn't handle concurrent invocations properly, despite
> it may seem it does. The sensor is switched on/off without control.
> That is a flaw in imx_get_temp().
> No evidence the core locking is wrong.
> 
> data->irq_enabled is set to false in imx_thermal_alarm_irq() each time
> the interrupt arrives. imx_get_temp() does wrong decision based
> on the value later.
> 
>>
>>> It uses data->irq_enabled as the "local data". Indeed, its value is
>>> related to the state of the sensor loosely under normal operation and,
>>> frankly, gets unleashed when the thermal interrupt arrives.
>>>
>>> Current patch adds the "local data" (new member sensor_on in
>>> imx_thermal_data) and sets its value in controlled manner.>
>>> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
>>> Cc: petrben@gmail.com
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
>>> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
>>> ---
>>>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>>>  1 file changed, 26 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
>>> index 2c7473d86a59..df5658e21828 100644
>>> --- a/drivers/thermal/imx_thermal.c
>>> +++ b/drivers/thermal/imx_thermal.c
>>> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>>>       struct clk *thermal_clk;
>>>       const struct thermal_soc_data *socdata;
>>>       const char *temp_grade;
>>> +     struct mutex sensor_lock;
>>> +     bool sensor_on;
>>>  };
>>>
>>>  static void imx_set_panic_temp(struct imx_thermal_data *data,
>>> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>       struct regmap *map = data->tempmon;
>>>       unsigned int n_meas;
>>> -     bool wait, run_measurement;
>>> +     bool wait;
>>>       u32 val;
>>>
>>> -     run_measurement = !data->irq_enabled;
>>> -     if (!run_measurement) {
>>> +     mutex_lock(&data->sensor_lock);
>>> +
>>> +     if (data->sensor_on) {
>>>               /* Check if a measurement is currently in progress */
>>>               regmap_read(map, soc_data->temp_data, &val);
>>>               wait = !(val & soc_data->temp_valid_mask);
>>> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>
>>>       regmap_read(map, soc_data->temp_data, &val);
>>>
>>> -     if (run_measurement) {
>>> +     if (!data->sensor_on) {
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>>       }
>>>
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if ((val & soc_data->temp_valid_mask) == 0) {
>>>               dev_dbg(&tz->device, "temp measurement never finished\n");
>>>               return -EAGAIN;
>>> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>
>>>       if (mode == THERMAL_DEVICE_ENABLED) {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->power_down_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->measure_temp_mask);
>>> +             data->sensor_on = true;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (!data->irq_enabled) {
>>>                       data->irq_enabled = true;
>>>                       enable_irq(data->irq);
>>>               }
>>>       } else {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>> +             data->sensor_on = false;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (data->irq_enabled) {
>>>                       disable_irq(data->irq);
>>> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       }
>>>
>>>       /* Make sure sensor is in known good state for measurements */
>>> +     mutex_init(&data->sensor_lock);
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>                       IMX6_MISC0_REFTOP_SELBIASOFF);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       ret = imx_thermal_register_legacy_cooling(data);
>>>       if (ret)
>>> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       if (data->socdata->version == TEMPMON_IMX6SX)
>>>               imx_set_panic_temp(data, data->temp_critical);
>>>
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->measure_temp_mask);
>>> +     data->sensor_on = true;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       data->irq_enabled = true;
>>>       ret = thermal_zone_device_enable(data->tz);
>>> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>>>       struct regmap *map = data->tempmon;
>>>
>>>       /* Disable measurements */
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if (!IS_ERR(data->thermal_clk))
>>>               clk_disable_unprepare(data->thermal_clk);
>>>
>>>
>>
>>
>> --
>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>>
>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> <http://twitter.com/#!/linaroorg> Twitter |
>> <http://www.linaro.org/linaro-blog/> Blog


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18  8:43       ` Daniel Lezcano
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-18  8:43 UTC (permalink / raw)
  To: Petr Benes
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

On 18/10/2021 10:36, Petr Benes wrote:
> On Sun, 17 Oct 2021 at 00:23, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 08/10/2021 10:11, Michal Vokáč wrote:
>>> From: Petr Beneš <petr.benes@ysoft.com>
>>>
>>> SoC temperature readout may not work after thermal alarm fires interrupt.
>>> This harms userspace as well as CPU cooling device.
>>>
>>> Two issues with the logic involved. First, there is no protection against
>>> concurent measurements, hence one can switch the sensor off while
>>> the other one tries to read temperature later. Second, the interrupt path
>>> usually fails. At the end the sensor is powered off and thermal IRQ is
>>> disabled. One has to reenable the thermal zone by the sysfs interface.
>>>
>>> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
>>> driver's local data to decide whether to run a measurement")
>>
>> Are these troubles observed and reproduced ? Or is it your understanding
>> from reading the code ?
> 
> Yes, it is observed. We are using:
> CONFIG_CPU_THERMAL=y
> CONFIG_CPU_FREQ_THERMAL=y
> If the SoC temperature oscillates  around the passive trip point, it is not
> possible to read out /sys/class/thermal/thermal_zone0/temp after
> a while (minutes). For reproduction either heat the device up a bit or set
> the passive trip point lower.

Ok, I have an i.MX7 here to resurrect. I'll give a try.


>> get_temp() and tz enable/disable are protected against races in the core
>> code via the tz mutex
> 
> imx_get_temp() itself doesn't handle concurrent invocations properly, despite
> it may seem it does. The sensor is switched on/off without control.
> That is a flaw in imx_get_temp().
> No evidence the core locking is wrong.
> 
> data->irq_enabled is set to false in imx_thermal_alarm_irq() each time
> the interrupt arrives. imx_get_temp() does wrong decision based
> on the value later.
> 
>>
>>> It uses data->irq_enabled as the "local data". Indeed, its value is
>>> related to the state of the sensor loosely under normal operation and,
>>> frankly, gets unleashed when the thermal interrupt arrives.
>>>
>>> Current patch adds the "local data" (new member sensor_on in
>>> imx_thermal_data) and sets its value in controlled manner.>
>>> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
>>> Cc: petrben@gmail.com
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
>>> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
>>> ---
>>>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>>>  1 file changed, 26 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
>>> index 2c7473d86a59..df5658e21828 100644
>>> --- a/drivers/thermal/imx_thermal.c
>>> +++ b/drivers/thermal/imx_thermal.c
>>> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>>>       struct clk *thermal_clk;
>>>       const struct thermal_soc_data *socdata;
>>>       const char *temp_grade;
>>> +     struct mutex sensor_lock;
>>> +     bool sensor_on;
>>>  };
>>>
>>>  static void imx_set_panic_temp(struct imx_thermal_data *data,
>>> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>       struct regmap *map = data->tempmon;
>>>       unsigned int n_meas;
>>> -     bool wait, run_measurement;
>>> +     bool wait;
>>>       u32 val;
>>>
>>> -     run_measurement = !data->irq_enabled;
>>> -     if (!run_measurement) {
>>> +     mutex_lock(&data->sensor_lock);
>>> +
>>> +     if (data->sensor_on) {
>>>               /* Check if a measurement is currently in progress */
>>>               regmap_read(map, soc_data->temp_data, &val);
>>>               wait = !(val & soc_data->temp_valid_mask);
>>> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>
>>>       regmap_read(map, soc_data->temp_data, &val);
>>>
>>> -     if (run_measurement) {
>>> +     if (!data->sensor_on) {
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>>       }
>>>
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if ((val & soc_data->temp_valid_mask) == 0) {
>>>               dev_dbg(&tz->device, "temp measurement never finished\n");
>>>               return -EAGAIN;
>>> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>
>>>       if (mode == THERMAL_DEVICE_ENABLED) {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->power_down_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->measure_temp_mask);
>>> +             data->sensor_on = true;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (!data->irq_enabled) {
>>>                       data->irq_enabled = true;
>>>                       enable_irq(data->irq);
>>>               }
>>>       } else {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>> +             data->sensor_on = false;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (data->irq_enabled) {
>>>                       disable_irq(data->irq);
>>> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       }
>>>
>>>       /* Make sure sensor is in known good state for measurements */
>>> +     mutex_init(&data->sensor_lock);
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>                       IMX6_MISC0_REFTOP_SELBIASOFF);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       ret = imx_thermal_register_legacy_cooling(data);
>>>       if (ret)
>>> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       if (data->socdata->version == TEMPMON_IMX6SX)
>>>               imx_set_panic_temp(data, data->temp_critical);
>>>
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->measure_temp_mask);
>>> +     data->sensor_on = true;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       data->irq_enabled = true;
>>>       ret = thermal_zone_device_enable(data->tz);
>>> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>>>       struct regmap *map = data->tempmon;
>>>
>>>       /* Disable measurements */
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if (!IS_ERR(data->thermal_clk))
>>>               clk_disable_unprepare(data->thermal_clk);
>>>
>>>
>>
>>
>> --
>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>>
>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> <http://twitter.com/#!/linaroorg> Twitter |
>> <http://www.linaro.org/linaro-blog/> Blog


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-18  8:36     ` Petr Benes
@ 2021-10-18 11:11       ` Daniel Lezcano
  -1 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-18 11:11 UTC (permalink / raw)
  To: Petr Benes
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable


I'm not seeing any thermal zone definitions in the DT for imx6 and imx7.

Am I missing something ?


On 18/10/2021 10:36, Petr Benes wrote:
> On Sun, 17 Oct 2021 at 00:23, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 08/10/2021 10:11, Michal Vokáč wrote:
>>> From: Petr Beneš <petr.benes@ysoft.com>
>>>
>>> SoC temperature readout may not work after thermal alarm fires interrupt.
>>> This harms userspace as well as CPU cooling device.
>>>
>>> Two issues with the logic involved. First, there is no protection against
>>> concurent measurements, hence one can switch the sensor off while
>>> the other one tries to read temperature later. Second, the interrupt path
>>> usually fails. At the end the sensor is powered off and thermal IRQ is
>>> disabled. One has to reenable the thermal zone by the sysfs interface.
>>>
>>> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
>>> driver's local data to decide whether to run a measurement")
>>
>> Are these troubles observed and reproduced ? Or is it your understanding
>> from reading the code ?
> 
> Yes, it is observed. We are using:
> CONFIG_CPU_THERMAL=y
> CONFIG_CPU_FREQ_THERMAL=y
> If the SoC temperature oscillates  around the passive trip point, it is not
> possible to read out /sys/class/thermal/thermal_zone0/temp after
> a while (minutes). For reproduction either heat the device up a bit or set
> the passive trip point lower.
> 
>>
>> get_temp() and tz enable/disable are protected against races in the core
>> code via the tz mutex
> 
> imx_get_temp() itself doesn't handle concurrent invocations properly, despite
> it may seem it does. The sensor is switched on/off without control.
> That is a flaw in imx_get_temp().
> No evidence the core locking is wrong.
> 
> data->irq_enabled is set to false in imx_thermal_alarm_irq() each time
> the interrupt arrives. imx_get_temp() does wrong decision based
> on the value later.
> 
>>
>>> It uses data->irq_enabled as the "local data". Indeed, its value is
>>> related to the state of the sensor loosely under normal operation and,
>>> frankly, gets unleashed when the thermal interrupt arrives.
>>>
>>> Current patch adds the "local data" (new member sensor_on in
>>> imx_thermal_data) and sets its value in controlled manner.>
>>> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
>>> Cc: petrben@gmail.com
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
>>> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
>>> ---
>>>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>>>  1 file changed, 26 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
>>> index 2c7473d86a59..df5658e21828 100644
>>> --- a/drivers/thermal/imx_thermal.c
>>> +++ b/drivers/thermal/imx_thermal.c
>>> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>>>       struct clk *thermal_clk;
>>>       const struct thermal_soc_data *socdata;
>>>       const char *temp_grade;
>>> +     struct mutex sensor_lock;
>>> +     bool sensor_on;
>>>  };
>>>
>>>  static void imx_set_panic_temp(struct imx_thermal_data *data,
>>> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>       struct regmap *map = data->tempmon;
>>>       unsigned int n_meas;
>>> -     bool wait, run_measurement;
>>> +     bool wait;
>>>       u32 val;
>>>
>>> -     run_measurement = !data->irq_enabled;
>>> -     if (!run_measurement) {
>>> +     mutex_lock(&data->sensor_lock);
>>> +
>>> +     if (data->sensor_on) {
>>>               /* Check if a measurement is currently in progress */
>>>               regmap_read(map, soc_data->temp_data, &val);
>>>               wait = !(val & soc_data->temp_valid_mask);
>>> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>
>>>       regmap_read(map, soc_data->temp_data, &val);
>>>
>>> -     if (run_measurement) {
>>> +     if (!data->sensor_on) {
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>>       }
>>>
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if ((val & soc_data->temp_valid_mask) == 0) {
>>>               dev_dbg(&tz->device, "temp measurement never finished\n");
>>>               return -EAGAIN;
>>> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>
>>>       if (mode == THERMAL_DEVICE_ENABLED) {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->power_down_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->measure_temp_mask);
>>> +             data->sensor_on = true;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (!data->irq_enabled) {
>>>                       data->irq_enabled = true;
>>>                       enable_irq(data->irq);
>>>               }
>>>       } else {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>> +             data->sensor_on = false;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (data->irq_enabled) {
>>>                       disable_irq(data->irq);
>>> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       }
>>>
>>>       /* Make sure sensor is in known good state for measurements */
>>> +     mutex_init(&data->sensor_lock);
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>                       IMX6_MISC0_REFTOP_SELBIASOFF);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       ret = imx_thermal_register_legacy_cooling(data);
>>>       if (ret)
>>> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       if (data->socdata->version == TEMPMON_IMX6SX)
>>>               imx_set_panic_temp(data, data->temp_critical);
>>>
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->measure_temp_mask);
>>> +     data->sensor_on = true;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       data->irq_enabled = true;
>>>       ret = thermal_zone_device_enable(data->tz);
>>> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>>>       struct regmap *map = data->tempmon;
>>>
>>>       /* Disable measurements */
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if (!IS_ERR(data->thermal_clk))
>>>               clk_disable_unprepare(data->thermal_clk);
>>>
>>>
>>
>>
>> --
>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>>
>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> <http://twitter.com/#!/linaroorg> Twitter |
>> <http://www.linaro.org/linaro-blog/> Blog


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18 11:11       ` Daniel Lezcano
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-18 11:11 UTC (permalink / raw)
  To: Petr Benes
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable


I'm not seeing any thermal zone definitions in the DT for imx6 and imx7.

Am I missing something ?


On 18/10/2021 10:36, Petr Benes wrote:
> On Sun, 17 Oct 2021 at 00:23, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> On 08/10/2021 10:11, Michal Vokáč wrote:
>>> From: Petr Beneš <petr.benes@ysoft.com>
>>>
>>> SoC temperature readout may not work after thermal alarm fires interrupt.
>>> This harms userspace as well as CPU cooling device.
>>>
>>> Two issues with the logic involved. First, there is no protection against
>>> concurent measurements, hence one can switch the sensor off while
>>> the other one tries to read temperature later. Second, the interrupt path
>>> usually fails. At the end the sensor is powered off and thermal IRQ is
>>> disabled. One has to reenable the thermal zone by the sysfs interface.
>>>
>>> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
>>> driver's local data to decide whether to run a measurement")
>>
>> Are these troubles observed and reproduced ? Or is it your understanding
>> from reading the code ?
> 
> Yes, it is observed. We are using:
> CONFIG_CPU_THERMAL=y
> CONFIG_CPU_FREQ_THERMAL=y
> If the SoC temperature oscillates  around the passive trip point, it is not
> possible to read out /sys/class/thermal/thermal_zone0/temp after
> a while (minutes). For reproduction either heat the device up a bit or set
> the passive trip point lower.
> 
>>
>> get_temp() and tz enable/disable are protected against races in the core
>> code via the tz mutex
> 
> imx_get_temp() itself doesn't handle concurrent invocations properly, despite
> it may seem it does. The sensor is switched on/off without control.
> That is a flaw in imx_get_temp().
> No evidence the core locking is wrong.
> 
> data->irq_enabled is set to false in imx_thermal_alarm_irq() each time
> the interrupt arrives. imx_get_temp() does wrong decision based
> on the value later.
> 
>>
>>> It uses data->irq_enabled as the "local data". Indeed, its value is
>>> related to the state of the sensor loosely under normal operation and,
>>> frankly, gets unleashed when the thermal interrupt arrives.
>>>
>>> Current patch adds the "local data" (new member sensor_on in
>>> imx_thermal_data) and sets its value in controlled manner.>
>>> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
>>> Cc: petrben@gmail.com
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
>>> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
>>> ---
>>>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>>>  1 file changed, 26 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
>>> index 2c7473d86a59..df5658e21828 100644
>>> --- a/drivers/thermal/imx_thermal.c
>>> +++ b/drivers/thermal/imx_thermal.c
>>> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>>>       struct clk *thermal_clk;
>>>       const struct thermal_soc_data *socdata;
>>>       const char *temp_grade;
>>> +     struct mutex sensor_lock;
>>> +     bool sensor_on;
>>>  };
>>>
>>>  static void imx_set_panic_temp(struct imx_thermal_data *data,
>>> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>       struct regmap *map = data->tempmon;
>>>       unsigned int n_meas;
>>> -     bool wait, run_measurement;
>>> +     bool wait;
>>>       u32 val;
>>>
>>> -     run_measurement = !data->irq_enabled;
>>> -     if (!run_measurement) {
>>> +     mutex_lock(&data->sensor_lock);
>>> +
>>> +     if (data->sensor_on) {
>>>               /* Check if a measurement is currently in progress */
>>>               regmap_read(map, soc_data->temp_data, &val);
>>>               wait = !(val & soc_data->temp_valid_mask);
>>> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>>>
>>>       regmap_read(map, soc_data->temp_data, &val);
>>>
>>> -     if (run_measurement) {
>>> +     if (!data->sensor_on) {
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>>       }
>>>
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if ((val & soc_data->temp_valid_mask) == 0) {
>>>               dev_dbg(&tz->device, "temp measurement never finished\n");
>>>               return -EAGAIN;
>>> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>>>       const struct thermal_soc_data *soc_data = data->socdata;
>>>
>>>       if (mode == THERMAL_DEVICE_ENABLED) {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->power_down_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->measure_temp_mask);
>>> +             data->sensor_on = true;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (!data->irq_enabled) {
>>>                       data->irq_enabled = true;
>>>                       enable_irq(data->irq);
>>>               }
>>>       } else {
>>> +             mutex_lock(&data->sensor_lock);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>>>                            soc_data->measure_temp_mask);
>>>               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>>>                            soc_data->power_down_mask);
>>> +             data->sensor_on = false;
>>> +             mutex_unlock(&data->sensor_lock);
>>>
>>>               if (data->irq_enabled) {
>>>                       disable_irq(data->irq);
>>> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       }
>>>
>>>       /* Make sure sensor is in known good state for measurements */
>>> +     mutex_init(&data->sensor_lock);
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>                       IMX6_MISC0_REFTOP_SELBIASOFF);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       ret = imx_thermal_register_legacy_cooling(data);
>>>       if (ret)
>>> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>>>       if (data->socdata->version == TEMPMON_IMX6SX)
>>>               imx_set_panic_temp(data, data->temp_critical);
>>>
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>>>                    data->socdata->power_down_mask);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->measure_temp_mask);
>>> +     data->sensor_on = true;
>>> +     mutex_unlock(&data->sensor_lock);
>>>
>>>       data->irq_enabled = true;
>>>       ret = thermal_zone_device_enable(data->tz);
>>> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>>>       struct regmap *map = data->tempmon;
>>>
>>>       /* Disable measurements */
>>> +     mutex_lock(&data->sensor_lock);
>>>       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>>>                    data->socdata->power_down_mask);
>>> +     data->sensor_on = false;
>>> +     mutex_unlock(&data->sensor_lock);
>>> +
>>>       if (!IS_ERR(data->thermal_clk))
>>>               clk_disable_unprepare(data->thermal_clk);
>>>
>>>
>>
>>
>> --
>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>>
>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> <http://twitter.com/#!/linaroorg> Twitter |
>> <http://www.linaro.org/linaro-blog/> Blog


-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-08  8:11 ` Michal Vokáč
@ 2021-10-18 11:28   ` Oleksij Rempel
  -1 siblings, 0 replies; 22+ messages in thread
From: Oleksij Rempel @ 2021-10-18 11:28 UTC (permalink / raw)
  To: Michal Vokáč
  Cc: Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo,
	Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	petrben, stable

Hi Michal,

I hope you have seen this patch:
https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/

Are there any reason why this was ignored?

On Fri, Oct 08, 2021 at 10:11:37AM +0200, Michal Vokáč wrote:
> From: Petr Beneš <petr.benes@ysoft.com>
> 
> SoC temperature readout may not work after thermal alarm fires interrupt.
> This harms userspace as well as CPU cooling device.
> 
> Two issues with the logic involved. First, there is no protection against
> concurent measurements, hence one can switch the sensor off while
> the other one tries to read temperature later. Second, the interrupt path
> usually fails. At the end the sensor is powered off and thermal IRQ is
> disabled. One has to reenable the thermal zone by the sysfs interface.
> 
> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> driver's local data to decide whether to run a measurement")
> 
> It uses data->irq_enabled as the "local data". Indeed, its value is
> related to the state of the sensor loosely under normal operation and,
> frankly, gets unleashed when the thermal interrupt arrives.
> 
> Current patch adds the "local data" (new member sensor_on in
> imx_thermal_data) and sets its value in controlled manner.
> 
> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> Cc: petrben@gmail.com
> Cc: stable@vger.kernel.org
> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> ---
>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> index 2c7473d86a59..df5658e21828 100644
> --- a/drivers/thermal/imx_thermal.c
> +++ b/drivers/thermal/imx_thermal.c
> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>  	struct clk *thermal_clk;
>  	const struct thermal_soc_data *socdata;
>  	const char *temp_grade;
> +	struct mutex sensor_lock;
> +	bool sensor_on;
>  };
>  
>  static void imx_set_panic_temp(struct imx_thermal_data *data,
> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  	struct regmap *map = data->tempmon;
>  	unsigned int n_meas;
> -	bool wait, run_measurement;
> +	bool wait;
>  	u32 val;
>  
> -	run_measurement = !data->irq_enabled;
> -	if (!run_measurement) {
> +	mutex_lock(&data->sensor_lock);
> +
> +	if (data->sensor_on) {
>  		/* Check if a measurement is currently in progress */
>  		regmap_read(map, soc_data->temp_data, &val);
>  		wait = !(val & soc_data->temp_valid_mask);
> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  
>  	regmap_read(map, soc_data->temp_data, &val);
>  
> -	if (run_measurement) {
> +	if (!data->sensor_on) {
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
>  	}
>  
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if ((val & soc_data->temp_valid_mask) == 0) {
>  		dev_dbg(&tz->device, "temp measurement never finished\n");
>  		return -EAGAIN;
> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  
>  	if (mode == THERMAL_DEVICE_ENABLED) {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->power_down_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->measure_temp_mask);
> +		data->sensor_on = true;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (!data->irq_enabled) {
>  			data->irq_enabled = true;
>  			enable_irq(data->irq);
>  		}
>  	} else {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
> +		data->sensor_on = false;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (data->irq_enabled) {
>  			disable_irq(data->irq);
> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	}
>  
>  	/* Make sure sensor is in known good state for measurements */
> +	mutex_init(&data->sensor_lock);
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  			IMX6_MISC0_REFTOP_SELBIASOFF);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	ret = imx_thermal_register_legacy_cooling(data);
>  	if (ret)
> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	if (data->socdata->version == TEMPMON_IMX6SX)
>  		imx_set_panic_temp(data, data->temp_critical);
>  
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->measure_temp_mask);
> +	data->sensor_on = true;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	data->irq_enabled = true;
>  	ret = thermal_zone_device_enable(data->tz);
> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>  	struct regmap *map = data->tempmon;
>  
>  	/* Disable measurements */
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if (!IS_ERR(data->thermal_clk))
>  		clk_disable_unprepare(data->thermal_clk);
>  
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18 11:28   ` Oleksij Rempel
  0 siblings, 0 replies; 22+ messages in thread
From: Oleksij Rempel @ 2021-10-18 11:28 UTC (permalink / raw)
  To: Michal Vokáč
  Cc: Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo,
	Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	petrben, stable

Hi Michal,

I hope you have seen this patch:
https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/

Are there any reason why this was ignored?

On Fri, Oct 08, 2021 at 10:11:37AM +0200, Michal Vokáč wrote:
> From: Petr Beneš <petr.benes@ysoft.com>
> 
> SoC temperature readout may not work after thermal alarm fires interrupt.
> This harms userspace as well as CPU cooling device.
> 
> Two issues with the logic involved. First, there is no protection against
> concurent measurements, hence one can switch the sensor off while
> the other one tries to read temperature later. Second, the interrupt path
> usually fails. At the end the sensor is powered off and thermal IRQ is
> disabled. One has to reenable the thermal zone by the sysfs interface.
> 
> Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> driver's local data to decide whether to run a measurement")
> 
> It uses data->irq_enabled as the "local data". Indeed, its value is
> related to the state of the sensor loosely under normal operation and,
> frankly, gets unleashed when the thermal interrupt arrives.
> 
> Current patch adds the "local data" (new member sensor_on in
> imx_thermal_data) and sets its value in controlled manner.
> 
> Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> Cc: petrben@gmail.com
> Cc: stable@vger.kernel.org
> Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> ---
>  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> index 2c7473d86a59..df5658e21828 100644
> --- a/drivers/thermal/imx_thermal.c
> +++ b/drivers/thermal/imx_thermal.c
> @@ -209,6 +209,8 @@ struct imx_thermal_data {
>  	struct clk *thermal_clk;
>  	const struct thermal_soc_data *socdata;
>  	const char *temp_grade;
> +	struct mutex sensor_lock;
> +	bool sensor_on;
>  };
>  
>  static void imx_set_panic_temp(struct imx_thermal_data *data,
> @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  	struct regmap *map = data->tempmon;
>  	unsigned int n_meas;
> -	bool wait, run_measurement;
> +	bool wait;
>  	u32 val;
>  
> -	run_measurement = !data->irq_enabled;
> -	if (!run_measurement) {
> +	mutex_lock(&data->sensor_lock);
> +
> +	if (data->sensor_on) {
>  		/* Check if a measurement is currently in progress */
>  		regmap_read(map, soc_data->temp_data, &val);
>  		wait = !(val & soc_data->temp_valid_mask);
> @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
>  
>  	regmap_read(map, soc_data->temp_data, &val);
>  
> -	if (run_measurement) {
> +	if (!data->sensor_on) {
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
>  	}
>  
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if ((val & soc_data->temp_valid_mask) == 0) {
>  		dev_dbg(&tz->device, "temp measurement never finished\n");
>  		return -EAGAIN;
> @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
>  	const struct thermal_soc_data *soc_data = data->socdata;
>  
>  	if (mode == THERMAL_DEVICE_ENABLED) {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->power_down_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->measure_temp_mask);
> +		data->sensor_on = true;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (!data->irq_enabled) {
>  			data->irq_enabled = true;
>  			enable_irq(data->irq);
>  		}
>  	} else {
> +		mutex_lock(&data->sensor_lock);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
>  			     soc_data->measure_temp_mask);
>  		regmap_write(map, soc_data->sensor_ctrl + REG_SET,
>  			     soc_data->power_down_mask);
> +		data->sensor_on = false;
> +		mutex_unlock(&data->sensor_lock);
>  
>  		if (data->irq_enabled) {
>  			disable_irq(data->irq);
> @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	}
>  
>  	/* Make sure sensor is in known good state for measurements */
> +	mutex_init(&data->sensor_lock);
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  			IMX6_MISC0_REFTOP_SELBIASOFF);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	ret = imx_thermal_register_legacy_cooling(data);
>  	if (ret)
> @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
>  	if (data->socdata->version == TEMPMON_IMX6SX)
>  		imx_set_panic_temp(data, data->temp_critical);
>  
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
>  		     data->socdata->power_down_mask);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->measure_temp_mask);
> +	data->sensor_on = true;
> +	mutex_unlock(&data->sensor_lock);
>  
>  	data->irq_enabled = true;
>  	ret = thermal_zone_device_enable(data->tz);
> @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
>  	struct regmap *map = data->tempmon;
>  
>  	/* Disable measurements */
> +	mutex_lock(&data->sensor_lock);
>  	regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
>  		     data->socdata->power_down_mask);
> +	data->sensor_on = false;
> +	mutex_unlock(&data->sensor_lock);
> +
>  	if (!IS_ERR(data->thermal_clk))
>  		clk_disable_unprepare(data->thermal_clk);
>  
> -- 
> 2.25.1
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-18 11:28   ` Oleksij Rempel
@ 2021-10-18 11:38     ` Daniel Lezcano
  -1 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-18 11:38 UTC (permalink / raw)
  To: Oleksij Rempel, Michal Vokáč
  Cc: Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	petrben, stable

On 18/10/2021 13:28, Oleksij Rempel wrote:
> Hi Michal,
> 
> I hope you have seen this patch:
> https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
> 
> Are there any reason why this was ignored?

No reasons, I was waiting for some tags before merging it. But I forget
about it when reviewing the current patch.



-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18 11:38     ` Daniel Lezcano
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Lezcano @ 2021-10-18 11:38 UTC (permalink / raw)
  To: Oleksij Rempel, Michal Vokáč
  Cc: Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	petrben, stable

On 18/10/2021 13:28, Oleksij Rempel wrote:
> Hi Michal,
> 
> I hope you have seen this patch:
> https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
> 
> Are there any reason why this was ignored?

No reasons, I was waiting for some tags before merging it. But I forget
about it when reviewing the current patch.



-- 
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-18 11:28   ` Oleksij Rempel
@ 2021-10-18 11:41     ` Petr Benes
  -1 siblings, 0 replies; 22+ messages in thread
From: Petr Benes @ 2021-10-18 11:41 UTC (permalink / raw)
  To: Oleksij Rempel
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo,
	Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	stable

On Mon, 18 Oct 2021 at 13:28, Oleksij Rempel <o.rempel@pengutronix.de> wrote:
>
> Hi Michal,
>
> I hope you have seen this patch:
> https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
>
> Are there any reason why this was ignored?

Missed completely. It looks it addresses the problem more systematically.

>
> On Fri, Oct 08, 2021 at 10:11:37AM +0200, Michal Vokáč wrote:
> > From: Petr Beneš <petr.benes@ysoft.com>
> >
> > SoC temperature readout may not work after thermal alarm fires interrupt.
> > This harms userspace as well as CPU cooling device.
> >
> > Two issues with the logic involved. First, there is no protection against
> > concurent measurements, hence one can switch the sensor off while
> > the other one tries to read temperature later. Second, the interrupt path
> > usually fails. At the end the sensor is powered off and thermal IRQ is
> > disabled. One has to reenable the thermal zone by the sysfs interface.
> >
> > Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> > driver's local data to decide whether to run a measurement")
> >
> > It uses data->irq_enabled as the "local data". Indeed, its value is
> > related to the state of the sensor loosely under normal operation and,
> > frankly, gets unleashed when the thermal interrupt arrives.
> >
> > Current patch adds the "local data" (new member sensor_on in
> > imx_thermal_data) and sets its value in controlled manner.
> >
> > Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> > Cc: petrben@gmail.com
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> > Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> > ---
> >  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
> >  1 file changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> > index 2c7473d86a59..df5658e21828 100644
> > --- a/drivers/thermal/imx_thermal.c
> > +++ b/drivers/thermal/imx_thermal.c
> > @@ -209,6 +209,8 @@ struct imx_thermal_data {
> >       struct clk *thermal_clk;
> >       const struct thermal_soc_data *socdata;
> >       const char *temp_grade;
> > +     struct mutex sensor_lock;
> > +     bool sensor_on;
> >  };
> >
> >  static void imx_set_panic_temp(struct imx_thermal_data *data,
> > @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >       struct regmap *map = data->tempmon;
> >       unsigned int n_meas;
> > -     bool wait, run_measurement;
> > +     bool wait;
> >       u32 val;
> >
> > -     run_measurement = !data->irq_enabled;
> > -     if (!run_measurement) {
> > +     mutex_lock(&data->sensor_lock);
> > +
> > +     if (data->sensor_on) {
> >               /* Check if a measurement is currently in progress */
> >               regmap_read(map, soc_data->temp_data, &val);
> >               wait = !(val & soc_data->temp_valid_mask);
> > @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >
> >       regmap_read(map, soc_data->temp_data, &val);
> >
> > -     if (run_measurement) {
> > +     if (!data->sensor_on) {
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> >       }
> >
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if ((val & soc_data->temp_valid_mask) == 0) {
> >               dev_dbg(&tz->device, "temp measurement never finished\n");
> >               return -EAGAIN;
> > @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >
> >       if (mode == THERMAL_DEVICE_ENABLED) {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->power_down_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->measure_temp_mask);
> > +             data->sensor_on = true;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (!data->irq_enabled) {
> >                       data->irq_enabled = true;
> >                       enable_irq(data->irq);
> >               }
> >       } else {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> > +             data->sensor_on = false;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (data->irq_enabled) {
> >                       disable_irq(data->irq);
> > @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       }
> >
> >       /* Make sure sensor is in known good state for measurements */
> > +     mutex_init(&data->sensor_lock);
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> > @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >                       IMX6_MISC0_REFTOP_SELBIASOFF);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       ret = imx_thermal_register_legacy_cooling(data);
> >       if (ret)
> > @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       if (data->socdata->version == TEMPMON_IMX6SX)
> >               imx_set_panic_temp(data, data->temp_critical);
> >
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->measure_temp_mask);
> > +     data->sensor_on = true;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       data->irq_enabled = true;
> >       ret = thermal_zone_device_enable(data->tz);
> > @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
> >       struct regmap *map = data->tempmon;
> >
> >       /* Disable measurements */
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if (!IS_ERR(data->thermal_clk))
> >               clk_disable_unprepare(data->thermal_clk);
> >
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18 11:41     ` Petr Benes
  0 siblings, 0 replies; 22+ messages in thread
From: Petr Benes @ 2021-10-18 11:41 UTC (permalink / raw)
  To: Oleksij Rempel
  Cc: Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo,
	Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	stable

On Mon, 18 Oct 2021 at 13:28, Oleksij Rempel <o.rempel@pengutronix.de> wrote:
>
> Hi Michal,
>
> I hope you have seen this patch:
> https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
>
> Are there any reason why this was ignored?

Missed completely. It looks it addresses the problem more systematically.

>
> On Fri, Oct 08, 2021 at 10:11:37AM +0200, Michal Vokáč wrote:
> > From: Petr Beneš <petr.benes@ysoft.com>
> >
> > SoC temperature readout may not work after thermal alarm fires interrupt.
> > This harms userspace as well as CPU cooling device.
> >
> > Two issues with the logic involved. First, there is no protection against
> > concurent measurements, hence one can switch the sensor off while
> > the other one tries to read temperature later. Second, the interrupt path
> > usually fails. At the end the sensor is powered off and thermal IRQ is
> > disabled. One has to reenable the thermal zone by the sysfs interface.
> >
> > Most of troubles come from commit d92ed2c9d3ff ("thermal: imx: Use
> > driver's local data to decide whether to run a measurement")
> >
> > It uses data->irq_enabled as the "local data". Indeed, its value is
> > related to the state of the sensor loosely under normal operation and,
> > frankly, gets unleashed when the thermal interrupt arrives.
> >
> > Current patch adds the "local data" (new member sensor_on in
> > imx_thermal_data) and sets its value in controlled manner.
> >
> > Fixes: d92ed2c9d3ff ("thermal: imx: Use driver's local data to decide whether to run a measurement")
> > Cc: petrben@gmail.com
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Petr Beneš <petr.benes@ysoft.com>
> > Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
> > ---
> >  drivers/thermal/imx_thermal.c | 30 ++++++++++++++++++++++++++----
> >  1 file changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
> > index 2c7473d86a59..df5658e21828 100644
> > --- a/drivers/thermal/imx_thermal.c
> > +++ b/drivers/thermal/imx_thermal.c
> > @@ -209,6 +209,8 @@ struct imx_thermal_data {
> >       struct clk *thermal_clk;
> >       const struct thermal_soc_data *socdata;
> >       const char *temp_grade;
> > +     struct mutex sensor_lock;
> > +     bool sensor_on;
> >  };
> >
> >  static void imx_set_panic_temp(struct imx_thermal_data *data,
> > @@ -252,11 +254,12 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >       struct regmap *map = data->tempmon;
> >       unsigned int n_meas;
> > -     bool wait, run_measurement;
> > +     bool wait;
> >       u32 val;
> >
> > -     run_measurement = !data->irq_enabled;
> > -     if (!run_measurement) {
> > +     mutex_lock(&data->sensor_lock);
> > +
> > +     if (data->sensor_on) {
> >               /* Check if a measurement is currently in progress */
> >               regmap_read(map, soc_data->temp_data, &val);
> >               wait = !(val & soc_data->temp_valid_mask);
> > @@ -283,13 +286,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
> >
> >       regmap_read(map, soc_data->temp_data, &val);
> >
> > -     if (run_measurement) {
> > +     if (!data->sensor_on) {
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> >       }
> >
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if ((val & soc_data->temp_valid_mask) == 0) {
> >               dev_dbg(&tz->device, "temp measurement never finished\n");
> >               return -EAGAIN;
> > @@ -339,20 +344,26 @@ static int imx_change_mode(struct thermal_zone_device *tz,
> >       const struct thermal_soc_data *soc_data = data->socdata;
> >
> >       if (mode == THERMAL_DEVICE_ENABLED) {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->power_down_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->measure_temp_mask);
> > +             data->sensor_on = true;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (!data->irq_enabled) {
> >                       data->irq_enabled = true;
> >                       enable_irq(data->irq);
> >               }
> >       } else {
> > +             mutex_lock(&data->sensor_lock);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
> >                            soc_data->measure_temp_mask);
> >               regmap_write(map, soc_data->sensor_ctrl + REG_SET,
> >                            soc_data->power_down_mask);
> > +             data->sensor_on = false;
> > +             mutex_unlock(&data->sensor_lock);
> >
> >               if (data->irq_enabled) {
> >                       disable_irq(data->irq);
> > @@ -728,6 +739,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       }
> >
> >       /* Make sure sensor is in known good state for measurements */
> > +     mutex_init(&data->sensor_lock);
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> > @@ -739,6 +752,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >                       IMX6_MISC0_REFTOP_SELBIASOFF);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       ret = imx_thermal_register_legacy_cooling(data);
> >       if (ret)
> > @@ -796,10 +811,13 @@ static int imx_thermal_probe(struct platform_device *pdev)
> >       if (data->socdata->version == TEMPMON_IMX6SX)
> >               imx_set_panic_temp(data, data->temp_critical);
> >
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_CLR,
> >                    data->socdata->power_down_mask);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->measure_temp_mask);
> > +     data->sensor_on = true;
> > +     mutex_unlock(&data->sensor_lock);
> >
> >       data->irq_enabled = true;
> >       ret = thermal_zone_device_enable(data->tz);
> > @@ -832,8 +850,12 @@ static int imx_thermal_remove(struct platform_device *pdev)
> >       struct regmap *map = data->tempmon;
> >
> >       /* Disable measurements */
> > +     mutex_lock(&data->sensor_lock);
> >       regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
> >                    data->socdata->power_down_mask);
> > +     data->sensor_on = false;
> > +     mutex_unlock(&data->sensor_lock);
> > +
> >       if (!IS_ERR(data->thermal_clk))
> >               clk_disable_unprepare(data->thermal_clk);
> >
> > --
> > 2.25.1
> >

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-18 11:41     ` Petr Benes
@ 2021-10-18 12:00       ` Michal Vokáč
  -1 siblings, 0 replies; 22+ messages in thread
From: Michal Vokáč @ 2021-10-18 12:00 UTC (permalink / raw)
  To: Petr Benes, Oleksij Rempel
  Cc: Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo,
	Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	stable

On 18. 10. 21 13:41, Petr Benes wrote:
> On Mon, 18 Oct 2021 at 13:28, Oleksij Rempel <o.rempel@pengutronix.de> wrote:
>>
>> Hi Michal,
>>
>> I hope you have seen this patch:
>> https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
>>
>> Are there any reason why this was ignored?
> 
> Missed completely. It looks it addresses the problem more systematically.
> Hi Oleksij,

Thank you for pointing that out. As Peter said, we missed your patch
completely. We do not have the right HW around now but we will test
the patch later today once we get the board.

I will keep you updated.
Thank you,
Michal

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-18 12:00       ` Michal Vokáč
  0 siblings, 0 replies; 22+ messages in thread
From: Michal Vokáč @ 2021-10-18 12:00 UTC (permalink / raw)
  To: Petr Benes, Oleksij Rempel
  Cc: Andrzej Pietrasiewicz, linux-pm, Daniel Lezcano, Shawn Guo,
	Amit Kucheria, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, NXP Linux Team, linux-arm-kernel, linux-kernel,
	Petr Beneš,
	stable

On 18. 10. 21 13:41, Petr Benes wrote:
> On Mon, 18 Oct 2021 at 13:28, Oleksij Rempel <o.rempel@pengutronix.de> wrote:
>>
>> Hi Michal,
>>
>> I hope you have seen this patch:
>> https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
>>
>> Are there any reason why this was ignored?
> 
> Missed completely. It looks it addresses the problem more systematically.
> Hi Oleksij,

Thank you for pointing that out. As Peter said, we missed your patch
completely. We do not have the right HW around now but we will test
the patch later today once we get the board.

I will keep you updated.
Thank you,
Michal

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-18 11:38     ` Daniel Lezcano
@ 2021-10-19  7:24       ` Petr Benes
  -1 siblings, 0 replies; 22+ messages in thread
From: Petr Benes @ 2021-10-19  7:24 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Oleksij Rempel, Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

On Mon, 18 Oct 2021 at 13:38, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 18/10/2021 13:28, Oleksij Rempel wrote:
> > Hi Michal,
> >
> > I hope you have seen this patch:
> > https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
> >
> > Are there any reason why this was ignored?
>
> No reasons, I was waiting for some tags before merging it. But I forget
> about it when reviewing the current patch.

Tested Oleksij's patch. It works OK. A question arose. Does it require
CONFIG_PM=y?
If this condition is mandatory and the requirement is valid, Kconfig
should be changed accordingly.
I'm not able to verify it works without PM, seems it doesn't.
>
>
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-19  7:24       ` Petr Benes
  0 siblings, 0 replies; 22+ messages in thread
From: Petr Benes @ 2021-10-19  7:24 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Oleksij Rempel, Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

On Mon, 18 Oct 2021 at 13:38, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
> On 18/10/2021 13:28, Oleksij Rempel wrote:
> > Hi Michal,
> >
> > I hope you have seen this patch:
> > https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
> >
> > Are there any reason why this was ignored?
>
> No reasons, I was waiting for some tags before merging it. But I forget
> about it when reviewing the current patch.

Tested Oleksij's patch. It works OK. A question arose. Does it require
CONFIG_PM=y?
If this condition is mandatory and the requirement is valid, Kconfig
should be changed accordingly.
I'm not able to verify it works without PM, seems it doesn't.
>
>
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
  2021-10-19  7:24       ` Petr Benes
@ 2021-10-19 10:35         ` Oleksij Rempel
  -1 siblings, 0 replies; 22+ messages in thread
From: Oleksij Rempel @ 2021-10-19 10:35 UTC (permalink / raw)
  To: Petr Benes
  Cc: Daniel Lezcano, Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

Hi Petr,

On Tue, Oct 19, 2021 at 09:24:44AM +0200, Petr Benes wrote:
> On Mon, 18 Oct 2021 at 13:38, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> >
> > On 18/10/2021 13:28, Oleksij Rempel wrote:
> > > Hi Michal,
> > >
> > > I hope you have seen this patch:
> > > https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
> > >
> > > Are there any reason why this was ignored?
> >
> > No reasons, I was waiting for some tags before merging it. But I forget
> > about it when reviewing the current patch.
> 
> Tested Oleksij's patch. It works OK. A question arose. Does it require
> CONFIG_PM=y?
> If this condition is mandatory and the requirement is valid, Kconfig
> should be changed accordingly.
> I'm not able to verify it works without PM, seems it doesn't.

If CONFIG_PM=n, all pm_runtime_* will do nothing and the imx_thermal core
will stay enabled, see:
 
+/* the core was configured and enabled just before */
+pm_runtime_set_active(&pdev->dev);
+pm_runtime_enable(data->dev);

Regards,
Oleksij
-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm
@ 2021-10-19 10:35         ` Oleksij Rempel
  0 siblings, 0 replies; 22+ messages in thread
From: Oleksij Rempel @ 2021-10-19 10:35 UTC (permalink / raw)
  To: Petr Benes
  Cc: Daniel Lezcano, Michal Vokáč,
	Andrzej Pietrasiewicz, linux-pm, Shawn Guo, Amit Kucheria,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team, linux-arm-kernel, linux-kernel, Petr Beneš,
	stable

Hi Petr,

On Tue, Oct 19, 2021 at 09:24:44AM +0200, Petr Benes wrote:
> On Mon, 18 Oct 2021 at 13:38, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> >
> > On 18/10/2021 13:28, Oleksij Rempel wrote:
> > > Hi Michal,
> > >
> > > I hope you have seen this patch:
> > > https://lore.kernel.org/all/20210924115032.29684-1-o.rempel@pengutronix.de/
> > >
> > > Are there any reason why this was ignored?
> >
> > No reasons, I was waiting for some tags before merging it. But I forget
> > about it when reviewing the current patch.
> 
> Tested Oleksij's patch. It works OK. A question arose. Does it require
> CONFIG_PM=y?
> If this condition is mandatory and the requirement is valid, Kconfig
> should be changed accordingly.
> I'm not able to verify it works without PM, seems it doesn't.

If CONFIG_PM=n, all pm_runtime_* will do nothing and the imx_thermal core
will stay enabled, see:
 
+/* the core was configured and enabled just before */
+pm_runtime_set_active(&pdev->dev);
+pm_runtime_enable(data->dev);

Regards,
Oleksij
-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2021-10-19 10:37 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-08  8:11 [PATCH] thermal: imx: Fix temperature measurements on i.MX6 after alarm Michal Vokáč
2021-10-08  8:11 ` Michal Vokáč
2021-10-16 22:23 ` Daniel Lezcano
2021-10-16 22:23   ` Daniel Lezcano
2021-10-18  8:36   ` Petr Benes
2021-10-18  8:36     ` Petr Benes
2021-10-18  8:43     ` Daniel Lezcano
2021-10-18  8:43       ` Daniel Lezcano
2021-10-18 11:11     ` Daniel Lezcano
2021-10-18 11:11       ` Daniel Lezcano
2021-10-18 11:28 ` Oleksij Rempel
2021-10-18 11:28   ` Oleksij Rempel
2021-10-18 11:38   ` Daniel Lezcano
2021-10-18 11:38     ` Daniel Lezcano
2021-10-19  7:24     ` Petr Benes
2021-10-19  7:24       ` Petr Benes
2021-10-19 10:35       ` Oleksij Rempel
2021-10-19 10:35         ` Oleksij Rempel
2021-10-18 11:41   ` Petr Benes
2021-10-18 11:41     ` Petr Benes
2021-10-18 12:00     ` Michal Vokáč
2021-10-18 12:00       ` Michal Vokáč

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.