linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
@ 2020-01-10 17:49 ` Matthias Kaehlcke
  2020-01-10 17:49   ` [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine " Matthias Kaehlcke
                     ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Matthias Kaehlcke @ 2020-01-10 17:49 UTC (permalink / raw)
  To: MyungJoo Ham, Kyungmin Park, Chanwoo Choi, Zhang Rui,
	Daniel Lezcano, Amit Kucheria
  Cc: Leonard Crestez, linux-kernel, linux-pm, Matthias Kaehlcke

Now that devfreq supports limiting the frequency range of a device
through PM QoS make use of it instead of disabling OPPs that should
not be used.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
---

 drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
 1 file changed, 20 insertions(+), 46 deletions(-)

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index ef59256887ff..3a63603afcf2 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -24,11 +24,13 @@
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/pm_opp.h>
+#include <linux/pm_qos.h>
 #include <linux/thermal.h>
 
 #include <trace/events/thermal.h>
 
-#define SCALE_ERROR_MITIGATION 100
+#define HZ_PER_KHZ		1000
+#define SCALE_ERROR_MITIGATION	100
 
 static DEFINE_IDA(devfreq_ida);
 
@@ -65,49 +67,9 @@ struct devfreq_cooling_device {
 	struct devfreq_cooling_power *power_ops;
 	u32 res_util;
 	int capped_state;
+	struct dev_pm_qos_request req_max_freq;
 };
 
-/**
- * partition_enable_opps() - disable all opps above a given state
- * @dfc:	Pointer to devfreq we are operating on
- * @cdev_state:	cooling device state we're setting
- *
- * Go through the OPPs of the device, enabling all OPPs until
- * @cdev_state and disabling those frequencies above it.
- */
-static int partition_enable_opps(struct devfreq_cooling_device *dfc,
-				 unsigned long cdev_state)
-{
-	int i;
-	struct device *dev = dfc->devfreq->dev.parent;
-
-	for (i = 0; i < dfc->freq_table_size; i++) {
-		struct dev_pm_opp *opp;
-		int ret = 0;
-		unsigned int freq = dfc->freq_table[i];
-		bool want_enable = i >= cdev_state ? true : false;
-
-		opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
-
-		if (PTR_ERR(opp) == -ERANGE)
-			continue;
-		else if (IS_ERR(opp))
-			return PTR_ERR(opp);
-
-		dev_pm_opp_put(opp);
-
-		if (want_enable)
-			ret = dev_pm_opp_enable(dev, freq);
-		else
-			ret = dev_pm_opp_disable(dev, freq);
-
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
 					 unsigned long *state)
 {
@@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
 	struct devfreq_cooling_device *dfc = cdev->devdata;
 	struct devfreq *df = dfc->devfreq;
 	struct device *dev = df->dev.parent;
-	int ret;
+	unsigned long freq;
 
 	if (state == dfc->cooling_state)
 		return 0;
@@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
 	if (state >= dfc->freq_table_size)
 		return -EINVAL;
 
-	ret = partition_enable_opps(dfc, state);
-	if (ret)
-		return ret;
+	freq = dfc->freq_table[state];
+
+	dev_pm_qos_update_request(&dfc->req_max_freq,
+				  DIV_ROUND_UP(freq, HZ_PER_KHZ));
 
 	dfc->cooling_state = state;
 
@@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 	if (err)
 		goto free_dfc;
 
+	err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
+				     DEV_PM_QOS_MAX_FREQUENCY,
+				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (err < 0)
+		goto remove_qos_req;
+
 	err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
 	if (err < 0)
 		goto free_tables;
@@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 
 release_ida:
 	ida_simple_remove(&devfreq_ida, dfc->id);
+
+remove_qos_req:
+	dev_pm_qos_remove_request(&dfc->req_max_freq);
+
 free_tables:
 	kfree(dfc->power_table);
 	kfree(dfc->freq_table);
@@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 
 	thermal_cooling_device_unregister(dfc->cdev);
 	ida_simple_remove(&devfreq_ida, dfc->id);
+	dev_pm_qos_remove_request(&dfc->req_max_freq);
 	kfree(dfc->power_table);
 	kfree(dfc->freq_table);
 
-- 
2.25.0.rc1.283.g88dfdc4193-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits
  2020-01-10 17:49 ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits Matthias Kaehlcke
@ 2020-01-10 17:49   ` Matthias Kaehlcke
  2020-01-13  7:31     ` Chanwoo Choi
  2020-01-13  7:25   ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set " Chanwoo Choi
  2020-01-14 16:08   ` Leonard Crestez
  2 siblings, 1 reply; 10+ messages in thread
From: Matthias Kaehlcke @ 2020-01-10 17:49 UTC (permalink / raw)
  To: MyungJoo Ham, Kyungmin Park, Chanwoo Choi, Zhang Rui,
	Daniel Lezcano, Amit Kucheria
  Cc: Leonard Crestez, linux-kernel, linux-pm, Matthias Kaehlcke

Traditionally devfreq cooling devices dynamically disabled OPPs
that shouldn't be used because of thermal pressure. Devfreq cooling
devices now use PM QoS to set frequency limits, hence the devfreq
code dealing that deals with disabled OPPs can be removed.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
---

 drivers/devfreq/devfreq.c | 75 +++++----------------------------------
 include/linux/devfreq.h   |  4 ---
 2 files changed, 8 insertions(+), 71 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 57f6944d65a6..ec66e2c27cc4 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
 	return ERR_PTR(-ENODEV);
 }
 
-static unsigned long find_available_min_freq(struct devfreq *devfreq)
-{
-	struct dev_pm_opp *opp;
-	unsigned long min_freq = 0;
-
-	opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
-	if (IS_ERR(opp))
-		min_freq = 0;
-	else
-		dev_pm_opp_put(opp);
-
-	return min_freq;
-}
-
-static unsigned long find_available_max_freq(struct devfreq *devfreq)
-{
-	struct dev_pm_opp *opp;
-	unsigned long max_freq = ULONG_MAX;
-
-	opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
-	if (IS_ERR(opp))
-		max_freq = 0;
-	else
-		dev_pm_opp_put(opp);
-
-	return max_freq;
-}
-
 /**
  * get_freq_range() - Get the current freq range
  * @devfreq:	the devfreq instance
@@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
 		*max_freq = min(*max_freq,
 				(unsigned long)HZ_PER_KHZ * qos_max_freq);
 
-	/* Apply constraints from OPP interface */
-	*min_freq = max(*min_freq, devfreq->scaling_min_freq);
-	*max_freq = min(*max_freq, devfreq->scaling_max_freq);
-
 	if (*min_freq > *max_freq)
 		*min_freq = *max_freq;
 }
@@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
 				 void *devp)
 {
 	struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
-	int err = -EINVAL;
+	int err;
 
 	mutex_lock(&devfreq->lock);
-
-	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
-	if (!devfreq->scaling_min_freq)
-		goto out;
-
-	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
-	if (!devfreq->scaling_max_freq) {
-		devfreq->scaling_max_freq = ULONG_MAX;
-		goto out;
-	}
-
 	err = update_devfreq(devfreq);
-
-out:
 	mutex_unlock(&devfreq->lock);
 	if (err)
 		dev_err(devfreq->dev.parent,
@@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		mutex_lock(&devfreq->lock);
 	}
 
-	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
-	if (!devfreq->scaling_min_freq) {
-		mutex_unlock(&devfreq->lock);
-		err = -EINVAL;
+	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
+				     DEV_PM_QOS_MIN_FREQUENCY, 0);
+	if (err < 0)
 		goto err_dev;
-	}
-
-	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
-	if (!devfreq->scaling_max_freq) {
-		mutex_unlock(&devfreq->lock);
-		err = -EINVAL;
+	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
+				     DEV_PM_QOS_MAX_FREQUENCY,
+				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (err < 0)
 		goto err_dev;
-	}
 
 	devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
 	atomic_set(&devfreq->suspend_count, 0);
@@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 
 	mutex_unlock(&devfreq->lock);
 
-	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
-				     DEV_PM_QOS_MIN_FREQUENCY, 0);
-	if (err < 0)
-		goto err_devfreq;
-	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
-				     DEV_PM_QOS_MAX_FREQUENCY,
-				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
-	if (err < 0)
-		goto err_devfreq;
-
 	devfreq->nb_min.notifier_call = qos_min_notifier_call;
 	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
 				      DEV_PM_QOS_MIN_FREQUENCY);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index fb376b5b7281..cb75f23ad2f4 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -126,8 +126,6 @@ struct devfreq_dev_profile {
  *		touch this.
  * @user_min_freq_req:	PM QoS minimum frequency request from user (via sysfs)
  * @user_max_freq_req:	PM QoS maximum frequency request from user (via sysfs)
- * @scaling_min_freq:	Limit minimum frequency requested by OPP interface
- * @scaling_max_freq:	Limit maximum frequency requested by OPP interface
  * @stop_polling:	 devfreq polling status of a device.
  * @suspend_freq:	 frequency of a device set during suspend phase.
  * @resume_freq:	 frequency of a device set in resume phase.
@@ -166,8 +164,6 @@ struct devfreq {
 
 	struct dev_pm_qos_request user_min_freq_req;
 	struct dev_pm_qos_request user_max_freq_req;
-	unsigned long scaling_min_freq;
-	unsigned long scaling_max_freq;
 	bool stop_polling;
 
 	unsigned long suspend_freq;
-- 
2.25.0.rc1.283.g88dfdc4193-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
  2020-01-10 17:49 ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits Matthias Kaehlcke
  2020-01-10 17:49   ` [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine " Matthias Kaehlcke
@ 2020-01-13  7:25   ` Chanwoo Choi
  2020-01-14 18:51     ` Matthias Kaehlcke
  2020-01-14 16:08   ` Leonard Crestez
  2 siblings, 1 reply; 10+ messages in thread
From: Chanwoo Choi @ 2020-01-13  7:25 UTC (permalink / raw)
  To: Matthias Kaehlcke, MyungJoo Ham, Kyungmin Park, Zhang Rui,
	Daniel Lezcano, Amit Kucheria
  Cc: Leonard Crestez, linux-kernel, linux-pm

Hi,

On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> Now that devfreq supports limiting the frequency range of a device
> through PM QoS make use of it instead of disabling OPPs that should
> not be used.
> 
> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> ---
> 
>  drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
>  1 file changed, 20 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index ef59256887ff..3a63603afcf2 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -24,11 +24,13 @@
>  #include <linux/idr.h>
>  #include <linux/slab.h>
>  #include <linux/pm_opp.h>
> +#include <linux/pm_qos.h>
>  #include <linux/thermal.h>
>  
>  #include <trace/events/thermal.h>
>  
> -#define SCALE_ERROR_MITIGATION 100
> +#define HZ_PER_KHZ		1000
> +#define SCALE_ERROR_MITIGATION	100
>  
>  static DEFINE_IDA(devfreq_ida);
>  
> @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
>  	struct devfreq_cooling_power *power_ops;
>  	u32 res_util;
>  	int capped_state;
> +	struct dev_pm_qos_request req_max_freq;

Need to add the description of 'req_max_freq'.

>  };
>  
> -/**
> - * partition_enable_opps() - disable all opps above a given state
> - * @dfc:	Pointer to devfreq we are operating on
> - * @cdev_state:	cooling device state we're setting
> - *
> - * Go through the OPPs of the device, enabling all OPPs until
> - * @cdev_state and disabling those frequencies above it.
> - */
> -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> -				 unsigned long cdev_state)
> -{
> -	int i;
> -	struct device *dev = dfc->devfreq->dev.parent;
> -
> -	for (i = 0; i < dfc->freq_table_size; i++) {
> -		struct dev_pm_opp *opp;
> -		int ret = 0;
> -		unsigned int freq = dfc->freq_table[i];
> -		bool want_enable = i >= cdev_state ? true : false;
> -
> -		opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> -
> -		if (PTR_ERR(opp) == -ERANGE)
> -			continue;
> -		else if (IS_ERR(opp))
> -			return PTR_ERR(opp);
> -
> -		dev_pm_opp_put(opp);
> -
> -		if (want_enable)
> -			ret = dev_pm_opp_enable(dev, freq);
> -		else
> -			ret = dev_pm_opp_disable(dev, freq);
> -
> -		if (ret)
> -			return ret;
> -	}
> -
> -	return 0;
> -}
> -
>  static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
>  					 unsigned long *state)
>  {
> @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
>  	struct devfreq_cooling_device *dfc = cdev->devdata;
>  	struct devfreq *df = dfc->devfreq;
>  	struct device *dev = df->dev.parent;
> -	int ret;
> +	unsigned long freq;
>  
>  	if (state == dfc->cooling_state)
>  		return 0;
> @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
>  	if (state >= dfc->freq_table_size)
>  		return -EINVAL;
>  
> -	ret = partition_enable_opps(dfc, state);
> -	if (ret)
> -		return ret;
> +	freq = dfc->freq_table[state];
> +
> +	dev_pm_qos_update_request(&dfc->req_max_freq,
> +				  DIV_ROUND_UP(freq, HZ_PER_KHZ));
>  
>  	dfc->cooling_state = state;
>  
> @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>  	if (err)
>  		goto free_dfc;
>  
> +	err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> +				     DEV_PM_QOS_MAX_FREQUENCY,
> +				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> +	if (err < 0)
> +		goto remove_qos_req;

Jump 'free_table' instead of 'remove_qos_req'.

> +
>  	err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
>  	if (err < 0)
>  		goto free_tables;

Jump remove_qos_req.

> @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>  
>  release_ida:
>  	ida_simple_remove(&devfreq_ida, dfc->id);
> +
> +remove_qos_req:
> +	dev_pm_qos_remove_request(&dfc->req_max_freq);
> +
>  free_tables:
>  	kfree(dfc->power_table);
>  	kfree(dfc->freq_table);
> @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>  
>  	thermal_cooling_device_unregister(dfc->cdev);
>  	ida_simple_remove(&devfreq_ida, dfc->id);
> +	dev_pm_qos_remove_request(&dfc->req_max_freq);
>  	kfree(dfc->power_table);
>  	kfree(dfc->freq_table);
>  
> 


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits
  2020-01-10 17:49   ` [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine " Matthias Kaehlcke
@ 2020-01-13  7:31     ` Chanwoo Choi
  2020-01-14 16:08       ` Leonard Crestez
  0 siblings, 1 reply; 10+ messages in thread
From: Chanwoo Choi @ 2020-01-13  7:31 UTC (permalink / raw)
  To: Matthias Kaehlcke, MyungJoo Ham, Kyungmin Park, Zhang Rui,
	Daniel Lezcano, Amit Kucheria
  Cc: Leonard Crestez, linux-kernel, linux-pm

Hi,


Any device driver except for devfreq_cooling.c might
use dev_pm_opp_enable/disable interface. 
So, don't need to remove the devfreq->scaling_max_freq
and devfreq->scaling_min_freq for supporting OPP interface.

Regards,
Chanwoo Choi

On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> Traditionally devfreq cooling devices dynamically disabled OPPs
> that shouldn't be used because of thermal pressure. Devfreq cooling
> devices now use PM QoS to set frequency limits, hence the devfreq
> code dealing that deals with disabled OPPs can be removed.
> 
> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> ---
> 
>  drivers/devfreq/devfreq.c | 75 +++++----------------------------------
>  include/linux/devfreq.h   |  4 ---
>  2 files changed, 8 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 57f6944d65a6..ec66e2c27cc4 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
>  	return ERR_PTR(-ENODEV);
>  }
>  
> -static unsigned long find_available_min_freq(struct devfreq *devfreq)
> -{
> -	struct dev_pm_opp *opp;
> -	unsigned long min_freq = 0;
> -
> -	opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
> -	if (IS_ERR(opp))
> -		min_freq = 0;
> -	else
> -		dev_pm_opp_put(opp);
> -
> -	return min_freq;
> -}
> -
> -static unsigned long find_available_max_freq(struct devfreq *devfreq)
> -{
> -	struct dev_pm_opp *opp;
> -	unsigned long max_freq = ULONG_MAX;
> -
> -	opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
> -	if (IS_ERR(opp))
> -		max_freq = 0;
> -	else
> -		dev_pm_opp_put(opp);
> -
> -	return max_freq;
> -}
> -
>  /**
>   * get_freq_range() - Get the current freq range
>   * @devfreq:	the devfreq instance
> @@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
>  		*max_freq = min(*max_freq,
>  				(unsigned long)HZ_PER_KHZ * qos_max_freq);
>  
> -	/* Apply constraints from OPP interface */
> -	*min_freq = max(*min_freq, devfreq->scaling_min_freq);
> -	*max_freq = min(*max_freq, devfreq->scaling_max_freq);
> -
>  	if (*min_freq > *max_freq)
>  		*min_freq = *max_freq;
>  }
> @@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
>  				 void *devp)
>  {
>  	struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
> -	int err = -EINVAL;
> +	int err;
>  
>  	mutex_lock(&devfreq->lock);
> -
> -	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> -	if (!devfreq->scaling_min_freq)
> -		goto out;
> -
> -	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> -	if (!devfreq->scaling_max_freq) {
> -		devfreq->scaling_max_freq = ULONG_MAX;
> -		goto out;
> -	}
> -
>  	err = update_devfreq(devfreq);
> -
> -out:
>  	mutex_unlock(&devfreq->lock);
>  	if (err)
>  		dev_err(devfreq->dev.parent,
> @@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  		mutex_lock(&devfreq->lock);
>  	}
>  
> -	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> -	if (!devfreq->scaling_min_freq) {
> -		mutex_unlock(&devfreq->lock);
> -		err = -EINVAL;
> +	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> +				     DEV_PM_QOS_MIN_FREQUENCY, 0);
> +	if (err < 0)
>  		goto err_dev;
> -	}
> -
> -	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> -	if (!devfreq->scaling_max_freq) {
> -		mutex_unlock(&devfreq->lock);
> -		err = -EINVAL;
> +	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> +				     DEV_PM_QOS_MAX_FREQUENCY,
> +				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> +	if (err < 0)
>  		goto err_dev;
> -	}
>  
>  	devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
>  	atomic_set(&devfreq->suspend_count, 0);
> @@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  
>  	mutex_unlock(&devfreq->lock);
>  
> -	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> -				     DEV_PM_QOS_MIN_FREQUENCY, 0);
> -	if (err < 0)
> -		goto err_devfreq;
> -	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> -				     DEV_PM_QOS_MAX_FREQUENCY,
> -				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> -	if (err < 0)
> -		goto err_devfreq;
> -
>  	devfreq->nb_min.notifier_call = qos_min_notifier_call;
>  	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
>  				      DEV_PM_QOS_MIN_FREQUENCY);
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index fb376b5b7281..cb75f23ad2f4 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -126,8 +126,6 @@ struct devfreq_dev_profile {
>   *		touch this.
>   * @user_min_freq_req:	PM QoS minimum frequency request from user (via sysfs)
>   * @user_max_freq_req:	PM QoS maximum frequency request from user (via sysfs)
> - * @scaling_min_freq:	Limit minimum frequency requested by OPP interface
> - * @scaling_max_freq:	Limit maximum frequency requested by OPP interface
>   * @stop_polling:	 devfreq polling status of a device.
>   * @suspend_freq:	 frequency of a device set during suspend phase.
>   * @resume_freq:	 frequency of a device set in resume phase.
> @@ -166,8 +164,6 @@ struct devfreq {
>  
>  	struct dev_pm_qos_request user_min_freq_req;
>  	struct dev_pm_qos_request user_max_freq_req;
> -	unsigned long scaling_min_freq;
> -	unsigned long scaling_max_freq;
>  	bool stop_polling;
>  
>  	unsigned long suspend_freq;
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits
  2020-01-13  7:31     ` Chanwoo Choi
@ 2020-01-14 16:08       ` Leonard Crestez
  2020-01-14 17:35         ` Chanwoo Choi
  0 siblings, 1 reply; 10+ messages in thread
From: Leonard Crestez @ 2020-01-14 16:08 UTC (permalink / raw)
  To: Chanwoo Choi, Matthias Kaehlcke, Viresh Kumar
  Cc: MyungJoo Ham, Kyungmin Park, Zhang Rui, Daniel Lezcano,
	Amit Kucheria, linux-kernel, linux-pm, Rafael J. Wysocki

On 13.01.2020 09:24, Chanwoo Choi wrote:
> Hi,
> 
> Any device driver except for devfreq_cooling.c might
> use dev_pm_opp_enable/disable interface.
> So, don't need to remove the devfreq->scaling_max_freq
> and devfreq->scaling_min_freq for supporting OPP interface.

It seems that devfreq_cooling was the only upstream user of 
dev_pm_opp_enable and the remaining callers of dev_pm_opp_disable are 
probe-time checks.

> Regards,
> Chanwoo Choi
> 
> On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
>> Traditionally devfreq cooling devices dynamically disabled OPPs
>> that shouldn't be used because of thermal pressure. Devfreq cooling
>> devices now use PM QoS to set frequency limits, hence the devfreq
>> code dealing that deals with disabled OPPs can be removed.
>>
>> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
>> ---
>>
>>   drivers/devfreq/devfreq.c | 75 +++++----------------------------------
>>   include/linux/devfreq.h   |  4 ---
>>   2 files changed, 8 insertions(+), 71 deletions(-)
>>
>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>> index 57f6944d65a6..ec66e2c27cc4 100644
>> --- a/drivers/devfreq/devfreq.c
>> +++ b/drivers/devfreq/devfreq.c
>> @@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
>>   	return ERR_PTR(-ENODEV);
>>   }
>>   
>> -static unsigned long find_available_min_freq(struct devfreq *devfreq)
>> -{
>> -	struct dev_pm_opp *opp;
>> -	unsigned long min_freq = 0;
>> -
>> -	opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
>> -	if (IS_ERR(opp))
>> -		min_freq = 0;
>> -	else
>> -		dev_pm_opp_put(opp);
>> -
>> -	return min_freq;
>> -}
>> -
>> -static unsigned long find_available_max_freq(struct devfreq *devfreq)
>> -{
>> -	struct dev_pm_opp *opp;
>> -	unsigned long max_freq = ULONG_MAX;
>> -
>> -	opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
>> -	if (IS_ERR(opp))
>> -		max_freq = 0;
>> -	else
>> -		dev_pm_opp_put(opp);
>> -
>> -	return max_freq;
>> -}
>> -
>>   /**
>>    * get_freq_range() - Get the current freq range
>>    * @devfreq:	the devfreq instance
>> @@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
>>   		*max_freq = min(*max_freq,
>>   				(unsigned long)HZ_PER_KHZ * qos_max_freq);
>>   
>> -	/* Apply constraints from OPP interface */
>> -	*min_freq = max(*min_freq, devfreq->scaling_min_freq);
>> -	*max_freq = min(*max_freq, devfreq->scaling_max_freq);
>> -
>>   	if (*min_freq > *max_freq)
>>   		*min_freq = *max_freq;
>>   }
>> @@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
>>   				 void *devp)
>>   {
>>   	struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
>> -	int err = -EINVAL;
>> +	int err;
>>   
>>   	mutex_lock(&devfreq->lock);
>> -
>> -	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>> -	if (!devfreq->scaling_min_freq)
>> -		goto out;
>> -
>> -	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
>> -	if (!devfreq->scaling_max_freq) {
>> -		devfreq->scaling_max_freq = ULONG_MAX;
>> -		goto out;
>> -	}
>> -
>>   	err = update_devfreq(devfreq);
>> -
>> -out:
>>   	mutex_unlock(&devfreq->lock);
>>   	if (err)
>>   		dev_err(devfreq->dev.parent,
>> @@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>   		mutex_lock(&devfreq->lock);
>>   	}
>>   
>> -	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>> -	if (!devfreq->scaling_min_freq) {
>> -		mutex_unlock(&devfreq->lock);
>> -		err = -EINVAL;
>> +	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
>> +				     DEV_PM_QOS_MIN_FREQUENCY, 0);
>> +	if (err < 0)
>>   		goto err_dev;
>> -	}
>> -
>> -	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
>> -	if (!devfreq->scaling_max_freq) {
>> -		mutex_unlock(&devfreq->lock);
>> -		err = -EINVAL;
>> +	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
>> +				     DEV_PM_QOS_MAX_FREQUENCY,
>> +				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
>> +	if (err < 0)
>>   		goto err_dev;
>> -	}
>>   
>>   	devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
>>   	atomic_set(&devfreq->suspend_count, 0);
>> @@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>   
>>   	mutex_unlock(&devfreq->lock);
>>   
>> -	err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
>> -				     DEV_PM_QOS_MIN_FREQUENCY, 0);
>> -	if (err < 0)
>> -		goto err_devfreq;
>> -	err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
>> -				     DEV_PM_QOS_MAX_FREQUENCY,
>> -				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
>> -	if (err < 0)
>> -		goto err_devfreq;
>> -

Performing PM QoS initialization under devfreq->lock triggers lockdep 
warnings for me. The warnings seem to be legitimate:

1) At init time &dev_pm_qos_mtx is taken under &devfreq->lock;
2) At update time &devfreq->lock is called under &dev_pm_qos_mtx (it's 
held while notifiers are called).

It's not clear why you moved dev_pm_qos_add_request higher?

>>   	devfreq->nb_min.notifier_call = qos_min_notifier_call;
>>   	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
>>   				      DEV_PM_QOS_MIN_FREQUENCY);
>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>> index fb376b5b7281..cb75f23ad2f4 100644
>> --- a/include/linux/devfreq.h
>> +++ b/include/linux/devfreq.h
>> @@ -126,8 +126,6 @@ struct devfreq_dev_profile {
>>    *		touch this.
>>    * @user_min_freq_req:	PM QoS minimum frequency request from user (via sysfs)
>>    * @user_max_freq_req:	PM QoS maximum frequency request from user (via sysfs)
>> - * @scaling_min_freq:	Limit minimum frequency requested by OPP interface
>> - * @scaling_max_freq:	Limit maximum frequency requested by OPP interface
>>    * @stop_polling:	 devfreq polling status of a device.
>>    * @suspend_freq:	 frequency of a device set during suspend phase.
>>    * @resume_freq:	 frequency of a device set in resume phase.
>> @@ -166,8 +164,6 @@ struct devfreq {
>>   
>>   	struct dev_pm_qos_request user_min_freq_req;
>>   	struct dev_pm_qos_request user_max_freq_req;
>> -	unsigned long scaling_min_freq;
>> -	unsigned long scaling_max_freq;
>>   	bool stop_polling;
>>   
>>   	unsigned long suspend_freq;
>>
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
  2020-01-10 17:49 ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits Matthias Kaehlcke
  2020-01-10 17:49   ` [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine " Matthias Kaehlcke
  2020-01-13  7:25   ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set " Chanwoo Choi
@ 2020-01-14 16:08   ` Leonard Crestez
  2020-01-14 19:07     ` Matthias Kaehlcke
  2 siblings, 1 reply; 10+ messages in thread
From: Leonard Crestez @ 2020-01-14 16:08 UTC (permalink / raw)
  To: Matthias Kaehlcke, Chanwoo Choi
  Cc: MyungJoo Ham, Kyungmin Park, Zhang Rui, Daniel Lezcano,
	Amit Kucheria, linux-kernel, linux-pm, Viresh Kumar,
	Rafael J. Wysocki

On 10.01.2020 19:49, Matthias Kaehlcke wrote:
> Now that devfreq supports limiting the frequency range of a device
> through PM QoS make use of it instead of disabling OPPs that should
> not be used.
> 
> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> ---

It is not obvious but this changes behavior when min max requests 
conflict (min > max): with PM QoS a MIN_FREQUENCY request takes 
precedence but if higher OPPs are disabled then this will override 
MIN_FREQUENCY.

There are very few users of this functionality so I don't think there 
are any systems that depend on this behaving one way or the other but 
perhaps it should be mentioned in commit message?

As far as I can tell the only user of devfreq_cooling in upstream is 
drivers/gpu/drm/panfrost?

>   drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
>   1 file changed, 20 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index ef59256887ff..3a63603afcf2 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -24,11 +24,13 @@
>   #include <linux/idr.h>
>   #include <linux/slab.h>
>   #include <linux/pm_opp.h>
> +#include <linux/pm_qos.h>
>   #include <linux/thermal.h>
>   
>   #include <trace/events/thermal.h>
>   
> -#define SCALE_ERROR_MITIGATION 100
> +#define HZ_PER_KHZ		1000
> +#define SCALE_ERROR_MITIGATION	100
>   
>   static DEFINE_IDA(devfreq_ida);
>   
> @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
>   	struct devfreq_cooling_power *power_ops;
>   	u32 res_util;
>   	int capped_state;
> +	struct dev_pm_qos_request req_max_freq;
>   };
>   
> -/**
> - * partition_enable_opps() - disable all opps above a given state
> - * @dfc:	Pointer to devfreq we are operating on
> - * @cdev_state:	cooling device state we're setting
> - *
> - * Go through the OPPs of the device, enabling all OPPs until
> - * @cdev_state and disabling those frequencies above it.
> - */
> -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> -				 unsigned long cdev_state)
> -{
> -	int i;
> -	struct device *dev = dfc->devfreq->dev.parent;
> -
> -	for (i = 0; i < dfc->freq_table_size; i++) {
> -		struct dev_pm_opp *opp;
> -		int ret = 0;
> -		unsigned int freq = dfc->freq_table[i];
> -		bool want_enable = i >= cdev_state ? true : false;
> -
> -		opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> -
> -		if (PTR_ERR(opp) == -ERANGE)
> -			continue;
> -		else if (IS_ERR(opp))
> -			return PTR_ERR(opp);
> -
> -		dev_pm_opp_put(opp);
> -
> -		if (want_enable)
> -			ret = dev_pm_opp_enable(dev, freq);
> -		else
> -			ret = dev_pm_opp_disable(dev, freq);
> -
> -		if (ret)
> -			return ret;
> -	}
> -
> -	return 0;
> -}
> -
>   static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
>   					 unsigned long *state)
>   {
> @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
>   	struct devfreq_cooling_device *dfc = cdev->devdata;
>   	struct devfreq *df = dfc->devfreq;
>   	struct device *dev = df->dev.parent;
> -	int ret;
> +	unsigned long freq;
>   
>   	if (state == dfc->cooling_state)
>   		return 0;
> @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
>   	if (state >= dfc->freq_table_size)
>   		return -EINVAL;
>   
> -	ret = partition_enable_opps(dfc, state);
> -	if (ret)
> -		return ret;
> +	freq = dfc->freq_table[state];
> +
> +	dev_pm_qos_update_request(&dfc->req_max_freq,
> +				  DIV_ROUND_UP(freq, HZ_PER_KHZ));
>   
>   	dfc->cooling_state = state;
>   
> @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>   	if (err)
>   		goto free_dfc;
>   
> +	err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> +				     DEV_PM_QOS_MAX_FREQUENCY,
> +				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> +	if (err < 0)
> +		goto remove_qos_req;
> +
>   	err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
>   	if (err < 0)
>   		goto free_tables;
> @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>   
>   release_ida:
>   	ida_simple_remove(&devfreq_ida, dfc->id);
> +
> +remove_qos_req:
> +	dev_pm_qos_remove_request(&dfc->req_max_freq); > +

A quirk of the dev_pm_qos API is that dev_pm_qos_remove_request prints a 
WARN splat if !dev_pm_qos_request_active and this can true on 
dev_pm_qos_add_request error.

I dealt with this by checking dev_pm_qos_request_active explicitly but 
perhaps dev_pm_qos API could be changed? In general "free/release" 
functions shouldn't complain if there's nothing to do.

>   free_tables:
>   	kfree(dfc->power_table);
>   	kfree(dfc->freq_table);
> @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>   
>   	thermal_cooling_device_unregister(dfc->cdev);
>   	ida_simple_remove(&devfreq_ida, dfc->id);
> +	dev_pm_qos_remove_request(&dfc->req_max_freq);
>   	kfree(dfc->power_table);
>   	kfree(dfc->freq_table);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits
  2020-01-14 16:08       ` Leonard Crestez
@ 2020-01-14 17:35         ` Chanwoo Choi
  2020-01-14 19:13           ` Matthias Kaehlcke
  0 siblings, 1 reply; 10+ messages in thread
From: Chanwoo Choi @ 2020-01-14 17:35 UTC (permalink / raw)
  To: Leonard Crestez
  Cc: Chanwoo Choi, Matthias Kaehlcke, Viresh Kumar, MyungJoo Ham,
	Kyungmin Park, Zhang Rui, Daniel Lezcano, Amit Kucheria,
	linux-kernel, linux-pm, Rafael J. Wysocki

On Wed, Jan 15, 2020 at 1:08 AM Leonard Crestez <leonard.crestez@nxp.com> wrote:
>
> On 13.01.2020 09:24, Chanwoo Choi wrote:
> > Hi,
> >
> > Any device driver except for devfreq_cooling.c might
> > use dev_pm_opp_enable/disable interface.
> > So, don't need to remove the devfreq->scaling_max_freq
> > and devfreq->scaling_min_freq for supporting OPP interface.
>
> It seems that devfreq_cooling was the only upstream user of
> dev_pm_opp_enable and the remaining callers of dev_pm_opp_disable are
> probe-time checks.

OPP interface has still dev_pm_opp_enable and dev_pm_opp_disable
function. As long as remains them, any device driver related to devfreq
could call them at some time. The devfreq supports the OPP interface,
not just for only devfreq_cooling.

>
> > Regards,
> > Chanwoo Choi
> >
> > On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> >> Traditionally devfreq cooling devices dynamically disabled OPPs
> >> that shouldn't be used because of thermal pressure. Devfreq cooling
> >> devices now use PM QoS to set frequency limits, hence the devfreq
> >> code dealing that deals with disabled OPPs can be removed.
> >>
> >> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> >> ---
> >>
> >>   drivers/devfreq/devfreq.c | 75 +++++----------------------------------
> >>   include/linux/devfreq.h   |  4 ---
> >>   2 files changed, 8 insertions(+), 71 deletions(-)
> >>
> >> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> >> index 57f6944d65a6..ec66e2c27cc4 100644
> >> --- a/drivers/devfreq/devfreq.c
> >> +++ b/drivers/devfreq/devfreq.c
> >> @@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
> >>      return ERR_PTR(-ENODEV);
> >>   }
> >>
> >> -static unsigned long find_available_min_freq(struct devfreq *devfreq)
> >> -{
> >> -    struct dev_pm_opp *opp;
> >> -    unsigned long min_freq = 0;
> >> -
> >> -    opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
> >> -    if (IS_ERR(opp))
> >> -            min_freq = 0;
> >> -    else
> >> -            dev_pm_opp_put(opp);
> >> -
> >> -    return min_freq;
> >> -}
> >> -
> >> -static unsigned long find_available_max_freq(struct devfreq *devfreq)
> >> -{
> >> -    struct dev_pm_opp *opp;
> >> -    unsigned long max_freq = ULONG_MAX;
> >> -
> >> -    opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
> >> -    if (IS_ERR(opp))
> >> -            max_freq = 0;
> >> -    else
> >> -            dev_pm_opp_put(opp);
> >> -
> >> -    return max_freq;
> >> -}
> >> -
> >>   /**
> >>    * get_freq_range() - Get the current freq range
> >>    * @devfreq:       the devfreq instance
> >> @@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
> >>              *max_freq = min(*max_freq,
> >>                              (unsigned long)HZ_PER_KHZ * qos_max_freq);
> >>
> >> -    /* Apply constraints from OPP interface */
> >> -    *min_freq = max(*min_freq, devfreq->scaling_min_freq);
> >> -    *max_freq = min(*max_freq, devfreq->scaling_max_freq);
> >> -
> >>      if (*min_freq > *max_freq)
> >>              *min_freq = *max_freq;
> >>   }
> >> @@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
> >>                               void *devp)
> >>   {
> >>      struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
> >> -    int err = -EINVAL;
> >> +    int err;
> >>
> >>      mutex_lock(&devfreq->lock);
> >> -
> >> -    devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> >> -    if (!devfreq->scaling_min_freq)
> >> -            goto out;
> >> -
> >> -    devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> >> -    if (!devfreq->scaling_max_freq) {
> >> -            devfreq->scaling_max_freq = ULONG_MAX;
> >> -            goto out;
> >> -    }
> >> -
> >>      err = update_devfreq(devfreq);
> >> -
> >> -out:
> >>      mutex_unlock(&devfreq->lock);
> >>      if (err)
> >>              dev_err(devfreq->dev.parent,
> >> @@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >>              mutex_lock(&devfreq->lock);
> >>      }
> >>
> >> -    devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> >> -    if (!devfreq->scaling_min_freq) {
> >> -            mutex_unlock(&devfreq->lock);
> >> -            err = -EINVAL;
> >> +    err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> >> +                                 DEV_PM_QOS_MIN_FREQUENCY, 0);
> >> +    if (err < 0)
> >>              goto err_dev;
> >> -    }
> >> -
> >> -    devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> >> -    if (!devfreq->scaling_max_freq) {
> >> -            mutex_unlock(&devfreq->lock);
> >> -            err = -EINVAL;
> >> +    err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> >> +                                 DEV_PM_QOS_MAX_FREQUENCY,
> >> +                                 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> >> +    if (err < 0)
> >>              goto err_dev;
> >> -    }
> >>
> >>      devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
> >>      atomic_set(&devfreq->suspend_count, 0);
> >> @@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >>
> >>      mutex_unlock(&devfreq->lock);
> >>
> >> -    err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> >> -                                 DEV_PM_QOS_MIN_FREQUENCY, 0);
> >> -    if (err < 0)
> >> -            goto err_devfreq;
> >> -    err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> >> -                                 DEV_PM_QOS_MAX_FREQUENCY,
> >> -                                 PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> >> -    if (err < 0)
> >> -            goto err_devfreq;
> >> -
>
> Performing PM QoS initialization under devfreq->lock triggers lockdep
> warnings for me. The warnings seem to be legitimate:
>
> 1) At init time &dev_pm_qos_mtx is taken under &devfreq->lock;
> 2) At update time &devfreq->lock is called under &dev_pm_qos_mtx (it's
> held while notifiers are called).
>
> It's not clear why you moved dev_pm_qos_add_request higher?
>
> >>      devfreq->nb_min.notifier_call = qos_min_notifier_call;
> >>      err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
> >>                                    DEV_PM_QOS_MIN_FREQUENCY);
> >> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> >> index fb376b5b7281..cb75f23ad2f4 100644
> >> --- a/include/linux/devfreq.h
> >> +++ b/include/linux/devfreq.h
> >> @@ -126,8 +126,6 @@ struct devfreq_dev_profile {
> >>    *         touch this.
> >>    * @user_min_freq_req:     PM QoS minimum frequency request from user (via sysfs)
> >>    * @user_max_freq_req:     PM QoS maximum frequency request from user (via sysfs)
> >> - * @scaling_min_freq:       Limit minimum frequency requested by OPP interface
> >> - * @scaling_max_freq:       Limit maximum frequency requested by OPP interface
> >>    * @stop_polling:   devfreq polling status of a device.
> >>    * @suspend_freq:   frequency of a device set during suspend phase.
> >>    * @resume_freq:    frequency of a device set in resume phase.
> >> @@ -166,8 +164,6 @@ struct devfreq {
> >>
> >>      struct dev_pm_qos_request user_min_freq_req;
> >>      struct dev_pm_qos_request user_max_freq_req;
> >> -    unsigned long scaling_min_freq;
> >> -    unsigned long scaling_max_freq;
> >>      bool stop_polling;
> >>
> >>      unsigned long suspend_freq;
> >>
> >
>


-- 
Best Regards,
Chanwoo Choi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
  2020-01-13  7:25   ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set " Chanwoo Choi
@ 2020-01-14 18:51     ` Matthias Kaehlcke
  0 siblings, 0 replies; 10+ messages in thread
From: Matthias Kaehlcke @ 2020-01-14 18:51 UTC (permalink / raw)
  To: Chanwoo Choi
  Cc: MyungJoo Ham, Kyungmin Park, Zhang Rui, Daniel Lezcano,
	Amit Kucheria, Leonard Crestez, linux-kernel, linux-pm

On Mon, Jan 13, 2020 at 04:25:17PM +0900, Chanwoo Choi wrote:
> Hi,
> 
> On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> > Now that devfreq supports limiting the frequency range of a device
> > through PM QoS make use of it instead of disabling OPPs that should
> > not be used.
> > 
> > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > ---
> > 
> >  drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> >  1 file changed, 20 insertions(+), 46 deletions(-)
> > 
> > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> > index ef59256887ff..3a63603afcf2 100644
> > --- a/drivers/thermal/devfreq_cooling.c
> > +++ b/drivers/thermal/devfreq_cooling.c
> > @@ -24,11 +24,13 @@
> >  #include <linux/idr.h>
> >  #include <linux/slab.h>
> >  #include <linux/pm_opp.h>
> > +#include <linux/pm_qos.h>
> >  #include <linux/thermal.h>
> >  
> >  #include <trace/events/thermal.h>
> >  
> > -#define SCALE_ERROR_MITIGATION 100
> > +#define HZ_PER_KHZ		1000
> > +#define SCALE_ERROR_MITIGATION	100
> >  
> >  static DEFINE_IDA(devfreq_ida);
> >  
> > @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> >  	struct devfreq_cooling_power *power_ops;
> >  	u32 res_util;
> >  	int capped_state;
> > +	struct dev_pm_qos_request req_max_freq;
> 
> Need to add the description of 'req_max_freq'.

will add the description

> >  };
> >  
> > -/**
> > - * partition_enable_opps() - disable all opps above a given state
> > - * @dfc:	Pointer to devfreq we are operating on
> > - * @cdev_state:	cooling device state we're setting
> > - *
> > - * Go through the OPPs of the device, enabling all OPPs until
> > - * @cdev_state and disabling those frequencies above it.
> > - */
> > -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> > -				 unsigned long cdev_state)
> > -{
> > -	int i;
> > -	struct device *dev = dfc->devfreq->dev.parent;
> > -
> > -	for (i = 0; i < dfc->freq_table_size; i++) {
> > -		struct dev_pm_opp *opp;
> > -		int ret = 0;
> > -		unsigned int freq = dfc->freq_table[i];
> > -		bool want_enable = i >= cdev_state ? true : false;
> > -
> > -		opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> > -
> > -		if (PTR_ERR(opp) == -ERANGE)
> > -			continue;
> > -		else if (IS_ERR(opp))
> > -			return PTR_ERR(opp);
> > -
> > -		dev_pm_opp_put(opp);
> > -
> > -		if (want_enable)
> > -			ret = dev_pm_opp_enable(dev, freq);
> > -		else
> > -			ret = dev_pm_opp_disable(dev, freq);
> > -
> > -		if (ret)
> > -			return ret;
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> >  static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> >  					 unsigned long *state)
> >  {
> > @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> >  	struct devfreq_cooling_device *dfc = cdev->devdata;
> >  	struct devfreq *df = dfc->devfreq;
> >  	struct device *dev = df->dev.parent;
> > -	int ret;
> > +	unsigned long freq;
> >  
> >  	if (state == dfc->cooling_state)
> >  		return 0;
> > @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> >  	if (state >= dfc->freq_table_size)
> >  		return -EINVAL;
> >  
> > -	ret = partition_enable_opps(dfc, state);
> > -	if (ret)
> > -		return ret;
> > +	freq = dfc->freq_table[state];
> > +
> > +	dev_pm_qos_update_request(&dfc->req_max_freq,
> > +				  DIV_ROUND_UP(freq, HZ_PER_KHZ));
> >  
> >  	dfc->cooling_state = state;
> >  
> > @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >  	if (err)
> >  		goto free_dfc;
> >  
> > +	err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> > +				     DEV_PM_QOS_MAX_FREQUENCY,
> > +				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> > +	if (err < 0)
> > +		goto remove_qos_req;
> 
> Jump 'free_table' instead of 'remove_qos_req'.

ack

> > +
> >  	err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> >  	if (err < 0)
> >  		goto free_tables;
> 
> Jump remove_qos_req.

ack

> > @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >  
> >  release_ida:
> >  	ida_simple_remove(&devfreq_ida, dfc->id);
> > +
> > +remove_qos_req:
> > +	dev_pm_qos_remove_request(&dfc->req_max_freq);
> > +
> >  free_tables:
> >  	kfree(dfc->power_table);
> >  	kfree(dfc->freq_table);
> > @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
> >  
> >  	thermal_cooling_device_unregister(dfc->cdev);
> >  	ida_simple_remove(&devfreq_ida, dfc->id);
> > +	dev_pm_qos_remove_request(&dfc->req_max_freq);
> >  	kfree(dfc->power_table);
> >  	kfree(dfc->freq_table);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits
  2020-01-14 16:08   ` Leonard Crestez
@ 2020-01-14 19:07     ` Matthias Kaehlcke
  0 siblings, 0 replies; 10+ messages in thread
From: Matthias Kaehlcke @ 2020-01-14 19:07 UTC (permalink / raw)
  To: Leonard Crestez
  Cc: Chanwoo Choi, MyungJoo Ham, Kyungmin Park, Zhang Rui,
	Daniel Lezcano, Amit Kucheria, linux-kernel, linux-pm,
	Viresh Kumar, Rafael J. Wysocki

On Tue, Jan 14, 2020 at 04:08:38PM +0000, Leonard Crestez wrote:
> On 10.01.2020 19:49, Matthias Kaehlcke wrote:
> > Now that devfreq supports limiting the frequency range of a device
> > through PM QoS make use of it instead of disabling OPPs that should
> > not be used.
> > 
> > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > ---
> 
> It is not obvious but this changes behavior when min max requests 
> conflict (min > max): with PM QoS a MIN_FREQUENCY request takes 
> precedence but if higher OPPs are disabled then this will override 
> MIN_FREQUENCY.

Thanks for pointing this out.

> There are very few users of this functionality so I don't think there 
> are any systems that depend on this behaving one way or the other but 
> perhaps it should be mentioned in commit message?

Sounds good, I'll add a note.

> As far as I can tell the only user of devfreq_cooling in upstream is 
> drivers/gpu/drm/panfrost?

Indeed, apparently GPUs are the primary devfreq device used for cooling,
and unfortunately most GPU drivers are not in upstream.

> >   drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> >   1 file changed, 20 insertions(+), 46 deletions(-)
> > 
> > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> > index ef59256887ff..3a63603afcf2 100644
> > --- a/drivers/thermal/devfreq_cooling.c
> > +++ b/drivers/thermal/devfreq_cooling.c
> > @@ -24,11 +24,13 @@
> >   #include <linux/idr.h>
> >   #include <linux/slab.h>
> >   #include <linux/pm_opp.h>
> > +#include <linux/pm_qos.h>
> >   #include <linux/thermal.h>
> >   
> >   #include <trace/events/thermal.h>
> >   
> > -#define SCALE_ERROR_MITIGATION 100
> > +#define HZ_PER_KHZ		1000
> > +#define SCALE_ERROR_MITIGATION	100
> >   
> >   static DEFINE_IDA(devfreq_ida);
> >   
> > @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> >   	struct devfreq_cooling_power *power_ops;
> >   	u32 res_util;
> >   	int capped_state;
> > +	struct dev_pm_qos_request req_max_freq;
> >   };
> >   
> > -/**
> > - * partition_enable_opps() - disable all opps above a given state
> > - * @dfc:	Pointer to devfreq we are operating on
> > - * @cdev_state:	cooling device state we're setting
> > - *
> > - * Go through the OPPs of the device, enabling all OPPs until
> > - * @cdev_state and disabling those frequencies above it.
> > - */
> > -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> > -				 unsigned long cdev_state)
> > -{
> > -	int i;
> > -	struct device *dev = dfc->devfreq->dev.parent;
> > -
> > -	for (i = 0; i < dfc->freq_table_size; i++) {
> > -		struct dev_pm_opp *opp;
> > -		int ret = 0;
> > -		unsigned int freq = dfc->freq_table[i];
> > -		bool want_enable = i >= cdev_state ? true : false;
> > -
> > -		opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> > -
> > -		if (PTR_ERR(opp) == -ERANGE)
> > -			continue;
> > -		else if (IS_ERR(opp))
> > -			return PTR_ERR(opp);
> > -
> > -		dev_pm_opp_put(opp);
> > -
> > -		if (want_enable)
> > -			ret = dev_pm_opp_enable(dev, freq);
> > -		else
> > -			ret = dev_pm_opp_disable(dev, freq);
> > -
> > -		if (ret)
> > -			return ret;
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> >   static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> >   					 unsigned long *state)
> >   {
> > @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> >   	struct devfreq_cooling_device *dfc = cdev->devdata;
> >   	struct devfreq *df = dfc->devfreq;
> >   	struct device *dev = df->dev.parent;
> > -	int ret;
> > +	unsigned long freq;
> >   
> >   	if (state == dfc->cooling_state)
> >   		return 0;
> > @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> >   	if (state >= dfc->freq_table_size)
> >   		return -EINVAL;
> >   
> > -	ret = partition_enable_opps(dfc, state);
> > -	if (ret)
> > -		return ret;
> > +	freq = dfc->freq_table[state];
> > +
> > +	dev_pm_qos_update_request(&dfc->req_max_freq,
> > +				  DIV_ROUND_UP(freq, HZ_PER_KHZ));
> >   
> >   	dfc->cooling_state = state;
> >   
> > @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >   	if (err)
> >   		goto free_dfc;
> >   
> > +	err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> > +				     DEV_PM_QOS_MAX_FREQUENCY,
> > +				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> > +	if (err < 0)
> > +		goto remove_qos_req;
> > +
> >   	err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> >   	if (err < 0)
> >   		goto free_tables;
> > @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >   
> >   release_ida:
> >   	ida_simple_remove(&devfreq_ida, dfc->id);
> > +
> > +remove_qos_req:
> > +	dev_pm_qos_remove_request(&dfc->req_max_freq); > +
> 
> A quirk of the dev_pm_qos API is that dev_pm_qos_remove_request prints a 
> WARN splat if !dev_pm_qos_request_active and this can true on 
> dev_pm_qos_add_request error.
> 
> I dealt with this by checking dev_pm_qos_request_active explicitly but 
> perhaps dev_pm_qos API could be changed? In general "free/release" 
> functions shouldn't complain if there's nothing to do.

I think we should be good here if we jump to 'free_tables' if
_add_request() fails, as requested by Chanwoo. Then _remove_request() is
only called when _add_request() was successful.

> >   free_tables:
> >   	kfree(dfc->power_table);
> >   	kfree(dfc->freq_table);
> > @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
> >   
> >   	thermal_cooling_device_unregister(dfc->cdev);
> >   	ida_simple_remove(&devfreq_ida, dfc->id);
> > +	dev_pm_qos_remove_request(&dfc->req_max_freq);
> >   	kfree(dfc->power_table);
> >   	kfree(dfc->freq_table);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits
  2020-01-14 17:35         ` Chanwoo Choi
@ 2020-01-14 19:13           ` Matthias Kaehlcke
  0 siblings, 0 replies; 10+ messages in thread
From: Matthias Kaehlcke @ 2020-01-14 19:13 UTC (permalink / raw)
  To: Chanwoo Choi
  Cc: Leonard Crestez, Chanwoo Choi, Viresh Kumar, MyungJoo Ham,
	Kyungmin Park, Zhang Rui, Daniel Lezcano, Amit Kucheria,
	linux-kernel, linux-pm, Rafael J. Wysocki

On Wed, Jan 15, 2020 at 02:35:48AM +0900, Chanwoo Choi wrote:
> On Wed, Jan 15, 2020 at 1:08 AM Leonard Crestez <leonard.crestez@nxp.com> wrote:
> >
> > On 13.01.2020 09:24, Chanwoo Choi wrote:
> > > Hi,
> > >
> > > Any device driver except for devfreq_cooling.c might
> > > use dev_pm_opp_enable/disable interface.
> > > So, don't need to remove the devfreq->scaling_max_freq
> > > and devfreq->scaling_min_freq for supporting OPP interface.
> >
> > It seems that devfreq_cooling was the only upstream user of
> > dev_pm_opp_enable and the remaining callers of dev_pm_opp_disable are
> > probe-time checks.
> 
> OPP interface has still dev_pm_opp_enable and dev_pm_opp_disable
> function. As long as remains them, any device driver related to devfreq
> could call them at some time. The devfreq supports the OPP interface,
> not just for only devfreq_cooling.

I would like to remove the disabled OPP handling since no devfreq device
makes use of dev_pm_opp_enable/disable, but I fear you are right that
we have to keep it as long as the API is available.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-01-14 19:13 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20200110174931epcas1p49c79567945e125829188174293d99850@epcas1p4.samsung.com>
2020-01-10 17:49 ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits Matthias Kaehlcke
2020-01-10 17:49   ` [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine " Matthias Kaehlcke
2020-01-13  7:31     ` Chanwoo Choi
2020-01-14 16:08       ` Leonard Crestez
2020-01-14 17:35         ` Chanwoo Choi
2020-01-14 19:13           ` Matthias Kaehlcke
2020-01-13  7:25   ` [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set " Chanwoo Choi
2020-01-14 18:51     ` Matthias Kaehlcke
2020-01-14 16:08   ` Leonard Crestez
2020-01-14 19:07     ` Matthias Kaehlcke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).