[2/4] PM / devfreq: add possibility for delayed work
diff mbox series

Message ID 20191209144425.13321-3-k.konieczny@samsung.com
State New
Headers show
Series
  • PM / devfreq: add possibility for delayed work
Related show

Commit Message

Kamil Konieczny Dec. 9, 2019, 2:44 p.m. UTC
Current devfreq workqueue uses deferred timer. Introduce sysfs
file delayed_timer and use it for change from deferred to
delayed work. The default is to use old deferred one, which
saves power, but can miss increased demand for higher bus
frequency if timer was assigned to idle cpu.

Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
---
 Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
 drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
 include/linux/devfreq.h                       |  2 +
 3 files changed, 57 insertions(+), 1 deletion(-)

Comments

Matthias Kaehlcke Dec. 9, 2019, 7:27 p.m. UTC | #1
Hi,

On Mon, Dec 09, 2019 at 03:44:23PM +0100, Kamil Konieczny wrote:
> Current devfreq workqueue uses deferred timer. Introduce sysfs
> file delayed_timer and use it for change from deferred to
> delayed work. The default is to use old deferred one, which
> saves power, but can miss increased demand for higher bus
> frequency if timer was assigned to idle cpu.
> 
> Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
> ---
>  Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
>  drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
>  include/linux/devfreq.h                       |  2 +
>  3 files changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
> index 9758eb85ade3..07bfd0df6a4a 100644
> --- a/Documentation/ABI/testing/sysfs-class-devfreq
> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
> @@ -30,6 +30,16 @@ Description:
>  		target_freq when get_cur_freq() is not implemented by
>  		devfreq driver.
>  
> +What:		/sys/class/devfreq/.../delayed_timer
> +Date:		December 2019
> +Contact:	Kamil Konieczny <k.konieczny@samsung.com>
> +Description:
> +		This ABI shows or clears timer type used by devfreq
> +		workqueue. When 0, it uses default deferred timer.
> +		When set to 1 devfreq will use delayed timer. Example
> +		useage:
> +			echo 1 > /sys/class/devfreq/.../delayed_timer
> +
>  What:		/sys/class/devfreq/.../target_freq
>  Date:		September 2012
>  Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 955949c6fc1f..c277d1770fef 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -445,7 +445,11 @@ void devfreq_monitor_start(struct devfreq *devfreq)
>  	if (devfreq->governor->interrupt_driven)
>  		return;
>  
> -	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
> +	if (devfreq->delayed_timer)
> +		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
> +	else
> +		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
> +
>  	if (devfreq->profile->polling_ms)
>  		queue_delayed_work(devfreq_wq, &devfreq->work,
>  			msecs_to_jiffies(devfreq->profile->polling_ms));
> @@ -698,6 +702,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  	devfreq->last_status.current_frequency = profile->initial_freq;
>  	devfreq->data = data;
>  	devfreq->nb.notifier_call = devfreq_notifier_call;
> +	devfreq->delayed_timer = false;

devfreq is zero initialized (allocated with kzalloc()), hence this is
unnecessary.

>  
>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>  		mutex_unlock(&devfreq->lock);
> @@ -1288,6 +1293,44 @@ static ssize_t available_governors_show(struct device *d,
>  }
>  static DEVICE_ATTR_RO(available_governors);
>  
> +static ssize_t delayed_timer_show(struct device *dev,
> +				  struct device_attribute *attr, char *buf)
> +{
> +	int i;
> +
> +	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
> +	return sprintf(buf, "%d\n", i);

get rid of 'i' and just use df->delayed_timer in sprintf().

> +}
> +
> +static ssize_t delayed_timer_store(struct device *dev,
> +				   struct device_attribute *attr,
> +				   const char *buf, size_t count)
> +{
> +	struct devfreq *df = to_devfreq(dev);
> +	bool old_timer;

Not a great name, the variable doesn't hold a timer. I'd suggest something
like 'prev_val'.

> +	int value, ret;
> +
> +	if (!df->governor)
> +		return -EINVAL;
> +
> +	ret = kstrtoint(buf, 10, &value);
> +	if (ret || (value != 1 && value != 0))
> +		return -EINVAL;

use kstrtobool() instead of partially re-implementing it.

> +
> +	mutex_lock(&df->lock);
> +	old_timer = df->delayed_timer;
> +	df->delayed_timer = value == 0 ? false : true;

What's wrong with:

df->delayed_timer = value;

?

> +	mutex_unlock(&df->lock);

Does the locking as is actually provide any value? The use case seems to
be concurrent setting of the sysfs attribute. The lock is released after
the assignment, hence the value of 'df->delayed_timer' could be overwritten
before the condition below is evaluated.

If you want to protect against this case you need something like this:

// don't release the lock before evaluating the condition

> +	if (old_timer != df->delayed_timer) {
  	   	mutex_unlock(&df->lock);
> +		devfreq_monitor_stop(df);
> +		devfreq_monitor_start(df);
> +	}
  	else {
		mutex_unlock(&df->lock);
	}

I don't pretend it's pretty ;-)
Chanwoo Choi Dec. 10, 2019, 1:47 a.m. UTC | #2
On 12/9/19 11:44 PM, Kamil Konieczny wrote:
> Current devfreq workqueue uses deferred timer. Introduce sysfs
> file delayed_timer and use it for change from deferred to
> delayed work. The default is to use old deferred one, which
> saves power, but can miss increased demand for higher bus
> frequency if timer was assigned to idle cpu.

As I commented on patch1, If you hope to change the feature
related to both performance  and power-consumption, 
you have to suggest the reasonable data with test result
on multiple scenarios.

Firstly,
I don't agree to add 'delayed_timer' sysfs entries.
If some device driver want to use the different type of
workqueue, they can choice the workqueue type in the
probe function of device driver. 

Secondly, the 'dealyed_timer' is not for all devfreq
device driver. Only devfreq device driver uses the
'simple_ondemand' governor. It is wrong to show
without any specific reason.


If you suggest the reasonable data with test result,
I prefer to add the new flag to 'struct devfreq_dev_profile'.

> 
> Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
> ---
>  Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
>  drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
>  include/linux/devfreq.h                       |  2 +
>  3 files changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
> index 9758eb85ade3..07bfd0df6a4a 100644
> --- a/Documentation/ABI/testing/sysfs-class-devfreq
> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
> @@ -30,6 +30,16 @@ Description:
>  		target_freq when get_cur_freq() is not implemented by
>  		devfreq driver.
>  
> +What:		/sys/class/devfreq/.../delayed_timer
> +Date:		December 2019
> +Contact:	Kamil Konieczny <k.konieczny@samsung.com>
> +Description:
> +		This ABI shows or clears timer type used by devfreq
> +		workqueue. When 0, it uses default deferred timer.
> +		When set to 1 devfreq will use delayed timer. Example
> +		useage:
> +			echo 1 > /sys/class/devfreq/.../delayed_timer
> +
>  What:		/sys/class/devfreq/.../target_freq
>  Date:		September 2012
>  Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 955949c6fc1f..c277d1770fef 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -445,7 +445,11 @@ void devfreq_monitor_start(struct devfreq *devfreq)
>  	if (devfreq->governor->interrupt_driven)
>  		return;
>  
> -	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
> +	if (devfreq->delayed_timer)
> +		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
> +	else
> +		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
> +
>  	if (devfreq->profile->polling_ms)
>  		queue_delayed_work(devfreq_wq, &devfreq->work,
>  			msecs_to_jiffies(devfreq->profile->polling_ms));
> @@ -698,6 +702,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  	devfreq->last_status.current_frequency = profile->initial_freq;
>  	devfreq->data = data;
>  	devfreq->nb.notifier_call = devfreq_notifier_call;
> +	devfreq->delayed_timer = false;
>  
>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>  		mutex_unlock(&devfreq->lock);
> @@ -1288,6 +1293,44 @@ static ssize_t available_governors_show(struct device *d,
>  }
>  static DEVICE_ATTR_RO(available_governors);
>  
> +static ssize_t delayed_timer_show(struct device *dev,
> +				  struct device_attribute *attr, char *buf)
> +{
> +	int i;
> +
> +	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
> +	return sprintf(buf, "%d\n", i);
> +}
> +
> +static ssize_t delayed_timer_store(struct device *dev,
> +				   struct device_attribute *attr,
> +				   const char *buf, size_t count)
> +{
> +	struct devfreq *df = to_devfreq(dev);
> +	bool old_timer;
> +	int value, ret;
> +
> +	if (!df->governor)
> +		return -EINVAL;
> +
> +	ret = kstrtoint(buf, 10, &value);
> +	if (ret || (value != 1 && value != 0))
> +		return -EINVAL;
> +
> +	mutex_lock(&df->lock);
> +	old_timer = df->delayed_timer;
> +	df->delayed_timer = value == 0 ? false : true;
> +	mutex_unlock(&df->lock);
> +
> +	if (old_timer != df->delayed_timer) {
> +		devfreq_monitor_stop(df);
> +		devfreq_monitor_start(df);
> +	}
> +
> +	return count;
> +}
> +static DEVICE_ATTR_RW(delayed_timer);
> +
>  static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr,
>  			     char *buf)
>  {
> @@ -1513,6 +1556,7 @@ static struct attribute *devfreq_attrs[] = {
>  	&dev_attr_name.attr,
>  	&dev_attr_governor.attr,
>  	&dev_attr_available_governors.attr,
> +	&dev_attr_delayed_timer.attr,
>  	&dev_attr_cur_freq.attr,
>  	&dev_attr_available_frequencies.attr,
>  	&dev_attr_target_freq.attr,
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index de2fdc56aa5b..761aa0a09db7 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -134,6 +134,7 @@ struct devfreq_stats {
>   *		reevaluate operable frequencies. Devfreq users may use
>   *		devfreq.nb to the corresponding register notifier call chain.
>   * @work:	delayed work for load monitoring.
> + * @delayed_timer:	use delayed or deferred timer for workqueue.
>   * @previous_freq:	previously configured frequency value.
>   * @data:	Private data of the governor. The devfreq framework does not
>   *		touch this.
> @@ -166,6 +167,7 @@ struct devfreq {
>  	char governor_name[DEVFREQ_NAME_LEN];
>  	struct notifier_block nb;
>  	struct delayed_work work;
> +	bool delayed_timer;
>  
>  	unsigned long previous_freq;
>  	struct devfreq_dev_status last_status;
>
Kamil Konieczny Dec. 10, 2019, 9:03 a.m. UTC | #3
Hi Chanwoo,

On 10.12.2019 02:47, Chanwoo Choi wrote:
> On 12/9/19 11:44 PM, Kamil Konieczny wrote:
>> Current devfreq workqueue uses deferred timer. Introduce sysfs
>> file delayed_timer and use it for change from deferred to
>> delayed work. The default is to use old deferred one, which
>> saves power, but can miss increased demand for higher bus
>> frequency if timer was assigned to idle cpu.
> 
> As I commented on patch1, If you hope to change the feature
> related to both performance  and power-consumption, 
> you have to suggest the reasonable data with test result
> on multiple scenarios.

Unfortunatly I do not have such tests. Do you have them ?
May you share them with me or Marek ?

> Firstly,
> I don't agree to add 'delayed_timer' sysfs entries.
> If some device driver want to use the different type of
> workqueue, they can choice the workqueue type in the
> probe function of device driver. 

sysfs allows change in runtime

> Secondly, the 'dealyed_timer' is not for all devfreq
> device driver. Only devfreq device driver uses the
> 'simple_ondemand' governor. It is wrong to show
> without any specific reason.

Good point, performance or powersave with fixed max or min freq
do not need them.

> If you suggest the reasonable data with test result,
> I prefer to add the new flag to 'struct devfreq_dev_profile'.

imho users of devfreq may give it a try and perform tests themselfs
and then share results.

>>
>> Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
>> ---
>>  Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
>>  drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
>>  include/linux/devfreq.h                       |  2 +
>>  3 files changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
>> index 9758eb85ade3..07bfd0df6a4a 100644
>> --- a/Documentation/ABI/testing/sysfs-class-devfreq
>> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
>> @@ -30,6 +30,16 @@ Description:
>>  		target_freq when get_cur_freq() is not implemented by
>>  		devfreq driver.
>>  
>> +What:		/sys/class/devfreq/.../delayed_timer
>> +Date:		December 2019
>> +Contact:	Kamil Konieczny <k.konieczny@samsung.com>
>> +Description:
>> +		This ABI shows or clears timer type used by devfreq
>> +		workqueue. When 0, it uses default deferred timer.
>> +		When set to 1 devfreq will use delayed timer. Example
>> +		useage:
>> +			echo 1 > /sys/class/devfreq/.../delayed_timer
>> +
>>  What:		/sys/class/devfreq/.../target_freq
>>  Date:		September 2012
>>  Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>> index 955949c6fc1f..c277d1770fef 100644
>> --- a/drivers/devfreq/devfreq.c
>> +++ b/drivers/devfreq/devfreq.c
>> @@ -445,7 +445,11 @@ void devfreq_monitor_start(struct devfreq *devfreq)
>>  	if (devfreq->governor->interrupt_driven)
>>  		return;
>>  
>> -	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>> +	if (devfreq->delayed_timer)
>> +		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
>> +	else
>> +		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>> +
>>  	if (devfreq->profile->polling_ms)
>>  		queue_delayed_work(devfreq_wq, &devfreq->work,
>>  			msecs_to_jiffies(devfreq->profile->polling_ms));
>> @@ -698,6 +702,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>  	devfreq->last_status.current_frequency = profile->initial_freq;
>>  	devfreq->data = data;
>>  	devfreq->nb.notifier_call = devfreq_notifier_call;
>> +	devfreq->delayed_timer = false;
>>  
>>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>>  		mutex_unlock(&devfreq->lock);
>> @@ -1288,6 +1293,44 @@ static ssize_t available_governors_show(struct device *d,
>>  }
>>  static DEVICE_ATTR_RO(available_governors);
>>  
>> +static ssize_t delayed_timer_show(struct device *dev,
>> +				  struct device_attribute *attr, char *buf)
>> +{
>> +	int i;
>> +
>> +	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
>> +	return sprintf(buf, "%d\n", i);
>> +}
>> +
>> +static ssize_t delayed_timer_store(struct device *dev,
>> +				   struct device_attribute *attr,
>> +				   const char *buf, size_t count)
>> +{
>> +	struct devfreq *df = to_devfreq(dev);
>> +	bool old_timer;
>> +	int value, ret;
>> +
>> +	if (!df->governor)
>> +		return -EINVAL;
>> +
>> +	ret = kstrtoint(buf, 10, &value);
>> +	if (ret || (value != 1 && value != 0))
>> +		return -EINVAL;
>> +
>> +	mutex_lock(&df->lock);
>> +	old_timer = df->delayed_timer;
>> +	df->delayed_timer = value == 0 ? false : true;
>> +	mutex_unlock(&df->lock);
>> +
>> +	if (old_timer != df->delayed_timer) {
>> +		devfreq_monitor_stop(df);
>> +		devfreq_monitor_start(df);
>> +	}
>> +
>> +	return count;
>> +}
>> +static DEVICE_ATTR_RW(delayed_timer);
>> +
>>  static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr,
>>  			     char *buf)
>>  {
>> @@ -1513,6 +1556,7 @@ static struct attribute *devfreq_attrs[] = {
>>  	&dev_attr_name.attr,
>>  	&dev_attr_governor.attr,
>>  	&dev_attr_available_governors.attr,
>> +	&dev_attr_delayed_timer.attr,
>>  	&dev_attr_cur_freq.attr,
>>  	&dev_attr_available_frequencies.attr,
>>  	&dev_attr_target_freq.attr,
>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>> index de2fdc56aa5b..761aa0a09db7 100644
>> --- a/include/linux/devfreq.h
>> +++ b/include/linux/devfreq.h
>> @@ -134,6 +134,7 @@ struct devfreq_stats {
>>   *		reevaluate operable frequencies. Devfreq users may use
>>   *		devfreq.nb to the corresponding register notifier call chain.
>>   * @work:	delayed work for load monitoring.
>> + * @delayed_timer:	use delayed or deferred timer for workqueue.
>>   * @previous_freq:	previously configured frequency value.
>>   * @data:	Private data of the governor. The devfreq framework does not
>>   *		touch this.
>> @@ -166,6 +167,7 @@ struct devfreq {
>>  	char governor_name[DEVFREQ_NAME_LEN];
>>  	struct notifier_block nb;
>>  	struct delayed_work work;
>> +	bool delayed_timer;
>>  
>>  	unsigned long previous_freq;
>>  	struct devfreq_dev_status last_status;
>>
> 
>
Chanwoo Choi Dec. 10, 2019, 9:25 a.m. UTC | #4
On 12/10/19 6:03 PM, Kamil Konieczny wrote:
> Hi Chanwoo,
> 
> On 10.12.2019 02:47, Chanwoo Choi wrote:
>> On 12/9/19 11:44 PM, Kamil Konieczny wrote:
>>> Current devfreq workqueue uses deferred timer. Introduce sysfs
>>> file delayed_timer and use it for change from deferred to
>>> delayed work. The default is to use old deferred one, which
>>> saves power, but can miss increased demand for higher bus
>>> frequency if timer was assigned to idle cpu.
>>
>> As I commented on patch1, If you hope to change the feature
>> related to both performance  and power-consumption, 
>> you have to suggest the reasonable data with test result
>> on multiple scenarios.
> 
> Unfortunatly I do not have such tests. Do you have them ?
> May you share them with me or Marek ?

You have to do some test for this patch
because you tried to use the 'delayed work' by this patch.

Actually, If possible, I want to apply some feature
if there are any real requirements.

> 
>> Firstly,
>> I don't agree to add 'delayed_timer' sysfs entries.
>> If some device driver want to use the different type of
>> workqueue, they can choice the workqueue type in the
>> probe function of device driver. 
> 
> sysfs allows change in runtime

hmm. I don't say that sysfs can't change the value at the runtime.
It is not proper answer from my opinion. Without any answer
from my opinion, you just say the feature of sysfs interface.
You and me already knew that user can change the value via sysfs.

> 
>> Secondly, the 'dealyed_timer' is not for all devfreq
>> device driver. Only devfreq device driver uses the
>> 'simple_ondemand' governor. It is wrong to show
>> without any specific reason.
> 
> Good point, performance or powersave with fixed max or min freq
> do not need them.
> 
>> If you suggest the reasonable data with test result,
>> I prefer to add the new flag to 'struct devfreq_dev_profile'.
> 
> imho users of devfreq may give it a try and perform tests themselfs
> and then share results.

As I commented above, I want to apply some feature
if there are any real requirements.

> 
>>>
>>> Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
>>> ---
>>>  Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
>>>  drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
>>>  include/linux/devfreq.h                       |  2 +
>>>  3 files changed, 57 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
>>> index 9758eb85ade3..07bfd0df6a4a 100644
>>> --- a/Documentation/ABI/testing/sysfs-class-devfreq
>>> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
>>> @@ -30,6 +30,16 @@ Description:
>>>  		target_freq when get_cur_freq() is not implemented by
>>>  		devfreq driver.
>>>  
>>> +What:		/sys/class/devfreq/.../delayed_timer
>>> +Date:		December 2019
>>> +Contact:	Kamil Konieczny <k.konieczny@samsung.com>
>>> +Description:
>>> +		This ABI shows or clears timer type used by devfreq
>>> +		workqueue. When 0, it uses default deferred timer.
>>> +		When set to 1 devfreq will use delayed timer. Example
>>> +		useage:
>>> +			echo 1 > /sys/class/devfreq/.../delayed_timer
>>> +
>>>  What:		/sys/class/devfreq/.../target_freq
>>>  Date:		September 2012
>>>  Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
>>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>>> index 955949c6fc1f..c277d1770fef 100644
>>> --- a/drivers/devfreq/devfreq.c
>>> +++ b/drivers/devfreq/devfreq.c
>>> @@ -445,7 +445,11 @@ void devfreq_monitor_start(struct devfreq *devfreq)
>>>  	if (devfreq->governor->interrupt_driven)
>>>  		return;
>>>  
>>> -	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>>> +	if (devfreq->delayed_timer)
>>> +		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
>>> +	else
>>> +		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>>> +
>>>  	if (devfreq->profile->polling_ms)
>>>  		queue_delayed_work(devfreq_wq, &devfreq->work,
>>>  			msecs_to_jiffies(devfreq->profile->polling_ms));
>>> @@ -698,6 +702,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>  	devfreq->last_status.current_frequency = profile->initial_freq;
>>>  	devfreq->data = data;
>>>  	devfreq->nb.notifier_call = devfreq_notifier_call;
>>> +	devfreq->delayed_timer = false;
>>>  
>>>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>>>  		mutex_unlock(&devfreq->lock);
>>> @@ -1288,6 +1293,44 @@ static ssize_t available_governors_show(struct device *d,
>>>  }
>>>  static DEVICE_ATTR_RO(available_governors);
>>>  
>>> +static ssize_t delayed_timer_show(struct device *dev,
>>> +				  struct device_attribute *attr, char *buf)
>>> +{
>>> +	int i;
>>> +
>>> +	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
>>> +	return sprintf(buf, "%d\n", i);
>>> +}
>>> +
>>> +static ssize_t delayed_timer_store(struct device *dev,
>>> +				   struct device_attribute *attr,
>>> +				   const char *buf, size_t count)
>>> +{
>>> +	struct devfreq *df = to_devfreq(dev);
>>> +	bool old_timer;
>>> +	int value, ret;
>>> +
>>> +	if (!df->governor)
>>> +		return -EINVAL;
>>> +
>>> +	ret = kstrtoint(buf, 10, &value);
>>> +	if (ret || (value != 1 && value != 0))
>>> +		return -EINVAL;
>>> +
>>> +	mutex_lock(&df->lock);
>>> +	old_timer = df->delayed_timer;
>>> +	df->delayed_timer = value == 0 ? false : true;
>>> +	mutex_unlock(&df->lock);
>>> +
>>> +	if (old_timer != df->delayed_timer) {
>>> +		devfreq_monitor_stop(df);
>>> +		devfreq_monitor_start(df);
>>> +	}
>>> +
>>> +	return count;
>>> +}
>>> +static DEVICE_ATTR_RW(delayed_timer);
>>> +
>>>  static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr,
>>>  			     char *buf)
>>>  {
>>> @@ -1513,6 +1556,7 @@ static struct attribute *devfreq_attrs[] = {
>>>  	&dev_attr_name.attr,
>>>  	&dev_attr_governor.attr,
>>>  	&dev_attr_available_governors.attr,
>>> +	&dev_attr_delayed_timer.attr,
>>>  	&dev_attr_cur_freq.attr,
>>>  	&dev_attr_available_frequencies.attr,
>>>  	&dev_attr_target_freq.attr,
>>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>>> index de2fdc56aa5b..761aa0a09db7 100644
>>> --- a/include/linux/devfreq.h
>>> +++ b/include/linux/devfreq.h
>>> @@ -134,6 +134,7 @@ struct devfreq_stats {
>>>   *		reevaluate operable frequencies. Devfreq users may use
>>>   *		devfreq.nb to the corresponding register notifier call chain.
>>>   * @work:	delayed work for load monitoring.
>>> + * @delayed_timer:	use delayed or deferred timer for workqueue.
>>>   * @previous_freq:	previously configured frequency value.
>>>   * @data:	Private data of the governor. The devfreq framework does not
>>>   *		touch this.
>>> @@ -166,6 +167,7 @@ struct devfreq {
>>>  	char governor_name[DEVFREQ_NAME_LEN];
>>>  	struct notifier_block nb;
>>>  	struct delayed_work work;
>>> +	bool delayed_timer;
>>>  
>>>  	unsigned long previous_freq;
>>>  	struct devfreq_dev_status last_status;
>>>
>>
>>
>
Kamil Konieczny Dec. 10, 2019, 10:15 a.m. UTC | #5
Hi,

On 09.12.2019 20:27, Matthias Kaehlcke wrote:
> Hi,
> 
> On Mon, Dec 09, 2019 at 03:44:23PM +0100, Kamil Konieczny wrote:
>> Current devfreq workqueue uses deferred timer. Introduce sysfs
>> file delayed_timer and use it for change from deferred to
>> delayed work. The default is to use old deferred one, which
>> saves power, but can miss increased demand for higher bus
>> frequency if timer was assigned to idle cpu.
>>
>> Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
>> ---
>>  Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
>>  drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
>>  include/linux/devfreq.h                       |  2 +
>>  3 files changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
>> index 9758eb85ade3..07bfd0df6a4a 100644
>> --- a/Documentation/ABI/testing/sysfs-class-devfreq
>> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
>> @@ -30,6 +30,16 @@ Description:
>>  		target_freq when get_cur_freq() is not implemented by
>>  		devfreq driver.
>>  
>> +What:		/sys/class/devfreq/.../delayed_timer
>> +Date:		December 2019
>> +Contact:	Kamil Konieczny <k.konieczny@samsung.com>
>> +Description:
>> +		This ABI shows or clears timer type used by devfreq
>> +		workqueue. When 0, it uses default deferred timer.
>> +		When set to 1 devfreq will use delayed timer. Example
>> +		useage:
>> +			echo 1 > /sys/class/devfreq/.../delayed_timer
>> +
>>  What:		/sys/class/devfreq/.../target_freq
>>  Date:		September 2012
>>  Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>> index 955949c6fc1f..c277d1770fef 100644
>> --- a/drivers/devfreq/devfreq.c
>> +++ b/drivers/devfreq/devfreq.c
>> @@ -445,7 +445,11 @@ void devfreq_monitor_start(struct devfreq *devfreq)
>>  	if (devfreq->governor->interrupt_driven)
>>  		return;
>>  
>> -	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>> +	if (devfreq->delayed_timer)
>> +		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
>> +	else
>> +		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>> +
>>  	if (devfreq->profile->polling_ms)
>>  		queue_delayed_work(devfreq_wq, &devfreq->work,
>>  			msecs_to_jiffies(devfreq->profile->polling_ms));
>> @@ -698,6 +702,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>  	devfreq->last_status.current_frequency = profile->initial_freq;
>>  	devfreq->data = data;
>>  	devfreq->nb.notifier_call = devfreq_notifier_call;
>> +	devfreq->delayed_timer = false;
> 
> devfreq is zero initialized (allocated with kzalloc()), hence this is
> unnecessary.

ok, I will remove it

> 
>>  
>>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>>  		mutex_unlock(&devfreq->lock);
>> @@ -1288,6 +1293,44 @@ static ssize_t available_governors_show(struct device *d,
>>  }
>>  static DEVICE_ATTR_RO(available_governors);
>>  
>> +static ssize_t delayed_timer_show(struct device *dev,
>> +				  struct device_attribute *attr, char *buf)
>> +{
>> +	int i;
>> +
>> +	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
>> +	return sprintf(buf, "%d\n", i);
> 
> get rid of 'i' and just use df->delayed_timer in sprintf().

ok

> 
>> +}
>> +
>> +static ssize_t delayed_timer_store(struct device *dev,
>> +				   struct device_attribute *attr,
>> +				   const char *buf, size_t count)
>> +{
>> +	struct devfreq *df = to_devfreq(dev);
>> +	bool old_timer;
> 
> Not a great name, the variable doesn't hold a timer. I'd suggest something
> like 'prev_val'.

ok, will change to 'bool prev, value;'

> 
>> +	int value, ret;
>> +
>> +	if (!df->governor)
>> +		return -EINVAL;
>> +
>> +	ret = kstrtoint(buf, 10, &value);
>> +	if (ret || (value != 1 && value != 0))
>> +		return -EINVAL;
> 
> use kstrtobool() instead of partially re-implementing it.

ok

>> +
>> +	mutex_lock(&df->lock);
>> +	old_timer = df->delayed_timer;
>> +	df->delayed_timer = value == 0 ? false : true;
> 
> What's wrong with:
> 
> df->delayed_timer = value;
> 
> ?

ok, when I change type of value and use above kstrtobool, this will be ok

> 
>> +	mutex_unlock(&df->lock);
> 
> Does the locking as is actually provide any value? The use case seems to
> be concurrent setting of the sysfs attribute. The lock is released after
> the assignment, hence the value of 'df->delayed_timer' could be overwritten
> before the condition below is evaluated.

Good point, before send I spotted these lines and "optimized" code so it got worse...

> If you want to protect against this case you need something like this:
> 
> // don't release the lock before evaluating the condition
> 
>> +	if (old_timer != df->delayed_timer) {

even better: if (prev != value) {
so no need for ugly mutex_unlock in both if-else paths

>   	   	mutex_unlock(&df->lock);

>> +		devfreq_monitor_stop(df);
>> +		devfreq_monitor_start(df);
>> +	}
>   	else {
> 		mutex_unlock(&df->lock);
> 	}
> 
> I don't pretend it's pretty ;-)

Thank you for review.
Kamil Konieczny Jan. 20, 2020, 2:20 p.m. UTC | #6
Hi Chanwoo,

here are some logs from Exynos XU3 with next-20200113
As you can see from them, counters saturate to -1 and then are useless.
The solution will be to change workqueue type from DEFERRED into DELAYED
(default polling time 50ms or few times more seems fine).

I changed exynos-nocp for some more debug data:

--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -167,7 +167,8 @@ static int exynos_nocp_get_event(struct devfreq_event_dev *edev,
        edata->load_count = ((counter[1] << 16) | counter[0]);
        edata->total_count = ((counter[3] << 16) | counter[2]);
 
-       dev_dbg(&edev->dev, "%s (event: %ld/%ld)\n", edev->desc->name,
+       dev_dbg(&edev->dev, "%s (event: %ld/%ld) %08lx/%08lx\n", edev->desc->name,
+edata->load_count, edata->total_count,
 edata->load_count, edata->total_count);

Below are logs:

root@target:~/devfreq# cat nocp_debug
echo -n 'func exynos_nocp_get_event +p' > /sys/kernel/debug/dynamic_debug/control

root@target:~/devfreq# dmesg|tail
[  171.619022] devfreq-event event2: nocp@10ca1800 (event: 15170920/4932961) 00e77d68/004b4561
[  171.619088] devfreq-event event3: nocp@10ca1c00 (event: 0/4931345) 00000000/004b3f11
[  194.358276] devfreq-event event0: nocp@10ca1000 (event: -1/1875960257) ffffffff/6fd0e1c1
[  194.358529] devfreq-event event1: nocp@10ca1400 (event: 0/1875978297) 00000000/6fd12839
[  194.358860] devfreq-event event2: nocp@10ca1800 (event: -1/1875994997) ffffffff/6fd16975
[  194.359162] devfreq-event event3: nocp@10ca1c00 (event: 0/1876014197) 00000000/6fd1b475
[  194.457796] devfreq-event event0: nocp@10ca1000 (event: 25127060/8085549) 017f6894/007b602d
[  194.457930] devfreq-event event1: nocp@10ca1400 (event: 0/8062505) 00000000/007b0629
[  194.458044] devfreq-event event2: nocp@10ca1800 (event: 24674808/8032785) 017881f8/007a9211
[  194.458160] devfreq-event event3: nocp@10ca1c00 (event: 0/8014513) 00000000/007a4ab1

root@target:~/devfreq# dmesg|tail                                                                                             
[  938.626509] devfreq-event event2: nocp@10ca1800 (event: -1/-1) ffffffff/ffffffff                                           
[  938.626656] devfreq-event event3: nocp@10ca1c00 (event: 0/-1) 00000000/ffffffff                                            
[ 1026.756029] devfreq-event event0: nocp@10ca1000 (event: -1/-1) ffffffff/ffffffff                                           
[ 1026.756386] devfreq-event event1: nocp@10ca1400 (event: 0/-1) 00000000/ffffffff                                            
[ 1026.756620] devfreq-event event2: nocp@10ca1800 (event: -1/-1) ffffffff/ffffffff                                           
[ 1026.756831] devfreq-event event3: nocp@10ca1c00 (event: 0/-1) 00000000/ffffffff                                            
[ 1026.875345] devfreq-event event0: nocp@10ca1000 (event: 30306432/9742217) 01ce7080/0094a789                                
[ 1026.875466] devfreq-event event1: nocp@10ca1400 (event: 0/9733001) 00000000/00948389                                       
[ 1026.875562] devfreq-event event2: nocp@10ca1800 (event: 29891212/9717537) 01c81a8c/00944721                                
[ 1026.875658] devfreq-event event3: nocp@10ca1c00 (event: 0/9702245) 00000000/00940b65                                       
root@target:~/devfreq# dmesg|tail                                                                                             
[ 1231.215227] devfreq-event event2: nocp@10ca1800 (event: 20069796/6510789) 01323da4/006358c5                                
[ 1231.215362] devfreq-event event3: nocp@10ca1c00 (event: 0/6505265) 00000000/00634331                                       
[ 1231.274744] devfreq-event event0: nocp@10ca1000 (event: 15373176/4877213) 00ea9378/004a6b9d                                
[ 1231.274840] devfreq-event event1: nocp@10ca1400 (event: 0/4869301) 00000000/004a4cb5                                       
[ 1231.274928] devfreq-event event2: nocp@10ca1800 (event: 15107524/4860117) 00e685c4/004a28d5                                
[ 1231.275017] devfreq-event event3: nocp@10ca1c00 (event: 0/4850901) 00000000/004a04d5                                       
[ 1345.925227] devfreq-event event0: nocp@10ca1000 (event: -1/-1) ffffffff/ffffffff                                           
[ 1345.925540] devfreq-event event1: nocp@10ca1400 (event: 0/-1) 00000000/ffffffff                                            
[ 1345.925751] devfreq-event event2: nocp@10ca1800 (event: -1/-1) ffffffff/ffffffff                                           
[ 1345.926095] devfreq-event event3: nocp@10ca1c00 (event: 0/-1) 00000000/ffffffff                                            
root@target:~/devfreq# uname -a                                                                                               
Linux target 5.5.0-rc5-next-20200113-00001-g9c27c4bdca7e #82 SMP PREEMPT Mon Jan 20 14:31:11 CET 2020 armv7l GNU/Linux        
root@target:~/devfreq# cat /sys/class/devfreq/devfreq2/trans_stat                                                             
     From  :   To                                                                                                             
           :  88700000 133000000 177400000 266000000 532000000   time(ms)                                                     
*  88700000:         0         0         0         0        31    660390                                                      
  133000000:         4         0         0         0         3     32630                                                      
  177400000:         2         0         0         0         1    103520                                                      
  266000000:         1         2         1         0         0      8020                                                      
  532000000:        24         5         2         4         0    680050                                                      
Total transition : 80

On 10.12.2019 10:03, Kamil Konieczny wrote:
> Hi Chanwoo,
> 
> On 10.12.2019 02:47, Chanwoo Choi wrote:
>> On 12/9/19 11:44 PM, Kamil Konieczny wrote:
>>> Current devfreq workqueue uses deferred timer. Introduce sysfs
>>> file delayed_timer and use it for change from deferred to
>>> delayed work. The default is to use old deferred one, which
>>> saves power, but can miss increased demand for higher bus
>>> frequency if timer was assigned to idle cpu.
>>
>> As I commented on patch1, If you hope to change the feature
>> related to both performance  and power-consumption, 
>> you have to suggest the reasonable data with test result
>> on multiple scenarios.
> 
> Unfortunatly I do not have such tests. Do you have them ?
> May you share them with me or Marek ?
> 
>> Firstly,
>> I don't agree to add 'delayed_timer' sysfs entries.
>> If some device driver want to use the different type of
>> workqueue, they can choice the workqueue type in the
>> probe function of device driver. 
> 
> sysfs allows change in runtime
> 
>> Secondly, the 'dealyed_timer' is not for all devfreq
>> device driver. Only devfreq device driver uses the
>> 'simple_ondemand' governor. It is wrong to show
>> without any specific reason.
> 
> Good point, performance or powersave with fixed max or min freq
> do not need them.
> 
>> If you suggest the reasonable data with test result,
>> I prefer to add the new flag to 'struct devfreq_dev_profile'.
> 
> imho users of devfreq may give it a try and perform tests themselfs
> and then share results.
> 
>>>
>>> Signed-off-by: Kamil Konieczny <k.konieczny@samsung.com>
>>> ---
>>>  Documentation/ABI/testing/sysfs-class-devfreq | 10 ++++
>>>  drivers/devfreq/devfreq.c                     | 46 ++++++++++++++++++-
>>>  include/linux/devfreq.h                       |  2 +
>>>  3 files changed, 57 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
>>> index 9758eb85ade3..07bfd0df6a4a 100644
>>> --- a/Documentation/ABI/testing/sysfs-class-devfreq
>>> +++ b/Documentation/ABI/testing/sysfs-class-devfreq
>>> @@ -30,6 +30,16 @@ Description:
>>>  		target_freq when get_cur_freq() is not implemented by
>>>  		devfreq driver.
>>>  
>>> +What:		/sys/class/devfreq/.../delayed_timer
>>> +Date:		December 2019
>>> +Contact:	Kamil Konieczny <k.konieczny@samsung.com>
>>> +Description:
>>> +		This ABI shows or clears timer type used by devfreq
>>> +		workqueue. When 0, it uses default deferred timer.
>>> +		When set to 1 devfreq will use delayed timer. Example
>>> +		useage:
>>> +			echo 1 > /sys/class/devfreq/.../delayed_timer
>>> +
>>>  What:		/sys/class/devfreq/.../target_freq
>>>  Date:		September 2012
>>>  Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
>>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>>> index 955949c6fc1f..c277d1770fef 100644
>>> --- a/drivers/devfreq/devfreq.c
>>> +++ b/drivers/devfreq/devfreq.c
>>> @@ -445,7 +445,11 @@ void devfreq_monitor_start(struct devfreq *devfreq)
>>>  	if (devfreq->governor->interrupt_driven)
>>>  		return;
>>>  
>>> -	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>>> +	if (devfreq->delayed_timer)
>>> +		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
>>> +	else
>>> +		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
>>> +
>>>  	if (devfreq->profile->polling_ms)
>>>  		queue_delayed_work(devfreq_wq, &devfreq->work,
>>>  			msecs_to_jiffies(devfreq->profile->polling_ms));
>>> @@ -698,6 +702,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>  	devfreq->last_status.current_frequency = profile->initial_freq;
>>>  	devfreq->data = data;
>>>  	devfreq->nb.notifier_call = devfreq_notifier_call;
>>> +	devfreq->delayed_timer = false;
>>>  
>>>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>>>  		mutex_unlock(&devfreq->lock);
>>> @@ -1288,6 +1293,44 @@ static ssize_t available_governors_show(struct device *d,
>>>  }
>>>  static DEVICE_ATTR_RO(available_governors);
>>>  
>>> +static ssize_t delayed_timer_show(struct device *dev,
>>> +				  struct device_attribute *attr, char *buf)
>>> +{
>>> +	int i;
>>> +
>>> +	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
>>> +	return sprintf(buf, "%d\n", i);
>>> +}
>>> +
>>> +static ssize_t delayed_timer_store(struct device *dev,
>>> +				   struct device_attribute *attr,
>>> +				   const char *buf, size_t count)
>>> +{
>>> +	struct devfreq *df = to_devfreq(dev);
>>> +	bool old_timer;
>>> +	int value, ret;
>>> +
>>> +	if (!df->governor)
>>> +		return -EINVAL;
>>> +
>>> +	ret = kstrtoint(buf, 10, &value);
>>> +	if (ret || (value != 1 && value != 0))
>>> +		return -EINVAL;
>>> +
>>> +	mutex_lock(&df->lock);
>>> +	old_timer = df->delayed_timer;
>>> +	df->delayed_timer = value == 0 ? false : true;
>>> +	mutex_unlock(&df->lock);
>>> +
>>> +	if (old_timer != df->delayed_timer) {
>>> +		devfreq_monitor_stop(df);
>>> +		devfreq_monitor_start(df);
>>> +	}
>>> +
>>> +	return count;
>>> +}
>>> +static DEVICE_ATTR_RW(delayed_timer);
>>> +
>>>  static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr,
>>>  			     char *buf)
>>>  {
>>> @@ -1513,6 +1556,7 @@ static struct attribute *devfreq_attrs[] = {
>>>  	&dev_attr_name.attr,
>>>  	&dev_attr_governor.attr,
>>>  	&dev_attr_available_governors.attr,
>>> +	&dev_attr_delayed_timer.attr,
>>>  	&dev_attr_cur_freq.attr,
>>>  	&dev_attr_available_frequencies.attr,
>>>  	&dev_attr_target_freq.attr,
>>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>>> index de2fdc56aa5b..761aa0a09db7 100644
>>> --- a/include/linux/devfreq.h
>>> +++ b/include/linux/devfreq.h
>>> @@ -134,6 +134,7 @@ struct devfreq_stats {
>>>   *		reevaluate operable frequencies. Devfreq users may use
>>>   *		devfreq.nb to the corresponding register notifier call chain.
>>>   * @work:	delayed work for load monitoring.
>>> + * @delayed_timer:	use delayed or deferred timer for workqueue.
>>>   * @previous_freq:	previously configured frequency value.
>>>   * @data:	Private data of the governor. The devfreq framework does not
>>>   *		touch this.
>>> @@ -166,6 +167,7 @@ struct devfreq {
>>>  	char governor_name[DEVFREQ_NAME_LEN];
>>>  	struct notifier_block nb;
>>>  	struct delayed_work work;
>>> +	bool delayed_timer;
>>>  
>>>  	unsigned long previous_freq;
>>>  	struct devfreq_dev_status last_status;
>>>
>>
>>
>

Patch
diff mbox series

diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 9758eb85ade3..07bfd0df6a4a 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -30,6 +30,16 @@  Description:
 		target_freq when get_cur_freq() is not implemented by
 		devfreq driver.
 
+What:		/sys/class/devfreq/.../delayed_timer
+Date:		December 2019
+Contact:	Kamil Konieczny <k.konieczny@samsung.com>
+Description:
+		This ABI shows or clears timer type used by devfreq
+		workqueue. When 0, it uses default deferred timer.
+		When set to 1 devfreq will use delayed timer. Example
+		useage:
+			echo 1 > /sys/class/devfreq/.../delayed_timer
+
 What:		/sys/class/devfreq/.../target_freq
 Date:		September 2012
 Contact:	Rajagopal Venkat <rajagopal.venkat@linaro.org>
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 955949c6fc1f..c277d1770fef 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -445,7 +445,11 @@  void devfreq_monitor_start(struct devfreq *devfreq)
 	if (devfreq->governor->interrupt_driven)
 		return;
 
-	INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
+	if (devfreq->delayed_timer)
+		INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
+	else
+		INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
+
 	if (devfreq->profile->polling_ms)
 		queue_delayed_work(devfreq_wq, &devfreq->work,
 			msecs_to_jiffies(devfreq->profile->polling_ms));
@@ -698,6 +702,7 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
+	devfreq->delayed_timer = false;
 
 	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
 		mutex_unlock(&devfreq->lock);
@@ -1288,6 +1293,44 @@  static ssize_t available_governors_show(struct device *d,
 }
 static DEVICE_ATTR_RO(available_governors);
 
+static ssize_t delayed_timer_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	int i;
+
+	i = to_devfreq(dev)->delayed_timer ? 1 : 0;
+	return sprintf(buf, "%d\n", i);
+}
+
+static ssize_t delayed_timer_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct devfreq *df = to_devfreq(dev);
+	bool old_timer;
+	int value, ret;
+
+	if (!df->governor)
+		return -EINVAL;
+
+	ret = kstrtoint(buf, 10, &value);
+	if (ret || (value != 1 && value != 0))
+		return -EINVAL;
+
+	mutex_lock(&df->lock);
+	old_timer = df->delayed_timer;
+	df->delayed_timer = value == 0 ? false : true;
+	mutex_unlock(&df->lock);
+
+	if (old_timer != df->delayed_timer) {
+		devfreq_monitor_stop(df);
+		devfreq_monitor_start(df);
+	}
+
+	return count;
+}
+static DEVICE_ATTR_RW(delayed_timer);
+
 static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -1513,6 +1556,7 @@  static struct attribute *devfreq_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_governor.attr,
 	&dev_attr_available_governors.attr,
+	&dev_attr_delayed_timer.attr,
 	&dev_attr_cur_freq.attr,
 	&dev_attr_available_frequencies.attr,
 	&dev_attr_target_freq.attr,
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index de2fdc56aa5b..761aa0a09db7 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -134,6 +134,7 @@  struct devfreq_stats {
  *		reevaluate operable frequencies. Devfreq users may use
  *		devfreq.nb to the corresponding register notifier call chain.
  * @work:	delayed work for load monitoring.
+ * @delayed_timer:	use delayed or deferred timer for workqueue.
  * @previous_freq:	previously configured frequency value.
  * @data:	Private data of the governor. The devfreq framework does not
  *		touch this.
@@ -166,6 +167,7 @@  struct devfreq {
 	char governor_name[DEVFREQ_NAME_LEN];
 	struct notifier_block nb;
 	struct delayed_work work;
+	bool delayed_timer;
 
 	unsigned long previous_freq;
 	struct devfreq_dev_status last_status;