linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
@ 2015-05-08  7:35 Preeti U Murthy
  2015-05-08 12:43 ` Sudeep Holla
  2015-05-08 14:18 ` Rafael J. Wysocki
  0 siblings, 2 replies; 29+ messages in thread
From: Preeti U Murthy @ 2015-05-08  7:35 UTC (permalink / raw)
  To: peterz, tglx, rafael.j.wysocki, daniel.lezcano
  Cc: rlippert, linux-pm, linus.walleij, linux-kernel, mingo,
	sudeep.holla, linuxppc-dev

When a CPU has to enter an idle state where tick stops, it makes a call
to tick_broadcast_enter(). The call will fail if this CPU is the
broadcast CPU. Today, under such a circumstance, the arch cpuidle code
handles this CPU.  This is not convincing because not only do we not
know what the arch cpuidle code does, but we also do not account for the
idle state residency time and usage of such a CPU.

This scenario can be handled better by simply choosing an idle state
in which ticks do not stop. To accommodate this change, move the setting
of the runqueue idle state from the core to the cpuidle driver; otherwise
rq->idle_state will be set incorrectly.

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---
Changes from V2: https://lkml.org/lkml/2015/5/7/78
Introduce a function in cpuidle core to select an idle state where ticks do not
stop rather than going through the governors.

Changes from V1: https://lkml.org/lkml/2015/5/7/24
Rebased on the latest linux-pm/bleeding-edge branch

 drivers/cpuidle/cpuidle.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/sched.h     |   16 ++++++++++++++++
 kernel/sched/core.c       |   17 +++++++++++++++++
 kernel/sched/fair.c       |    2 +-
 kernel/sched/idle.c       |    6 ------
 kernel/sched/sched.h      |   24 ------------------------
 6 files changed, 77 insertions(+), 33 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8c24f95..d1af760 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/tick.h>
+#include <linux/sched.h>
 #include <trace/events/power.h>
 
 #include "cpuidle.h"
@@ -146,6 +147,36 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	return index;
 }
 
+/*
+ * find_tick_valid_state - select a state where tick does not stop
+ * @dev: cpuidle device for this cpu
+ * @drv: cpuidle driver for this cpu
+ */
+static int find_tick_valid_state(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv)
+{
+	int i, ret = -1;
+
+	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+		struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+		/*
+		 * We do not explicitly check for latency requirement
+		 * since it is safe to assume that only shallower idle
+		 * states will have the CPUIDLE_FLAG_TIMER_STOP bit
+		 * cleared and they will invariably meet the latency
+		 * requirement.
+		 */
+		if (s->disabled || su->disable ||
+			(s->flags & CPUIDLE_FLAG_TIMER_STOP))
+			continue;
+
+		ret = i;
+	}
+	return ret;
+}
+
 /**
  * cpuidle_enter_state - enter the state and update stats
  * @dev: cpuidle device for this cpu
@@ -168,10 +199,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		default_idle_call();
-		return -EBUSY;
+		index = find_tick_valid_state(dev, drv);
+		if (index < 0) {
+			default_idle_call();
+			return -EBUSY;
+		}
+		target_state = &drv->states[index];
 	}
 
+	/* Take note of the planned idle state. */
+	idle_set_state(smp_processor_id(), target_state);
+
 	trace_cpu_idle_rcuidle(index, dev->cpu);
 	time_start = ktime_get();
 
@@ -180,6 +218,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	time_end = ktime_get();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
+	/* The cpu is no longer idle or about to enter idle. */
+	idle_set_state(smp_processor_id(), NULL);
+
 	if (broadcast) {
 		if (WARN_ON_ONCE(!irqs_disabled()))
 			local_irq_disable();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e61..fef8359 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -45,6 +45,7 @@ struct sched_param {
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/rtmutex.h>
+#include <linux/cpuidle.h>
 
 #include <linux/time.h>
 #include <linux/param.h>
@@ -893,6 +894,21 @@ enum cpu_idle_type {
 	CPU_MAX_IDLE_TYPES
 };
 
+#ifdef CONFIG_CPU_IDLE
+extern void idle_set_state(int cpu, struct cpuidle_state *idle_state);
+extern struct cpuidle_state *idle_get_state(int cpu);
+#else
+static inline void idle_set_state(int cpu,
+				  struct cpuidle_state *idle_state)
+{
+}
+
+static inline struct cpuidle_state *idle_get_state(int cpu)
+{
+	return NULL;
+}
+#endif
+
 /*
  * Increase resolution of cpu_capacity calculations
  */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe22f75..8e1cc50 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3216,6 +3216,23 @@ struct task_struct *idle_task(int cpu)
 	return cpu_rq(cpu)->idle;
 }
 
+#ifdef CONFIG_CPU_IDLE
+void idle_set_state(int cpu, struct cpuidle_state *idle_state)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->idle_state = idle_state;
+}
+
+struct cpuidle_state *idle_get_state(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	WARN_ON(!rcu_read_lock_held());
+	return rq->idle_state;
+}
+#endif /* CONFIG_CPU_IDLE */
+
 /**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ffeaa41..211ef9a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4709,7 +4709,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
 		if (idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
-			struct cpuidle_state *idle = idle_get_state(rq);
+			struct cpuidle_state *idle = idle_get_state(i);
 			if (idle && idle->exit_latency < min_exit_latency) {
 				/*
 				 * We give priority to a CPU whose idle state
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 5933d06..04af46f 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -101,9 +101,6 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		return -EBUSY;
 	}
 
-	/* Take note of the planned idle state. */
-	idle_set_state(this_rq(), &drv->states[next_state]);
-
 	/*
 	 * Enter the idle state previously returned by the governor decision.
 	 * This function will block until an interrupt occurs and will take
@@ -111,9 +108,6 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 */
 	entered_state = cpuidle_enter(drv, dev, next_state);
 
-	/* The cpu is no longer idle or about to enter idle. */
-	idle_set_state(this_rq(), NULL);
-
 	return entered_state;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e0e1299..2c56caa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1253,30 +1253,6 @@ static inline void idle_exit_fair(struct rq *rq) { }
 
 #endif
 
-#ifdef CONFIG_CPU_IDLE
-static inline void idle_set_state(struct rq *rq,
-				  struct cpuidle_state *idle_state)
-{
-	rq->idle_state = idle_state;
-}
-
-static inline struct cpuidle_state *idle_get_state(struct rq *rq)
-{
-	WARN_ON(!rcu_read_lock_held());
-	return rq->idle_state;
-}
-#else
-static inline void idle_set_state(struct rq *rq,
-				  struct cpuidle_state *idle_state)
-{
-}
-
-static inline struct cpuidle_state *idle_get_state(struct rq *rq)
-{
-	return NULL;
-}
-#endif
-
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-08  7:35 [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully Preeti U Murthy
@ 2015-05-08 12:43 ` Sudeep Holla
  2015-05-08 14:18 ` Rafael J. Wysocki
  1 sibling, 0 replies; 29+ messages in thread
From: Sudeep Holla @ 2015-05-08 12:43 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, Sudeep Holla,
	rlippert, linux-pm, linus.walleij, linux-kernel, mingo,
	linuxppc-dev



On 08/05/15 08:35, Preeti U Murthy wrote:
> When a CPU has to enter an idle state where tick stops, it makes a call
> to tick_broadcast_enter(). The call will fail if this CPU is the
> broadcast CPU. Today, under such a circumstance, the arch cpuidle code
> handles this CPU.  This is not convincing because not only do we not
> know what the arch cpuidle code does, but we also do not account for the
> idle state residency time and usage of such a CPU.
>
> This scenario can be handled better by simply choosing an idle state
> where in ticks do not stop. To accommodate this change move the setting
> of runqueue idle state from the core to the cpuidle driver, else the
> rq->idle_state will be set wrong.
>
> Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>

I gave it a spin on an ARM64 Juno platform with one of the CPUs in
broadcast mode and on a Vexpress TC2 with a broadcast timer. I found no
issues in either case. So, you can add:

Tested-by: Sudeep Holla <sudeep.holla@arm.com>

Regards,
Sudeep

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-08  7:35 [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully Preeti U Murthy
  2015-05-08 12:43 ` Sudeep Holla
@ 2015-05-08 14:18 ` Rafael J. Wysocki
  2015-05-08 21:51   ` Rafael J. Wysocki
  2015-05-09  5:49   ` Preeti U Murthy
  1 sibling, 2 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-08 14:18 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Friday, May 08, 2015 01:05:32 PM Preeti U Murthy wrote:
> When a CPU has to enter an idle state where tick stops, it makes a call
> to tick_broadcast_enter(). The call will fail if this CPU is the
> broadcast CPU. Today, under such a circumstance, the arch cpuidle code
> handles this CPU.  This is not convincing because not only do we not
> know what the arch cpuidle code does, but we also do not account for the
> idle state residency time and usage of such a CPU.
> 
> This scenario can be handled better by simply choosing an idle state
> where in ticks do not stop. To accommodate this change move the setting
> of runqueue idle state from the core to the cpuidle driver, else the
> rq->idle_state will be set wrong.
> 
> Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
> ---
> Changes from V2: https://lkml.org/lkml/2015/5/7/78
> Introduce a function in cpuidle core to select an idle state where ticks do not
> stop rather than going through the governors.
> 
> Changes from V1: https://lkml.org/lkml/2015/5/7/24
> Rebased on the latest linux-pm/bleeding-edge branch
> 
>  drivers/cpuidle/cpuidle.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
>  include/linux/sched.h     |   16 ++++++++++++++++
>  kernel/sched/core.c       |   17 +++++++++++++++++
>  kernel/sched/fair.c       |    2 +-
>  kernel/sched/idle.c       |    6 ------
>  kernel/sched/sched.h      |   24 ------------------------
>  6 files changed, 77 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 8c24f95..d1af760 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -21,6 +21,7 @@
>  #include <linux/module.h>
>  #include <linux/suspend.h>
>  #include <linux/tick.h>
> +#include <linux/sched.h>
>  #include <trace/events/power.h>
>  
>  #include "cpuidle.h"
> @@ -146,6 +147,36 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
>  	return index;
>  }
>  
> +/*
> + * find_tick_valid_state - select a state where tick does not stop
> + * @dev: cpuidle device for this cpu
> + * @drv: cpuidle driver for this cpu
> + */
> +static int find_tick_valid_state(struct cpuidle_device *dev,
> +				struct cpuidle_driver *drv)
> +{
> +	int i, ret = -1;
> +
> +	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
> +		struct cpuidle_state *s = &drv->states[i];
> +		struct cpuidle_state_usage *su = &dev->states_usage[i];
> +
> +		/*
> +		 * We do not explicitly check for latency requirement
> +		 * since it is safe to assume that only shallower idle
> +		 * states will have the CPUIDLE_FLAG_TIMER_STOP bit
> +		 * cleared and they will invariably meet the latency
> +		 * requirement.
> +		 */
> +		if (s->disabled || su->disable ||
> +			(s->flags & CPUIDLE_FLAG_TIMER_STOP))
> +			continue;
> +
> +		ret = i;
> +	}
> +	return ret;
> +}
> +
>  /**
>   * cpuidle_enter_state - enter the state and update stats
>   * @dev: cpuidle device for this cpu
> @@ -168,10 +199,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>  	 * CPU as a broadcast timer, this call may fail if it is not available.
>  	 */
>  	if (broadcast && tick_broadcast_enter()) {
> -		default_idle_call();
> -		return -EBUSY;
> +		index = find_tick_valid_state(dev, drv);

Well, the new state needs to be deeper than the old one or you may violate the
governor's choice and this doesn't guarantee that.

Also I don't quite see a reason to duplicate the find_deepest_state() functionality
here.

> +		if (index < 0) {
> +			default_idle_call();
> +			return -EBUSY;
> +		}
> +		target_state = &drv->states[index];
>  	}
>  
> +	/* Take note of the planned idle state. */
> +	idle_set_state(smp_processor_id(), target_state);

And I wouldn't do this either.

The behavior here is pretty much as though the driver demoted the state chosen
by the governor and we don't call idle_set_state() again in those cases.

> +
>  	trace_cpu_idle_rcuidle(index, dev->cpu);
>  	time_start = ktime_get();

Overall, something like the patch below (untested) should work I suppose?

---
 drivers/cpuidle/cpuidle.c |   21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

Index: linux-pm/drivers/cpuidle/cpuidle.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -73,17 +73,19 @@ int cpuidle_play_dead(void)
 }
 
 static int find_deepest_state(struct cpuidle_driver *drv,
-			      struct cpuidle_device *dev, bool freeze)
+			      struct cpuidle_device *dev, bool freeze,
+			      int limit, unsigned int flags_to_avoid)
 {
 	unsigned int latency_req = 0;
 	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
 
-	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+	for (i = CPUIDLE_DRIVER_STATE_START; i < limit; i++) {
 		struct cpuidle_state *s = &drv->states[i];
 		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
 		if (s->disabled || su->disable || s->exit_latency <= latency_req
-		    || (freeze && !s->enter_freeze))
+		    || (freeze && !s->enter_freeze)
+		    || (s->flags & flags_to_avoid))
 			continue;
 
 		latency_req = s->exit_latency;
@@ -100,7 +102,7 @@ static int find_deepest_state(struct cpu
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 			       struct cpuidle_device *dev)
 {
-	return find_deepest_state(drv, dev, false);
+	return find_deepest_state(drv, dev, false, drv->state_count, 0);
 }
 
 static void enter_freeze_proper(struct cpuidle_driver *drv,
@@ -139,7 +141,7 @@ int cpuidle_enter_freeze(struct cpuidle_
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, true);
+	index = find_deepest_state(drv, dev, true, drv->state_count, 0);
 	if (index >= 0)
 		enter_freeze_proper(drv, dev, index);
 
@@ -168,8 +170,13 @@ int cpuidle_enter_state(struct cpuidle_d
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		default_idle_call();
-		return -EBUSY;
+		index = find_deepest_state(drv, dev, false, index,
+					   CPUIDLE_FLAG_TIMER_STOP);
+		if (index < 0) {
+			default_idle_call();
+			return -EBUSY;
+		}
+		target_state = &drv->states[index];
 	}
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-08 14:18 ` Rafael J. Wysocki
@ 2015-05-08 21:51   ` Rafael J. Wysocki
  2015-05-09  5:49   ` Preeti U Murthy
  1 sibling, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-08 21:51 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Friday, May 08, 2015 04:18:02 PM Rafael J. Wysocki wrote:
> On Friday, May 08, 2015 01:05:32 PM Preeti U Murthy wrote:
> > When a CPU has to enter an idle state where tick stops, it makes a call
> > to tick_broadcast_enter(). The call will fail if this CPU is the
> > broadcast CPU. Today, under such a circumstance, the arch cpuidle code
> > handles this CPU.  This is not convincing because not only do we not
> > know what the arch cpuidle code does, but we also do not account for the
> > idle state residency time and usage of such a CPU.
> > 
> > This scenario can be handled better by simply choosing an idle state
> > where in ticks do not stop. To accommodate this change move the setting
> > of runqueue idle state from the core to the cpuidle driver, else the
> > rq->idle_state will be set wrong.
> > 
> > Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
> > ---
> > Changes from V2: https://lkml.org/lkml/2015/5/7/78
> > Introduce a function in cpuidle core to select an idle state where ticks do not
> > stop rather than going through the governors.
> > 
> > Changes from V1: https://lkml.org/lkml/2015/5/7/24
> > Rebased on the latest linux-pm/bleeding-edge branch
> > 
> >  drivers/cpuidle/cpuidle.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
> >  include/linux/sched.h     |   16 ++++++++++++++++
> >  kernel/sched/core.c       |   17 +++++++++++++++++
> >  kernel/sched/fair.c       |    2 +-
> >  kernel/sched/idle.c       |    6 ------
> >  kernel/sched/sched.h      |   24 ------------------------
> >  6 files changed, 77 insertions(+), 33 deletions(-)
> > 
> > diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> > index 8c24f95..d1af760 100644
> > --- a/drivers/cpuidle/cpuidle.c
> > +++ b/drivers/cpuidle/cpuidle.c
> > @@ -21,6 +21,7 @@
> >  #include <linux/module.h>
> >  #include <linux/suspend.h>
> >  #include <linux/tick.h>
> > +#include <linux/sched.h>
> >  #include <trace/events/power.h>
> >  
> >  #include "cpuidle.h"
> > @@ -146,6 +147,36 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> >  	return index;
> >  }
> >  
> > +/*
> > + * find_tick_valid_state - select a state where tick does not stop
> > + * @dev: cpuidle device for this cpu
> > + * @drv: cpuidle driver for this cpu
> > + */
> > +static int find_tick_valid_state(struct cpuidle_device *dev,
> > +				struct cpuidle_driver *drv)
> > +{
> > +	int i, ret = -1;
> > +
> > +	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
> > +		struct cpuidle_state *s = &drv->states[i];
> > +		struct cpuidle_state_usage *su = &dev->states_usage[i];
> > +
> > +		/*
> > +		 * We do not explicitly check for latency requirement
> > +		 * since it is safe to assume that only shallower idle
> > +		 * states will have the CPUIDLE_FLAG_TIMER_STOP bit
> > +		 * cleared and they will invariably meet the latency
> > +		 * requirement.
> > +		 */
> > +		if (s->disabled || su->disable ||
> > +			(s->flags & CPUIDLE_FLAG_TIMER_STOP))
> > +			continue;
> > +
> > +		ret = i;
> > +	}
> > +	return ret;
> > +}
> > +
> >  /**
> >   * cpuidle_enter_state - enter the state and update stats
> >   * @dev: cpuidle device for this cpu
> > @@ -168,10 +199,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> >  	 * CPU as a broadcast timer, this call may fail if it is not available.
> >  	 */
> >  	if (broadcast && tick_broadcast_enter()) {
> > -		default_idle_call();
> > -		return -EBUSY;
> > +		index = find_tick_valid_state(dev, drv);
> 
> Well, the new state needs to be deeper

I should have said "shallower", sorry about that.

The state chosen by the governor satisfies certain latency requirements and we
can't violate those by choosing a deeper state here.

But the patch I sent actually did the right thing. :-)

> than the old one or you may violate the governor's choice and this doesn't
> guarantee that.
> 
> Also I don't quite see a reason to duplicate the find_deepest_state() functionality
> here.
> 
> > +		if (index < 0) {
> > +			default_idle_call();
> > +			return -EBUSY;
> > +		}
> > +		target_state = &drv->states[index];
> >  	}
> >  
> > +	/* Take note of the planned idle state. */
> > +	idle_set_state(smp_processor_id(), target_state);
> 
> And I wouldn't do this either.
> 
> The behavior here is pretty much as though the driver demoted the state chosen
> by the governor and we don't call idle_set_state() again in those cases.
> 
> > +
> >  	trace_cpu_idle_rcuidle(index, dev->cpu);
> >  	time_start = ktime_get();
> 
> Overall, something like the patch below (untested) should work I suppose?
> 
> ---
>  drivers/cpuidle/cpuidle.c |   21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> Index: linux-pm/drivers/cpuidle/cpuidle.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/cpuidle.c
> +++ linux-pm/drivers/cpuidle/cpuidle.c
> @@ -73,17 +73,19 @@ int cpuidle_play_dead(void)
>  }
>  
>  static int find_deepest_state(struct cpuidle_driver *drv,
> -			      struct cpuidle_device *dev, bool freeze)
> +			      struct cpuidle_device *dev, bool freeze,
> +			      int limit, unsigned int flags_to_avoid)
>  {
>  	unsigned int latency_req = 0;
>  	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
>  
> -	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
> +	for (i = CPUIDLE_DRIVER_STATE_START; i < limit; i++) {
>  		struct cpuidle_state *s = &drv->states[i];
>  		struct cpuidle_state_usage *su = &dev->states_usage[i];
>  
>  		if (s->disabled || su->disable || s->exit_latency <= latency_req
> -		    || (freeze && !s->enter_freeze))
> +		    || (freeze && !s->enter_freeze)
> +		    || (s->flags & flags_to_avoid))
>  			continue;
>  
>  		latency_req = s->exit_latency;
> @@ -100,7 +102,7 @@ static int find_deepest_state(struct cpu
>  int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>  			       struct cpuidle_device *dev)
>  {
> -	return find_deepest_state(drv, dev, false);
> +	return find_deepest_state(drv, dev, false, drv->state_count, 0);
>  }
>  
>  static void enter_freeze_proper(struct cpuidle_driver *drv,
> @@ -139,7 +141,7 @@ int cpuidle_enter_freeze(struct cpuidle_
>  	 * that interrupts won't be enabled when it exits and allows the tick to
>  	 * be frozen safely.
>  	 */
> -	index = find_deepest_state(drv, dev, true);
> +	index = find_deepest_state(drv, dev, true, drv->state_count, 0);
>  	if (index >= 0)
>  		enter_freeze_proper(drv, dev, index);
>  
> @@ -168,8 +170,13 @@ int cpuidle_enter_state(struct cpuidle_d
>  	 * CPU as a broadcast timer, this call may fail if it is not available.
>  	 */
>  	if (broadcast && tick_broadcast_enter()) {
> -		default_idle_call();
> -		return -EBUSY;
> +		index = find_deepest_state(drv, dev, false, index,
> +					   CPUIDLE_FLAG_TIMER_STOP);
> +		if (index < 0) {
> +			default_idle_call();
> +			return -EBUSY;
> +		}
> +		target_state = &drv->states[index];
>  	}
>  
>  	trace_cpu_idle_rcuidle(index, dev->cpu);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-08 14:18 ` Rafael J. Wysocki
  2015-05-08 21:51   ` Rafael J. Wysocki
@ 2015-05-09  5:49   ` Preeti U Murthy
  2015-05-09 18:46     ` Rafael J. Wysocki
  2015-05-09 20:11     ` Rafael J. Wysocki
  1 sibling, 2 replies; 29+ messages in thread
From: Preeti U Murthy @ 2015-05-09  5:49 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

Hi Rafael,

On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>> +/*
>> + * find_tick_valid_state - select a state where tick does not stop
>> + * @dev: cpuidle device for this cpu
>> + * @drv: cpuidle driver for this cpu
>> + */
>> +static int find_tick_valid_state(struct cpuidle_device *dev,
>> +				struct cpuidle_driver *drv)
>> +{
>> +	int i, ret = -1;
>> +
>> +	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
>> +		struct cpuidle_state *s = &drv->states[i];
>> +		struct cpuidle_state_usage *su = &dev->states_usage[i];
>> +
>> +		/*
>> +		 * We do not explicitly check for latency requirement
>> +		 * since it is safe to assume that only shallower idle
>> +		 * states will have the CPUIDLE_FLAG_TIMER_STOP bit
>> +		 * cleared and they will invariably meet the latency
>> +		 * requirement.
>> +		 */
>> +		if (s->disabled || su->disable ||
>> +			(s->flags & CPUIDLE_FLAG_TIMER_STOP))
>> +			continue;
>> +
>> +		ret = i;
>> +	}
>> +	return ret;
>> +}
>> +
>>  /**
>>   * cpuidle_enter_state - enter the state and update stats
>>   * @dev: cpuidle device for this cpu
>> @@ -168,10 +199,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>>  	 * CPU as a broadcast timer, this call may fail if it is not available.
>>  	 */
>>  	if (broadcast && tick_broadcast_enter()) {
>> -		default_idle_call();
>> -		return -EBUSY;
>> +		index = find_tick_valid_state(dev, drv);
> 
> Well, the new state needs to be deeper than the old one or you may violate the
> governor's choice and this doesn't guarantee that.

The comment above in find_tick_valid_state() explains why we are bound
to choose a shallow idle state. I think it's safe to assume that any
state deeper than this one would have the CPUIDLE_FLAG_TIMER_STOP flag
set and hence would be skipped.

Your patch relies on the assumption that the idle states are arranged in
the increasing order of exit_latency/in the order of shallow to deep.
This is not guaranteed, is it?

> 
> Also I don't quite see a reason to duplicate the find_deepest_state() functionality
> here.

Agreed. We could club them like in your patch.

> 
>> +		if (index < 0) {
>> +			default_idle_call();
>> +			return -EBUSY;
>> +		}
>> +		target_state = &drv->states[index];
>>  	}
>>  
>> +	/* Take note of the planned idle state. */
>> +	idle_set_state(smp_processor_id(), target_state);
> 
> And I wouldn't do this either.
> 
> The behavior here is pretty much as though the driver demoted the state chosen
> by the governor and we don't call idle_set_state() again in those cases.

Why is this wrong? The idea here is to set the idle state of the
runqueue to the one that it is more likely to enter. It is true
that the state has been demoted, but I don't see any code that requires
rq->idle_state to be only a governor-chosen state or nothing at all.

This is a more important chunk of this patch because it allows us to
track the idle states of the broadcast CPU. Else the system idle time is
bound to be higher than the residency time in different idle states of
all the CPUs. This shows up starkly as an anomaly if we are profiling
cpuidle state entry/exit.

> 
>> +
>>  	trace_cpu_idle_rcuidle(index, dev->cpu);
>>  	time_start = ktime_get();
> 
> Overall, something like the patch below (untested) should work I suppose?

With the exception of the above two points, yes, this should work.
> 
> ---
>  drivers/cpuidle/cpuidle.c |   21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> Index: linux-pm/drivers/cpuidle/cpuidle.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/cpuidle.c
> +++ linux-pm/drivers/cpuidle/cpuidle.c
> @@ -73,17 +73,19 @@ int cpuidle_play_dead(void)
>  }
> 
>  static int find_deepest_state(struct cpuidle_driver *drv,
> -			      struct cpuidle_device *dev, bool freeze)
> +			      struct cpuidle_device *dev, bool freeze,
> +			      int limit, unsigned int flags_to_avoid)
>  {
>  	unsigned int latency_req = 0;
>  	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
> 
> -	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
> +	for (i = CPUIDLE_DRIVER_STATE_START; i < limit; i++) {
>  		struct cpuidle_state *s = &drv->states[i];
>  		struct cpuidle_state_usage *su = &dev->states_usage[i];
> 
>  		if (s->disabled || su->disable || s->exit_latency <= latency_req
> -		    || (freeze && !s->enter_freeze))
> +		    || (freeze && !s->enter_freeze)
> +		    || (s->flags & flags_to_avoid))
>  			continue;
> 
>  		latency_req = s->exit_latency;
> @@ -100,7 +102,7 @@ static int find_deepest_state(struct cpu
>  int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>  			       struct cpuidle_device *dev)
>  {
> -	return find_deepest_state(drv, dev, false);
> +	return find_deepest_state(drv, dev, false, drv->state_count, 0);
>  }
> 
>  static void enter_freeze_proper(struct cpuidle_driver *drv,
> @@ -139,7 +141,7 @@ int cpuidle_enter_freeze(struct cpuidle_
>  	 * that interrupts won't be enabled when it exits and allows the tick to
>  	 * be frozen safely.
>  	 */
> -	index = find_deepest_state(drv, dev, true);
> +	index = find_deepest_state(drv, dev, true, drv->state_count, 0);
>  	if (index >= 0)
>  		enter_freeze_proper(drv, dev, index);
> 
> @@ -168,8 +170,13 @@ int cpuidle_enter_state(struct cpuidle_d
>  	 * CPU as a broadcast timer, this call may fail if it is not available.
>  	 */
>  	if (broadcast && tick_broadcast_enter()) {
> -		default_idle_call();
> -		return -EBUSY;
> +		index = find_deepest_state(drv, dev, false, index,
> +					   CPUIDLE_FLAG_TIMER_STOP);
> +		if (index < 0) {
> +			default_idle_call();
> +			return -EBUSY;
> +		}
> +		target_state = &drv->states[index];
>  	}
> 
>  	trace_cpu_idle_rcuidle(index, dev->cpu);

Regards
Preeti U Murthy
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-09  5:49   ` Preeti U Murthy
@ 2015-05-09 18:46     ` Rafael J. Wysocki
  2015-05-09 18:48       ` Rafael J. Wysocki
  2015-05-09 20:11     ` Rafael J. Wysocki
  1 sibling, 1 reply; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 18:46 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> Hi Rafael,
> 
> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> >> +/*
> >> + * find_tick_valid_state - select a state where tick does not stop
> >> + * @dev: cpuidle device for this cpu
> >> + * @drv: cpuidle driver for this cpu
> >> + */
> >> +static int find_tick_valid_state(struct cpuidle_device *dev,
> >> +				struct cpuidle_driver *drv)
> >> +{
> >> +	int i, ret = -1;
> >> +
> >> +	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
> >> +		struct cpuidle_state *s = &drv->states[i];
> >> +		struct cpuidle_state_usage *su = &dev->states_usage[i];
> >> +
> >> +		/*
> >> +		 * We do not explicitly check for latency requirement
> >> +		 * since it is safe to assume that only shallower idle
> >> +		 * states will have the CPUIDLE_FLAG_TIMER_STOP bit
> >> +		 * cleared and they will invariably meet the latency
> >> +		 * requirement.
> >> +		 */
> >> +		if (s->disabled || su->disable ||
> >> +			(s->flags & CPUIDLE_FLAG_TIMER_STOP))
> >> +			continue;
> >> +
> >> +		ret = i;
> >> +	}
> >> +	return ret;
> >> +}
> >> +
> >>  /**
> >>   * cpuidle_enter_state - enter the state and update stats
> >>   * @dev: cpuidle device for this cpu
> >> @@ -168,10 +199,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> >>  	 * CPU as a broadcast timer, this call may fail if it is not available.
> >>  	 */
> >>  	if (broadcast && tick_broadcast_enter()) {
> >> -		default_idle_call();
> >> -		return -EBUSY;
> >> +		index = find_tick_valid_state(dev, drv);
> > 
> > Well, the new state needs to be deeper than the old one or you may violate the
> > governor's choice and this doesn't guarantee that.
> 
> The comment above in find_tick_valid_state() explains why we are bound
> to choose a shallow idle state. I think it's safe to assume that any
> state deeper than this one, would have the CPUIDLE_FLAG_TIMER_STOP flag
> set and hence would be skipped.
> 
> Your patch relies on the assumption that the idle states are arranged in
> the increasing order of exit_latency/in the order of shallow to deep.
> This is not guaranteed, is it?

No, it isn't, which is a good point.  There's no reason to rely on that
assumption, so appended is an updated version of the patch using a latency
limit instead of an index limit.

> 
> > 
> > Also I don't quite see a reason to duplicate the find_deepest_state() functionality
> > here.
> 
> Agreed. We could club them like in your patch.
> 
> > 
> >> +		if (index < 0) {
> >> +			default_idle_call();
> >> +			return -EBUSY;
> >> +		}
> >> +		target_state = &drv->states[index];
> >>  	}
> >>  
> >> +	/* Take note of the planned idle state. */
> >> +	idle_set_state(smp_processor_id(), target_state);
> > 
> > And I wouldn't do this either.
> > 
> > The behavior here is pretty much as though the driver demoted the state chosen
> > by the governor and we don't call idle_set_state() again in those cases.
> 
> Why is this wrong?

Because it is inconsistent, but let me reply to this in a separate message.

Anyway, it is a different problem and should be addressed by a separate
patch IMO.



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-09 18:46     ` Rafael J. Wysocki
@ 2015-05-09 18:48       ` Rafael J. Wysocki
  0 siblings, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 18:48 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Saturday, May 09, 2015 08:46:20 PM Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> > Hi Rafael,
> > 
> > On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> > >> +/*
> > >> + * find_tick_valid_state - select a state where tick does not stop
> > >> + * @dev: cpuidle device for this cpu
> > >> + * @drv: cpuidle driver for this cpu
> > >> + */
> > >> +static int find_tick_valid_state(struct cpuidle_device *dev,
> > >> +				struct cpuidle_driver *drv)
> > >> +{
> > >> +	int i, ret = -1;
> > >> +
> > >> +	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
> > >> +		struct cpuidle_state *s = &drv->states[i];
> > >> +		struct cpuidle_state_usage *su = &dev->states_usage[i];
> > >> +
> > >> +		/*
> > >> +		 * We do not explicitly check for latency requirement
> > >> +		 * since it is safe to assume that only shallower idle
> > >> +		 * states will have the CPUIDLE_FLAG_TIMER_STOP bit
> > >> +		 * cleared and they will invariably meet the latency
> > >> +		 * requirement.
> > >> +		 */
> > >> +		if (s->disabled || su->disable ||
> > >> +			(s->flags & CPUIDLE_FLAG_TIMER_STOP))
> > >> +			continue;
> > >> +
> > >> +		ret = i;
> > >> +	}
> > >> +	return ret;
> > >> +}
> > >> +
> > >>  /**
> > >>   * cpuidle_enter_state - enter the state and update stats
> > >>   * @dev: cpuidle device for this cpu
> > >> @@ -168,10 +199,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
> > >>  	 * CPU as a broadcast timer, this call may fail if it is not available.
> > >>  	 */
> > >>  	if (broadcast && tick_broadcast_enter()) {
> > >> -		default_idle_call();
> > >> -		return -EBUSY;
> > >> +		index = find_tick_valid_state(dev, drv);
> > > 
> > > Well, the new state needs to be deeper than the old one or you may violate the
> > > governor's choice and this doesn't guarantee that.
> > 
> > The comment above in find_tick_valid_state() explains why we are bound
> > to choose a shallow idle state. I think it's safe to assume that any
> > state deeper than this one, would have the CPUIDLE_FLAG_TIMER_STOP flag
> > set and hence would be skipped.
> > 
> > Your patch relies on the assumption that the idle states are arranged in
> > the increasing order of exit_latency/in the order of shallow to deep.
> > This is not guaranteed, is it?
> 
> No, it isn't, which is a good point.  There's no reason to rely on that
> assumption, so appended is an updated version of the patch using a latency
> limit instead of an index limit.

And the patch *is* actually appended this time, sorry.


---
 drivers/cpuidle/cpuidle.c |   20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

Index: linux-pm/drivers/cpuidle/cpuidle.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -73,7 +73,10 @@ int cpuidle_play_dead(void)
 }
 
 static int find_deepest_state(struct cpuidle_driver *drv,
-			      struct cpuidle_device *dev, bool freeze)
+			      struct cpuidle_device *dev,
+			      unsigned int max_latency,
+			      unsigned int forbidden_flags,
+			      bool freeze)
 {
 	unsigned int latency_req = 0;
 	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
@@ -83,6 +86,8 @@ static int find_deepest_state(struct cpu
 		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
 		if (s->disabled || su->disable || s->exit_latency <= latency_req
+		    || s->exit_latency > max_latency
+		    || (s->flags & forbidden_flags)
 		    || (freeze && !s->enter_freeze))
 			continue;
 
@@ -100,7 +105,7 @@ static int find_deepest_state(struct cpu
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 			       struct cpuidle_device *dev)
 {
-	return find_deepest_state(drv, dev, false);
+	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
 }
 
 static void enter_freeze_proper(struct cpuidle_driver *drv,
@@ -139,7 +144,7 @@ int cpuidle_enter_freeze(struct cpuidle_
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, true);
+	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
 	if (index >= 0)
 		enter_freeze_proper(drv, dev, index);
 
@@ -168,8 +173,13 @@ int cpuidle_enter_state(struct cpuidle_d
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		default_idle_call();
-		return -EBUSY;
+		index = find_deepest_state(drv, dev, target_state->exit_latency,
+					   CPUIDLE_FLAG_TIMER_STOP, false);
+		if (index < 0) {
+			default_idle_call();
+			return -EBUSY;
+		}
+		target_state = &drv->states[index];
 	}
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-09  5:49   ` Preeti U Murthy
  2015-05-09 18:46     ` Rafael J. Wysocki
@ 2015-05-09 20:11     ` Rafael J. Wysocki
  2015-05-09 20:33       ` Rafael J. Wysocki
  1 sibling, 1 reply; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 20:11 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> Hi Rafael,
> 
> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:

[cut]

> >>  
> >> +	/* Take note of the planned idle state. */
> >> +	idle_set_state(smp_processor_id(), target_state);
> > 
> > And I wouldn't do this either.
> > 
> > The behavior here is pretty much as though the driver demoted the state chosen
> > by the governor and we don't call idle_set_state() again in those cases.
> 
> Why is this wrong?

It is not "wrong", but incomplete, because demotions done by the cpuidle driver
should also be taken into account in the same way.

But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
significantly as to what state the CPU is in.  I'll drop that one for now.


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully
  2015-05-09 20:11     ` Rafael J. Wysocki
@ 2015-05-09 20:33       ` Rafael J. Wysocki
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
  0 siblings, 1 reply; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 20:33 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> > Hi Rafael,
> > 
> > On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> 
> [cut]
> 
> > >>  
> > >> +	/* Take note of the planned idle state. */
> > >> +	idle_set_state(smp_processor_id(), target_state);
> > > 
> > > And I wouldn't do this either.
> > > 
> > > The behavior here is pretty much as though the driver demoted the state chosen
> > > by the governor and we don't call idle_set_state() again in those cases.
> > 
> > Why is this wrong?
> 
> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
> should also be taken into account in the same way.
> 
> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
> significantly as to what state the CPU is in.  I'll drop that one for now.

OK, done.

So after I've dropped it I think we need to do three things:
(1) Move the idle_set_state() calls to cpuidle_enter_state().
(2) Make cpuidle_enter_state() call default_idle_call() again, but this time
    do that *before* it has called idle_set_state() for target_state.
(3) Introduce demotion as per my last patch.

Let me cut patches for that.


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-09 20:33       ` Rafael J. Wysocki
@ 2015-05-09 23:15         ` Rafael J. Wysocki
  2015-05-09 23:18           ` [PATCH 1/3] sched / idle: Call idle_set_state() from cpuidle_enter_state() Rafael J. Wysocki
                             ` (6 more replies)
  0 siblings, 7 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 23:15 UTC (permalink / raw)
  To: Preeti U Murthy, peterz
  Cc: tglx, rafael.j.wysocki, daniel.lezcano, rlippert, linux-pm,
	linus.walleij, linux-kernel, mingo, sudeep.holla, linuxppc-dev

On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
> > On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> > > Hi Rafael,
> > > 
> > > On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> > 
> > [cut]
> > 
> > > >>  
> > > >> +	/* Take note of the planned idle state. */
> > > >> +	idle_set_state(smp_processor_id(), target_state);
> > > > 
> > > > And I wouldn't do this either.
> > > > 
> > > > The behavior here is pretty much as though the driver demoted the state chosen
> > > > by the governor and we don't call idle_set_state() again in those cases.
> > > 
> > > Why is this wrong?
> > 
> > It is not "wrong", but incomplete, because demotions done by the cpuidle driver
> > should also be taken into account in the same way.
> > 
> > But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
> > call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
> > significantly as to what state the CPU is in.  I'll drop that one for now.
> 
> OK, done.
> 
> So after I've dropped it I think we need to do three things:
> (1) Move the idle_set_state() calls to cpuidle_enter_state().
> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>     do that *before* it has called idle_set_state() for target_state.
> (3) Introduce demotion as per my last patch.
> 
> Let me cut patches for that.

Done as per the above and the patches follow in replies to this message.

All on top of the current linux-next branch of the linux-pm.git tree.


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/3] sched / idle: Call idle_set_state() from cpuidle_enter_state()
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
@ 2015-05-09 23:18           ` Rafael J. Wysocki
  2015-05-09 23:18           ` [PATCH 2/3] sched / idle: Call default_idle_call() " Rafael J. Wysocki
                             ` (5 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 23:18 UTC (permalink / raw)
  To: Preeti U Murthy, peterz
  Cc: tglx, rafael.j.wysocki, daniel.lezcano, rlippert, linux-pm,
	linus.walleij, linux-kernel, mingo, sudeep.holla, linuxppc-dev

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Introduce a wrapper function around idle_set_state() called
sched_idle_set_state() that will pass this_rq() to it as the
first argument and make cpuidle_enter_state() call the new
function before and after entering the target state.

At the same time, remove direct invocations of idle_set_state()
from call_cpuidle().

This will allow the invocation of default_idle_call() to be
moved from call_cpuidle() to cpuidle_enter_state() safely
and call_cpuidle() to be simplified a bit as a result.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c |    6 ++++++
 include/linux/cpuidle.h   |    3 +++
 kernel/sched/idle.c       |   15 +++++++++------
 3 files changed, 18 insertions(+), 6 deletions(-)

Index: linux-pm/kernel/sched/idle.c
===================================================================
--- linux-pm.orig/kernel/sched/idle.c
+++ linux-pm/kernel/sched/idle.c
@@ -15,6 +15,15 @@
 
 #include "sched.h"
 
+/**
+ * sched_idle_set_state - Record idle state for the current CPU.
+ * @idle_state: State to record.
+ */
+void sched_idle_set_state(struct cpuidle_state *idle_state)
+{
+	idle_set_state(this_rq(), idle_state);
+}
+
 static int __read_mostly cpu_idle_force_poll;
 
 void cpu_idle_poll_ctrl(bool enable)
@@ -100,9 +109,6 @@ static int call_cpuidle(struct cpuidle_d
 		return -EBUSY;
 	}
 
-	/* Take note of the planned idle state. */
-	idle_set_state(this_rq(), &drv->states[next_state]);
-
 	/*
 	 * Enter the idle state previously returned by the governor decision.
 	 * This function will block until an interrupt occurs and will take
@@ -110,9 +116,6 @@ static int call_cpuidle(struct cpuidle_d
 	 */
 	entered_state = cpuidle_enter(drv, dev, next_state);
 
-	/* The cpu is no longer idle or about to enter idle. */
-	idle_set_state(this_rq(), NULL);
-
 	if (entered_state == -EBUSY)
 		default_idle_call();
 
Index: linux-pm/drivers/cpuidle/cpuidle.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -170,6 +170,9 @@ int cpuidle_enter_state(struct cpuidle_d
 	if (broadcast && tick_broadcast_enter())
 		return -EBUSY;
 
+	/* Take note of the planned idle state. */
+	sched_idle_set_state(target_state);
+
 	trace_cpu_idle_rcuidle(index, dev->cpu);
 	time_start = ktime_get();
 
@@ -178,6 +181,9 @@ int cpuidle_enter_state(struct cpuidle_d
 	time_end = ktime_get();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
+	/* The cpu is no longer idle or about to enter idle. */
+	sched_idle_set_state(NULL);
+
 	if (broadcast) {
 		if (WARN_ON_ONCE(!irqs_disabled()))
 			local_irq_disable();
Index: linux-pm/include/linux/cpuidle.h
===================================================================
--- linux-pm.orig/include/linux/cpuidle.h
+++ linux-pm/include/linux/cpuidle.h
@@ -200,6 +200,9 @@ static inline struct cpuidle_driver *cpu
 	struct cpuidle_device *dev) {return NULL; }
 #endif
 
+/* kernel/sched/idle.c */
+extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);
 #else

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 2/3] sched / idle: Call default_idle_call() from cpuidle_enter_state()
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
  2015-05-09 23:18           ` [PATCH 1/3] sched / idle: Call idle_set_state() from cpuidle_enter_state() Rafael J. Wysocki
@ 2015-05-09 23:18           ` Rafael J. Wysocki
  2015-05-09 23:19           ` [PATCH 3/3] cpuidle: Select a different state on tick_broadcast_enter() failures Rafael J. Wysocki
                             ` (4 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 23:18 UTC (permalink / raw)
  To: Preeti U Murthy, peterz
  Cc: tglx, rafael.j.wysocki, daniel.lezcano, rlippert, linux-pm,
	linus.walleij, linux-kernel, mingo, sudeep.holla, linuxppc-dev

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The check of the cpuidle_enter() return value against -EBUSY
made in call_cpuidle() will not be necessary any more if
cpuidle_enter_state() calls default_idle_call() directly when it
is about to return -EBUSY, so make that happen and eliminate the
check.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c |    4 +++-
 include/linux/cpuidle.h   |    1 +
 kernel/sched/idle.c       |   20 +++++++-------------
 3 files changed, 11 insertions(+), 14 deletions(-)

Index: linux-pm/drivers/cpuidle/cpuidle.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -167,8 +167,10 @@ int cpuidle_enter_state(struct cpuidle_d
 	 * local timer will be shut down.  If a local timer is used from another
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
-	if (broadcast && tick_broadcast_enter())
+	if (broadcast && tick_broadcast_enter()) {
+		default_idle_call();
 		return -EBUSY;
+	}
 
 	/* Take note of the planned idle state. */
 	sched_idle_set_state(target_state);
Index: linux-pm/kernel/sched/idle.c
===================================================================
--- linux-pm.orig/kernel/sched/idle.c
+++ linux-pm/kernel/sched/idle.c
@@ -76,12 +76,13 @@ void __weak arch_cpu_idle(void)
 	local_irq_enable();
 }
 
-static void default_idle_call(void)
+/**
+ * default_idle_call - Default CPU idle routine.
+ *
+ * To use when the cpuidle framework cannot be used.
+ */
+void default_idle_call(void)
 {
-	/*
-	 * We can't use the cpuidle framework, let's use the default idle
-	 * routine.
-	 */
 	if (current_clr_polling_and_test())
 		local_irq_enable();
 	else
@@ -91,8 +92,6 @@ static void default_idle_call(void)
 static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		      int next_state)
 {
-	int entered_state;
-
 	/* Fall back to the default arch idle method on errors. */
 	if (next_state < 0) {
 		default_idle_call();
@@ -114,12 +113,7 @@ static int call_cpuidle(struct cpuidle_d
 	 * This function will block until an interrupt occurs and will take
 	 * care of re-enabling the local interrupts
 	 */
-	entered_state = cpuidle_enter(drv, dev, next_state);
-
-	if (entered_state == -EBUSY)
-		default_idle_call();
-
-	return entered_state;
+	return cpuidle_enter(drv, dev, next_state);
 }
 
 /**
Index: linux-pm/include/linux/cpuidle.h
===================================================================
--- linux-pm.orig/include/linux/cpuidle.h
+++ linux-pm/include/linux/cpuidle.h
@@ -202,6 +202,7 @@ static inline struct cpuidle_driver *cpu
 
 /* kernel/sched/idle.c */
 extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+extern void default_idle_call(void);
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 3/3] cpuidle: Select a different state on tick_broadcast_enter() failures
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
  2015-05-09 23:18           ` [PATCH 1/3] sched / idle: Call idle_set_state() from cpuidle_enter_state() Rafael J. Wysocki
  2015-05-09 23:18           ` [PATCH 2/3] sched / idle: Call default_idle_call() " Rafael J. Wysocki
@ 2015-05-09 23:19           ` Rafael J. Wysocki
  2015-05-11  3:48           ` [PATCH 0/3] cpuidle: updates related to " Preeti U Murthy
                             ` (3 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-09 23:19 UTC (permalink / raw)
  To: Preeti U Murthy, peterz
  Cc: tglx, rafael.j.wysocki, daniel.lezcano, rlippert, linux-pm,
	linus.walleij, linux-kernel, mingo, sudeep.holla, linuxppc-dev

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

If tick_broadcast_enter() fails in cpuidle_enter_state(),
try to find another idle state to enter instead of invoking
default_idle_call() immediately and returning -EBUSY which
should increase the chances of saving some energy in those
cases.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c |   20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

Index: linux-pm/drivers/cpuidle/cpuidle.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -73,7 +73,10 @@ int cpuidle_play_dead(void)
 }
 
 static int find_deepest_state(struct cpuidle_driver *drv,
-			      struct cpuidle_device *dev, bool freeze)
+			      struct cpuidle_device *dev,
+			      unsigned int max_latency,
+			      unsigned int forbidden_flags,
+			      bool freeze)
 {
 	unsigned int latency_req = 0;
 	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
@@ -83,6 +86,8 @@ static int find_deepest_state(struct cpu
 		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
 		if (s->disabled || su->disable || s->exit_latency <= latency_req
+		    || s->exit_latency > max_latency
+		    || (s->flags & forbidden_flags)
 		    || (freeze && !s->enter_freeze))
 			continue;
 
@@ -100,7 +105,7 @@ static int find_deepest_state(struct cpu
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 			       struct cpuidle_device *dev)
 {
-	return find_deepest_state(drv, dev, false);
+	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
 }
 
 static void enter_freeze_proper(struct cpuidle_driver *drv,
@@ -139,7 +144,7 @@ int cpuidle_enter_freeze(struct cpuidle_
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, true);
+	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
 	if (index >= 0)
 		enter_freeze_proper(drv, dev, index);
 
@@ -168,8 +173,13 @@ int cpuidle_enter_state(struct cpuidle_d
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		default_idle_call();
-		return -EBUSY;
+		index = find_deepest_state(drv, dev, target_state->exit_latency,
+					   CPUIDLE_FLAG_TIMER_STOP, false);
+		if (index < 0) {
+			default_idle_call();
+			return -EBUSY;
+		}
+		target_state = &drv->states[index];
 	}
 
 	/* Take note of the planned idle state. */

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
                             ` (2 preceding siblings ...)
  2015-05-09 23:19           ` [PATCH 3/3] cpuidle: Select a different state on tick_broadcast_enter() failures Rafael J. Wysocki
@ 2015-05-11  3:48           ` Preeti U Murthy
  2015-05-11  5:21           ` Preeti U Murthy
                             ` (2 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Preeti U Murthy @ 2015-05-11  3:48 UTC (permalink / raw)
  To: Rafael J. Wysocki, peterz
  Cc: tglx, rafael.j.wysocki, daniel.lezcano, rlippert, linux-pm,
	linus.walleij, linux-kernel, mingo, sudeep.holla, linuxppc-dev

On 05/10/2015 04:45 AM, Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
>>>> Hi Rafael,
>>>>
>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>>>
>>> [cut]
>>>
>>>>>>  
>>>>>> +	/* Take note of the planned idle state. */
>>>>>> +	idle_set_state(smp_processor_id(), target_state);
>>>>>
>>>>> And I wouldn't do this either.
>>>>>
>>>>> The behavior here is pretty much as though the driver demoted the state chosen
>>>>> by the governor and we don't call idle_set_state() again in those cases.
>>>>
>>>> Why is this wrong?
>>>
>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
>>> should also be taken into account in the same way.
>>>
>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
>>> significantly as to what state the CPU is in.  I'll drop that one for now.
>>
>> OK, done.
>>
>> So after I've dropped it I think we need to do three things:
>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>>     do that *before* it has called idle_set_state() for target_state.
>> (3) Introduce demotion as per my last patch.
>>
>> Let me cut patches for that.
> 
> Done as per the above and the patches follow in replies to this message.
> 
> All on top of the current linux-next branch of the linux-pm.git tree.

I don't see the patches on linux-pm/linux-next.

Regards
Preeti U Murthy
> 
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
                             ` (3 preceding siblings ...)
  2015-05-11  3:48           ` [PATCH 0/3] cpuidle: updates related to " Preeti U Murthy
@ 2015-05-11  5:21           ` Preeti U Murthy
  2015-05-11 23:13             ` Rafael J. Wysocki
  2015-05-11 15:13           ` Sudeep Holla
  2015-05-11 17:40           ` Daniel Lezcano
  6 siblings, 1 reply; 29+ messages in thread
From: Preeti U Murthy @ 2015-05-11  5:21 UTC (permalink / raw)
  To: Rafael J. Wysocki, peterz
  Cc: tglx, rafael.j.wysocki, daniel.lezcano, rlippert, linux-pm,
	linus.walleij, linux-kernel, mingo, sudeep.holla, linuxppc-dev

On 05/10/2015 04:45 AM, Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
>>>> Hi Rafael,
>>>>
>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>>>
>>> [cut]
>>>
>>>>>>  
>>>>>> +	/* Take note of the planned idle state. */
>>>>>> +	idle_set_state(smp_processor_id(), target_state);
>>>>>
>>>>> And I wouldn't do this either.
>>>>>
>>>>> The behavior here is pretty much as though the driver demoted the state chosen
>>>>> by the governor and we don't call idle_set_state() again in those cases.
>>>>
>>>> Why is this wrong?
>>>
>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
>>> should also be taken into account in the same way.
>>>
>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
>>> significantly as to what state the CPU is in.  I'll drop that one for now.
>>
>> OK, done.
>>
>> So after I've dropped it I think we need to do three things:
>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>>     do that *before* it has called idle_set_state() for target_state.
>> (3) Introduce demotion as per my last patch.
>>
>> Let me cut patches for that.
> 
> Done as per the above and the patches follow in replies to this message.
> 
> All on top of the current linux-next branch of the linux-pm.git tree.

The patches look good. I based and tested these patches on top of
linux-pm/linux-next (they are not yet in the branch as far as I can see).

All patches in this series
Reviewed and Tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
> 
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
                             ` (4 preceding siblings ...)
  2015-05-11  5:21           ` Preeti U Murthy
@ 2015-05-11 15:13           ` Sudeep Holla
  2015-05-11 23:14             ` Rafael J. Wysocki
  2015-05-11 17:40           ` Daniel Lezcano
  6 siblings, 1 reply; 29+ messages in thread
From: Sudeep Holla @ 2015-05-11 15:13 UTC (permalink / raw)
  To: Rafael J. Wysocki, Preeti U Murthy, peterz
  Cc: Sudeep Holla, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, linuxppc-dev



On 10/05/15 00:15, Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
>>>> Hi Rafael,
>>>>
>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>>>
>>> [cut]
>>>
>>>>>>
>>>>>> +	/* Take note of the planned idle state. */
>>>>>> +	idle_set_state(smp_processor_id(), target_state);
>>>>>
>>>>> And I wouldn't do this either.
>>>>>
>>>>> The behavior here is pretty much as though the driver demoted the state chosen
>>>>> by the governor and we don't call idle_set_state() again in those cases.
>>>>
>>>> Why is this wrong?
>>>
>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
>>> should also be taken into account in the same way.
>>>
>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
>>> significantly as to what state the CPU is in.  I'll drop that one for now.
>>
>> OK, done.
>>
>> So after I've dropped it I think we need to do three things:
>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>>      do that *before* it has called idle_set_state() for target_state.
>> (3) Introduce demotion as per my last patch.
>>
>> Let me cut patches for that.
>
> Done as per the above and the patches follow in replies to this messge.
>
> All on top of the current linux-next branch of the linux-pm.git tree.
>

Tested on ARM Vexpress platforms with one of the CPUs in broadcast mode
and also with a broadcast timer. So, you can add:

Tested-by: Sudeep Holla <sudeep.holla@arm.com>

Regards,
Sudeep

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
                             ` (5 preceding siblings ...)
  2015-05-11 15:13           ` Sudeep Holla
@ 2015-05-11 17:40           ` Daniel Lezcano
  2015-05-11 23:31             ` Rafael J. Wysocki
  6 siblings, 1 reply; 29+ messages in thread
From: Daniel Lezcano @ 2015-05-11 17:40 UTC (permalink / raw)
  To: Rafael J. Wysocki, Preeti U Murthy, peterz
  Cc: tglx, rafael.j.wysocki, rlippert, linux-pm, linus.walleij,
	linux-kernel, mingo, sudeep.holla, linuxppc-dev, Kevin Hilman,
	Lina Iyer, Ulf Hansson

On 05/10/2015 01:15 AM, Rafael J. Wysocki wrote:
> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
>>>> Hi Rafael,
>>>>
>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>>>
>>> [cut]
>>>
>>>>>>
>>>>>> +	/* Take note of the planned idle state. */
>>>>>> +	idle_set_state(smp_processor_id(), target_state);
>>>>>
>>>>> And I wouldn't do this either.
>>>>>
>>>>> The behavior here is pretty much as though the driver demoted the state chosen
>>>>> by the governor and we don't call idle_set_state() again in those cases.
>>>>
>>>> Why is this wrong?
>>>
>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
>>> should also be taken into account in the same way.
>>>
>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
>>> significantly as to what state the CPU is in.  I'll drop that one for now.
>>
>> OK, done.
>>
>> So after I've dropped it I think we need to do three things:
>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>>      do that *before* it has called idle_set_state() for target_state.
>> (3) Introduce demotion as per my last patch.
>>
>> Let me cut patches for that.
>
> Done as per the above and the patches follow in replies to this messge.
>
> All on top of the current linux-next branch of the linux-pm.git tree.

IMO the resulting code is more and more confusing.

Unless I am missing something, tick_broadcast_enter() can fail only if
the local timer of the current cpu is used as a broadcast timer (which
is the case today for PPC only).

The correct fix would be to tie this local timer with the cpu power 
domain and disable the idle state powering down this domain like it was 
done for the renesas cpuidle driver.

IOW, the cpu power domain is in use (because of its local timer), so we 
shouldn't shut it down.

No ?

I am aware this is not easily fixable because the genpd framework is
incomplete and has some restrictions, but I believe it is worth having a
discussion. Adding Kevin and Ulf in Cc.

   -- Daniel

-- 
  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-11  5:21           ` Preeti U Murthy
@ 2015-05-11 23:13             ` Rafael J. Wysocki
  0 siblings, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-11 23:13 UTC (permalink / raw)
  To: Preeti U Murthy
  Cc: peterz, tglx, rafael.j.wysocki, daniel.lezcano, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev

On Monday, May 11, 2015 10:51:02 AM Preeti U Murthy wrote:
> On 05/10/2015 04:45 AM, Rafael J. Wysocki wrote:
> > On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
> >> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
> >>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> >>>> Hi Rafael,
> >>>>
> >>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> >>>
> >>> [cut]
> >>>
> >>>>>>  
> >>>>>> +	/* Take note of the planned idle state. */
> >>>>>> +	idle_set_state(smp_processor_id(), target_state);
> >>>>>
> >>>>> And I wouldn't do this either.
> >>>>>
> >>>>> The behavior here is pretty much as though the driver demoted the state chosen
> >>>>> by the governor and we don't call idle_set_state() again in those cases.
> >>>>
> >>>> Why is this wrong?
> >>>
> >>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
> >>> should also be taken into account in the same way.
> >>>
> >>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
> >>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
> >>> significantly as to what state the CPU is in.  I'll drop that one for now.
> >>
> >> OK, done.
> >>
> >> So after I've dropped it I think we need to do three things:
> >> (1) Move the idle_set_state() calls to cpuidle_enter_state().
> >> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
> >>     do that *before* it has called idle_set_state() for target_state.
> >> (3) Introduce demotion as per my last patch.
> >>
> >> Let me cut patches for that.
> > 
> > Done as per the above and the patches follow in replies to this messge.
> > 
> > All on top of the current linux-next branch of the linux-pm.git tree.
> 
> The patches look good. Based and tested these patches on top of
> linux-pm/linux-next (They are not yet in the branch as far as I can see.)

They aren't in the tree yet.  I'll put them in there later today.

> All patches in this series
> Reviewed and Tested-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>

Thanks!


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-11 15:13           ` Sudeep Holla
@ 2015-05-11 23:14             ` Rafael J. Wysocki
  0 siblings, 0 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-11 23:14 UTC (permalink / raw)
  To: Sudeep Holla
  Cc: Preeti U Murthy, peterz, tglx, rafael.j.wysocki, daniel.lezcano,
	rlippert, linux-pm, linus.walleij, linux-kernel, mingo,
	linuxppc-dev

On Monday, May 11, 2015 04:13:37 PM Sudeep Holla wrote:
> 
> On 10/05/15 00:15, Rafael J. Wysocki wrote:
> > On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
> >> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
> >>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> >>>> Hi Rafael,
> >>>>
> >>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> >>>
> >>> [cut]
> >>>
> >>>>>>
> >>>>>> +	/* Take note of the planned idle state. */
> >>>>>> +	idle_set_state(smp_processor_id(), target_state);
> >>>>>
> >>>>> And I wouldn't do this either.
> >>>>>
> >>>>> The behavior here is pretty much as though the driver demoted the state chosen
> >>>>> by the governor and we don't call idle_set_state() again in those cases.
> >>>>
> >>>> Why is this wrong?
> >>>
> >>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
> >>> should also be taken into account in the same way.
> >>>
> >>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
> >>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
> >>> significantly as to what state the CPU is in.  I'll drop that one for now.
> >>
> >> OK, done.
> >>
> >> So after I've dropped it I think we need to do three things:
> >> (1) Move the idle_set_state() calls to cpuidle_enter_state().
> >> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
> >>      do that *before* it has called idle_set_state() for target_state.
> >> (3) Introduce demotion as per my last patch.
> >>
> >> Let me cut patches for that.
> >
> > Done as per the above and the patches follow in replies to this messge.
> >
> > All on top of the current linux-next branch of the linux-pm.git tree.
> >
> 
> Tested on ARM Vexpress platforms with one of the CPU in broadcast mode
> and also with broadcast timer. So, you can add:
> 
> Tested-by: Sudeep Holla <sudeep.holla@arm.com>

Thanks!


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-11 17:40           ` Daniel Lezcano
@ 2015-05-11 23:31             ` Rafael J. Wysocki
  2015-05-12  8:41               ` Daniel Lezcano
  0 siblings, 1 reply; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-11 23:31 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Preeti U Murthy, peterz, tglx, rafael.j.wysocki, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev, Kevin Hilman, Lina Iyer, Ulf Hansson

On Monday, May 11, 2015 07:40:41 PM Daniel Lezcano wrote:
> On 05/10/2015 01:15 AM, Rafael J. Wysocki wrote:
> > On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
> >> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
> >>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> >>>> Hi Rafael,
> >>>>
> >>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> >>>
> >>> [cut]
> >>>
> >>>>>>
> >>>>>> +	/* Take note of the planned idle state. */
> >>>>>> +	idle_set_state(smp_processor_id(), target_state);
> >>>>>
> >>>>> And I wouldn't do this either.
> >>>>>
> >>>>> The behavior here is pretty much as though the driver demoted the state chosen
> >>>>> by the governor and we don't call idle_set_state() again in those cases.
> >>>>
> >>>> Why is this wrong?
> >>>
> >>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
> >>> should also be taken into account in the same way.
> >>>
> >>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
> >>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
> >>> significantly as to what state the CPU is in.  I'll drop that one for now.
> >>
> >> OK, done.
> >>
> >> So after I've dropped it I think we need to do three things:
> >> (1) Move the idle_set_state() calls to cpuidle_enter_state().
> >> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
> >>      do that *before* it has called idle_set_state() for target_state.
> >> (3) Introduce demotion as per my last patch.
> >>
> >> Let me cut patches for that.
> >
> > Done as per the above and the patches follow in replies to this messge.
> >
> > All on top of the current linux-next branch of the linux-pm.git tree.
> 
> IMO the resulting code is more and more confusing.

Why is it confusing?

What part of it is confusing?

Patches [1-2/3] simply replace https://patchwork.kernel.org/patch/6326761/
and I'm not sure why that would be confusing.

Patch [3/3] simply causes cpuidle_enter_state() to pick up a more suitable
state if tick_broadcast_enter() fails instead of returning an error code
in that case.  What exactly is confusing in that?

> Except I miss something, the tick_broadcast_enter can fail only if the 
> local timer of the current cpu is used as a broadcast timer (which is 
> the case today for PPC only).

Well, why does this matter?

> The correct fix would be to tie this local timer with the cpu power 
> domain and disable the idle state powering down this domain like it was 
> done for the renesas cpuidle driver.
> 
> IOW, the cpu power domain is in use (because of its local timer), so we 
> shouldn't shut it down.
> 
> No ?

Sorry, I'm not sure what you're talking about.

The problem at hand is that tick_broadcast_enter() can fail and we need to
handle that.  If we can prevent it from ever failing, that would be awesome,
but quite honestly I don't see how to do that ATM.

> I am aware this is not easily fixable because the genpd framework is 
> incomplete and has some restrictions but I believe it is worth to have a 
> discussion. Add Kevin and Ulf in Cc.

So I'm going to queue up these patches for 4.2 and we can have a discussion
just fine regardless.


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-11 23:31             ` Rafael J. Wysocki
@ 2015-05-12  8:41               ` Daniel Lezcano
  2015-05-12 13:23                 ` Rafael J. Wysocki
  0 siblings, 1 reply; 29+ messages in thread
From: Daniel Lezcano @ 2015-05-12  8:41 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Preeti U Murthy, peterz, tglx, rafael.j.wysocki, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev, Kevin Hilman, Lina Iyer, Ulf Hansson

On 05/12/2015 01:31 AM, Rafael J. Wysocki wrote:
> On Monday, May 11, 2015 07:40:41 PM Daniel Lezcano wrote:
>> On 05/10/2015 01:15 AM, Rafael J. Wysocki wrote:
>>> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
>>>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
>>>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
>>>>>> Hi Rafael,
>>>>>>
>>>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>>>>>
>>>>> [cut]
>>>>>
>>>>>>>>
>>>>>>>> +	/* Take note of the planned idle state. */
>>>>>>>> +	idle_set_state(smp_processor_id(), target_state);
>>>>>>>
>>>>>>> And I wouldn't do this either.
>>>>>>>
>>>>>>> The behavior here is pretty much as though the driver demoted the state chosen
>>>>>>> by the governor and we don't call idle_set_state() again in those cases.
>>>>>>
>>>>>> Why is this wrong?
>>>>>
>>>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
>>>>> should also be taken into account in the same way.
>>>>>
>>>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
>>>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
>>>>> significantly as to what state the CPU is in.  I'll drop that one for now.
>>>>
>>>> OK, done.
>>>>
>>>> So after I've dropped it I think we need to do three things:
>>>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
>>>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>>>>       do that *before* it has called idle_set_state() for target_state.
>>>> (3) Introduce demotion as per my last patch.
>>>>
>>>> Let me cut patches for that.
>>>
>>> Done as per the above and the patches follow in replies to this messge.
>>>
>>> All on top of the current linux-next branch of the linux-pm.git tree.
>>
>> IMO the resulting code is more and more confusing.
>
> Why is it confusing?
>
> What part of it is confusing?
>
> Patches [1-2/3] simply replace https://patchwork.kernel.org/patch/6326761/
> and I'm not sure why that would be confusing.
>
> Patch [3/3] simply causes cpuidle_enter_state() to pick up a more suitable
> state if tick_broadcast_enter() fails instead of returning an error code
> in that case.  What exactly is confusing in that?
>
>> Except I miss something, the tick_broadcast_enter can fail only if the
>> local timer of the current cpu is used as a broadcast timer (which is
>> the case today for PPC only).
>
> well, why does this matter?
>
>> The correct fix would be to tie this local timer with the cpu power
>> domain and disable the idle state powering down this domain like it was
>> done for the renesas cpuidle driver.
>>
>> IOW, the cpu power domain is in use (because of its local timer), so we
>> shouldn't shut it down.
>>
>> No ?
>
> Sorry, I'm not sure what you're talking about.
>
> The problem at hand is that tick_broadcast_enter() can fail and we need to
> handle that.  If we can prevent it from ever failing, that would be awesome,
> but quite honestly I don't see how to do that ATM.

Ok, sorry. Let me clarify.

You implemented a mechanism two years ago with pm_genpd_attach_cpuidle
and power_on/off. It disables a cpuidle state when a power domain is in use.

The idea I was proposing is to reuse this approach.

The logic is:

"The local timer is in use and this idle state powers down this timer,
so disable the idle state".

So when the broadcast timer is 'bound_on' a cpu, we disable the
idle states. That could be done via a loop looking for the TIMER_STOP
flag or via the power domain.

Hence cpuidle_select will never return a state which powers down
the local cpu (because they are disabled) and tick_broadcast_enter can't
fail because it is never called.

Does it make more sense ?

>> I am aware this is not easily fixable because the genpd framework is
>> incomplete and has some restrictions but I believe it is worth to have a
>> discussion. Add Kevin and Ulf in Cc.
>
> So I'm going to queue up these patches for 4.2 and we can have a discussion
> just fine regardless.



-- 
  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-12  8:41               ` Daniel Lezcano
@ 2015-05-12 13:23                 ` Rafael J. Wysocki
  2015-05-12 18:04                   ` Daniel Lezcano
  2015-05-13 22:59                   ` Kevin Hilman
  0 siblings, 2 replies; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-12 13:23 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Preeti U Murthy, peterz, tglx, rafael.j.wysocki, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev, Kevin Hilman, Lina Iyer, Ulf Hansson

On Tuesday, May 12, 2015 10:41:35 AM Daniel Lezcano wrote:
> On 05/12/2015 01:31 AM, Rafael J. Wysocki wrote:
> > On Monday, May 11, 2015 07:40:41 PM Daniel Lezcano wrote:
> >> On 05/10/2015 01:15 AM, Rafael J. Wysocki wrote:
> >>> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
> >>>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
> >>>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
> >>>>>> Hi Rafael,
> >>>>>>
> >>>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
> >>>>>
> >>>>> [cut]
> >>>>>
> >>>>>>>>
> >>>>>>>> +	/* Take note of the planned idle state. */
> >>>>>>>> +	idle_set_state(smp_processor_id(), target_state);
> >>>>>>>
> >>>>>>> And I wouldn't do this either.
> >>>>>>>
> >>>>>>> The behavior here is pretty much as though the driver demoted the state chosen
> >>>>>>> by the governor and we don't call idle_set_state() again in those cases.
> >>>>>>
> >>>>>> Why is this wrong?
> >>>>>
> >>>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
> >>>>> should also be taken into account in the same way.
> >>>>>
> >>>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
> >>>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
> >>>>> significantly as to what state the CPU is in.  I'll drop that one for now.
> >>>>
> >>>> OK, done.
> >>>>
> >>>> So after I've dropped it I think we need to do three things:
> >>>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
> >>>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
> >>>>       do that *before* it has called idle_set_state() for target_state.
> >>>> (3) Introduce demotion as per my last patch.
> >>>>
> >>>> Let me cut patches for that.
> >>>
> >>> Done as per the above and the patches follow in replies to this messge.
> >>>
> >>> All on top of the current linux-next branch of the linux-pm.git tree.
> >>
> >> IMO the resulting code is more and more confusing.
> >
> > Why is it confusing?
> >
> > What part of it is confusing?
> >
> > Patches [1-2/3] simply replace https://patchwork.kernel.org/patch/6326761/
> > and I'm not sure why that would be confusing.
> >
> > Patch [3/3] simply causes cpuidle_enter_state() to pick up a more suitable
> > state if tick_broadcast_enter() fails instead of returning an error code
> > in that case.  What exactly is confusing in that?
> >
> >> Except I miss something, the tick_broadcast_enter can fail only if the
> >> local timer of the current cpu is used as a broadcast timer (which is
> >> the case today for PPC only).
> >
> > well, why does this matter?
> >
> >> The correct fix would be to tie this local timer with the cpu power
> >> domain and disable the idle state powering down this domain like it was
> >> done for the renesas cpuidle driver.
> >>
> >> IOW, the cpu power domain is in use (because of its local timer), so we
> >> shouldn't shut it down.
> >>
> >> No ?
> >
> > Sorry, I'm not sure what you're talking about.
> >
> > The problem at hand is that tick_broadcast_enter() can fail and we need to
> > handle that.  If we can prevent it from ever failing, that would be awesome,
> > but quite honestly I don't see how to do that ATM.
> 
> Ok, sorry. Let me clarify.
> 
> You did a mechanism two years ago with pm_genpd_attach_cpuidle and 
> power_on/off. That disables a cpuidle state when a power domain is in use.
> 
> The idea I was proposing is to reuse this approach.
> 
> The logic is:
> 
> "The local timer is in use, this idle state power downs this timer, then 
> disable it".

I'm not sure it's about powering down.  Stopping rather (which may or may
not involve powering down).

> So it is when the broadcast timer is 'bound_on' a cpu, we disable the 
> idle states. That could be done via a loop looking for the TIMER_STOP 
> flag or via the power domain.
> 
> Hence the cpuidle_select will never return a state which powers downs 
> the local cpu (because they are disabled) and tick_broadcast_enter can't 
> fail because it is never called.
> 
> Does it make more sense ?

Well, you've not explained what's confusing in the code after this series
in the first place. :-)

Second, quite honestly, I don't see a connection to genpd here.

What you seem to be saying is "maybe we can eliminate the need to check the
return value of tick_broadcast_enter() in the idle loop if we proactively
disable the TIMER_STOP idle states of a CPU when we start to use that CPU's
timer as a broadcast one".

So this seems to be about the timekeeping rather than power domains, because
that's where the broadcast thing is done.  So the code setting up the CPU's
timer for broadcast would pretty much need to pause cpuidle, go through the
CPU's idle states and disable the TIMER_STOP ones.  And do the reverse when the
timer is not going to be used for broadcast any more.  So the question is whether
or not this is actually really more straightforward than checking the return
value of tick_broadcast_enter() in the idle loop after all.


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-12 13:23                 ` Rafael J. Wysocki
@ 2015-05-12 18:04                   ` Daniel Lezcano
  2015-05-13 22:59                   ` Kevin Hilman
  1 sibling, 0 replies; 29+ messages in thread
From: Daniel Lezcano @ 2015-05-12 18:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Preeti U Murthy, peterz, tglx, rafael.j.wysocki, rlippert,
	linux-pm, linus.walleij, linux-kernel, mingo, sudeep.holla,
	linuxppc-dev, Kevin Hilman, Lina Iyer, Ulf Hansson,
	Lorenzo Pieralisi

On 05/12/2015 03:23 PM, Rafael J. Wysocki wrote:
> On Tuesday, May 12, 2015 10:41:35 AM Daniel Lezcano wrote:
>> On 05/12/2015 01:31 AM, Rafael J. Wysocki wrote:
>>> On Monday, May 11, 2015 07:40:41 PM Daniel Lezcano wrote:
>>>> On 05/10/2015 01:15 AM, Rafael J. Wysocki wrote:
>>>>> On Saturday, May 09, 2015 10:33:05 PM Rafael J. Wysocki wrote:
>>>>>> On Saturday, May 09, 2015 10:11:41 PM Rafael J. Wysocki wrote:
>>>>>>> On Saturday, May 09, 2015 11:19:16 AM Preeti U Murthy wrote:
>>>>>>>> Hi Rafael,
>>>>>>>>
>>>>>>>> On 05/08/2015 07:48 PM, Rafael J. Wysocki wrote:
>>>>>>>
>>>>>>> [cut]
>>>>>>>
>>>>>>>>>>
>>>>>>>>>> +	/* Take note of the planned idle state. */
>>>>>>>>>> +	idle_set_state(smp_processor_id(), target_state);
>>>>>>>>>
>>>>>>>>> And I wouldn't do this either.
>>>>>>>>>
>>>>>>>>> The behavior here is pretty much as though the driver demoted the state chosen
>>>>>>>>> by the governor and we don't call idle_set_state() again in those cases.
>>>>>>>>
>>>>>>>> Why is this wrong?
>>>>>>>
>>>>>>> It is not "wrong", but incomplete, because demotions done by the cpuidle driver
>>>>>>> should also be taken into account in the same way.
>>>>>>>
>>>>>>> But I'm seeing that the recent patch of mine that made cpuidle_enter_state()
>>>>>>> call default_idle_call() was a mistake, because it might confuse find_idlest_cpu()
>>>>>>> significantly as to what state the CPU is in.  I'll drop that one for now.
>>>>>>
>>>>>> OK, done.
>>>>>>
>>>>>> So after I've dropped it I think we need to do three things:
>>>>>> (1) Move the idle_set_state() calls to cpuidle_enter_state().
>>>>>> (2) Make cpuidle_enter_state() call default_idle_call() again, but this time
>>>>>>        do that *before* it has called idle_set_state() for target_state.
>>>>>> (3) Introduce demotion as per my last patch.
>>>>>>
>>>>>> Let me cut patches for that.
>>>>>
>>>>> Done as per the above and the patches follow in replies to this messge.
>>>>>
>>>>> All on top of the current linux-next branch of the linux-pm.git tree.
>>>>
>>>> IMO the resulting code is more and more confusing.
>>>
>>> Why is it confusing?
>>>
>>> What part of it is confusing?
>>>
>>> Patches [1-2/3] simply replace https://patchwork.kernel.org/patch/6326761/
>>> and I'm not sure why that would be confusing.
>>>
>>> Patch [3/3] simply causes cpuidle_enter_state() to pick up a more suitable
>>> state if tick_broadcast_enter() fails instead of returning an error code
>>> in that case.  What exactly is confusing in that?
>>>
>>>> Except I miss something, the tick_broadcast_enter can fail only if the
>>>> local timer of the current cpu is used as a broadcast timer (which is
>>>> the case today for PPC only).
>>>
>>> well, why does this matter?
>>>
>>>> The correct fix would be to tie this local timer with the cpu power
>>>> domain and disable the idle state powering down this domain like it was
>>>> done for the renesas cpuidle driver.
>>>>
>>>> IOW, the cpu power domain is in use (because of its local timer), so we
>>>> shouldn't shut it down.
>>>>
>>>> No ?
>>>
>>> Sorry, I'm not sure what you're talking about.
>>>
>>> The problem at hand is that tick_broadcast_enter() can fail and we need to
>>> handle that.  If we can prevent it from ever failing, that would be awesome,
>>> but quite honestly I don't see how to do that ATM.
>>
>> Ok, sorry. Let me clarify.
>>
>> You did a mechanism two years ago with pm_genpd_attach_cpuidle and
>> power_on/off. That disables a cpuidle state when a power domain is in use.
>>
>> The idea I was proposing is to reuse this approach.
>>
>> The logic is:
>>
>> "The local timer is in use, this idle state power downs this timer, then
>> disable it".
>
> I'm not sure it's about powering down.  Stopping rather (which may or may
> not involve powering down).
>
>> So it is when the broadcast timer is 'bound_on' a cpu, we disable the
>> idle states. That could be done via a loop looking for the TIMER_STOP
>> flag or via the power domain.
>>
>> Hence the cpuidle_select will never return a state which powers downs
>> the local cpu (because they are disabled) and tick_broadcast_enter can't
>> fail because it is never called.
>>
>> Does it make more sense ?
>
> Well, you've not explained what's confusing in the code after this series
> in the first place. :-)

It is not the series itself, but the sum of the recent changes in this
area makes the overall code more and more difficult to maintain. But
that's a personal opinion. It sounds like we are trying to catch the
corner cases each time there is a change somewhere.

> Second, quite honestly, I don't see a connection to genpd here.

Probably I am not clear :)

The connection we have is the local timer and the cpuidle framework 
shutting it down. Why ? Because the local timer belongs to the cpu's 
power domain.

Using genpd to describe this relation between an idle state and the
devices impacted via a power domain is, in my opinion, a nice
abstraction and a good opportunity to integrate the different
subsystems. Furthermore it is consistent with Kevin's investigation
around the power domain and SoC idle.

Kevin ?

> What you seem to be saying is "maybe we can eliminate the need to check the
> return value of tick_broadcast_enter() in the idle loop if we proactively
> disable the TIMER_STOP idle states of a CPU when we start to use that CPU's
> timer as a broadcast one".

Well, not exactly. That's the consequence.

I meant that using any device in a specific power domain makes it
impossible to shut that domain down.

The timer and the cpu belong to the same power domain, hence it should 
be impossible to reach an idle state where it is shut down.

The consequence is the tick_broadcast_enter *can't* fail by using this 
approach and we don't have to handle a corner case.

There are boards where some idle states power down the console
because the power controller has a larger power domain including cpu and
console or i2c or dma. In order to handle this, there is some specific
code in the cpuidle driver to check if the bus is in use.

IMO, we have exactly the same constraint here with the timer and we are 
handling it in another different way.

> So this seems to be about the timekeeping rather than power domains, because
> that's where the broadcast thing is done.

I disagree. It should be done via a common API (eg. pm_runtime_get/put).

> So the code setting up the CPU's
> timer for broadcast would pretty much need to pause cpuidle, go through the
> CPU's idle states and disable the TIMER_STOP ones.  And do the reverse when the
> timer is not going the be used for broadcast any more.  So question is whether
> or not this is actually really more straightforward than checking the return
> value of tick_broadcast_enter() in the idle loop after all.

I agree this is not straightforward and your changes are valid, even
though I think we can do something better by redesigning a bit.

But I guess I would have to provide a patchset for that ... :)

Thanks
   -- Daniel

-- 
  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-12 13:23                 ` Rafael J. Wysocki
  2015-05-12 18:04                   ` Daniel Lezcano
@ 2015-05-13 22:59                   ` Kevin Hilman
  2015-05-14  0:16                     ` Rafael J. Wysocki
  2015-05-14  3:59                     ` Preeti U Murthy
  1 sibling, 2 replies; 29+ messages in thread
From: Kevin Hilman @ 2015-05-13 22:59 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Daniel Lezcano, Preeti U Murthy, peterz, tglx, rafael.j.wysocki,
	rlippert, linux-pm, linus.walleij, linux-kernel, mingo,
	sudeep.holla, linuxppc-dev, Lina Iyer, Ulf Hansson

"Rafael J. Wysocki" <rjw@rjwysocki.net> writes:

[...]

> Second, quite honestly, I don't see a connection to genpd here.

The connection with genpd is because the *reason* the timer was
shutdown/stopped is because it shares power with the CPU, which is why
the timer stops when the CPU hits certain low power states.  IOW, it's
in the same power domain as the CPU.

> What you seem to be saying is "maybe we can eliminate the need to check the
> return value of tick_broadcast_enter() in the idle loop if we proactively
> disable the TIMER_STOP idle states of a CPU when we start to use that CPU's
> timer as a broadcast one".
>
> So this seems to be about the timekeeping rather than power domains, because
> that's where the broadcast thing is done.  So the code setting up the CPU's
> timer for broadcast would pretty much need to pause cpuidle, go through the
> CPU's idle states and disable the TIMER_STOP ones.  And do the reverse when the
> timer is not going to be used for broadcast any more.  

Or..., modify the timer subsystem to use runtime PM on the timer devices,
create a genpd that includes the timer device, and use
pm_genpd_attach_cpuidle() to attach that genpd so that whenever that
timer is runtime PM active, the deeper C-states cannot be hit.

> So question is whether or not this is actually really more
> straightforward than checking the return value of
> tick_broadcast_enter() in the idle loop after all.

Unfortunately this problem doesn't only affect timers.

Daniel's broader point is that $SUBJECT series only handles this for the
timer, but there's actually a more general problem to solve for *any*
device that shares a power domain with a CPU (e.g. CPU-local
timers, interrupt controllers, performance monitoring units, floating
point units, etc. etc.)

If we keep adding checks to the idle loop for all those devices, we're
heading for a mess.  (In fact, this is exactly what CPUidle drivers in
lots of vendor trees are doing, and it is indeed quite messy, and very
vendor specific.)

Also, solving this more general problem was the primary motivation for
adding the genpd _attach_cpuidle() feature in the first place, so why not
use that?

Longer term, IMO, these dependencies between CPUs and all these "extras"
logic that share a power domain should be modeled by a genpd.  If all
those devices are using runtime PM, including the CPUs, and they are
grouped into a genpd, then we can very easily know at the genpd level
whether or not the CPU could be powered down, and to what level.  This
longer-term solution is what I want to discuss at LPC this year in my
"Unify idle management of CPUs and IO devices" topic[1].  ( Also FYI,
using a genpd to model a CPU and connected logic is part of the
motivation behind the recent proposals to add support for multiple
states to genpd by Axel Haslam. )

Anyways I digress...

In the short term, while your patches look fine to me, the objection I
have is that it's only a band-aid fix that handles timers, but none of
the other "extras" that might share a power rail with the CPU.  So,
until we have the long-term stuff sorted out, the better
short-term solution IMO is the _attach_cpuidle() one above.

Kevin

[1] http://wiki.linuxplumbersconf.org/2015:energy-aware_scheduling

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-14  0:16                     ` Rafael J. Wysocki
@ 2015-05-14  0:13                       ` Kevin Hilman
  2015-05-14  0:42                         ` Rafael J. Wysocki
  0 siblings, 1 reply; 29+ messages in thread
From: Kevin Hilman @ 2015-05-14  0:13 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Kevin Hilman, Daniel Lezcano, Preeti U Murthy, Peter Zijlstra,
	Thomas Gleixner, Rafael J. Wysocki, rlippert, linux-pm,
	Linus Walleij, lkml, Ingo Molnar, Sudeep Holla, linuxppc-dev,
	Lina Iyer, Ulf Hansson

On Wed, May 13, 2015 at 5:16 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> On Wednesday, May 13, 2015 03:59:55 PM Kevin Hilman wrote:
>> "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
>>
>> [...]
>>
>> > Second, quite honestly, I don't see a connection to genpd here.
>>
>> The connection with genpd is because the *reason* the timer was
>> shutdown/stopped is because it shares power with the CPU, which is why
>> the timer stops when the CPU hits certain low power states.  IOW, it's
>> in the same power domain as the CPU.
>
> Well, what if you don't have genpd on that system?  Is the problem at hand not
> relevant then magically?

Well, if you're not using genpd to model hardware power domain
dependencies, then yes you'll definitely need a different solution.

And, as we discussed on IRC.  If you only care about timers, and genpd
is not in use, then $SUBJECT series is a fine approach, and I have no
objections.  But for SoCs where there are several other things that
share power with CPU, we need a more generic, genpd based solution,
which it seems we're in agreement on.  And since the two approaches
are not mutually exclusive, then I have real objections to applying
this series.

Acked-by: Kevin Hilman <khilman@linaro.org>

Kevin

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-13 22:59                   ` Kevin Hilman
@ 2015-05-14  0:16                     ` Rafael J. Wysocki
  2015-05-14  0:13                       ` Kevin Hilman
  2015-05-14  3:59                     ` Preeti U Murthy
  1 sibling, 1 reply; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-14  0:16 UTC (permalink / raw)
  To: Kevin Hilman
  Cc: Daniel Lezcano, Preeti U Murthy, peterz, tglx, rafael.j.wysocki,
	rlippert, linux-pm, linus.walleij, linux-kernel, mingo,
	sudeep.holla, linuxppc-dev, Lina Iyer, Ulf Hansson

On Wednesday, May 13, 2015 03:59:55 PM Kevin Hilman wrote:
> "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
> 
> [...]
> 
> > Second, quite honestly, I don't see a connection to genpd here.
> 
> The connection with genpd is because the *reason* the timer was
> shutdown/stopped is because it shares power with the CPU, which is why
> the timer stops when the CPU hits certain low power states.  IOW, it's
> in the same power domain as the CPU.

Well, what if you don't have genpd on that system?  Is the problem at hand not
relevant then magically?

Rafael


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-14  0:42                         ` Rafael J. Wysocki
@ 2015-05-14  0:31                           ` Kevin Hilman
  0 siblings, 0 replies; 29+ messages in thread
From: Kevin Hilman @ 2015-05-14  0:31 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Daniel Lezcano, Preeti U Murthy, Peter Zijlstra, Thomas Gleixner,
	Rafael J. Wysocki, rlippert, linux-pm, Linus Walleij, lkml,
	Ingo Molnar, Sudeep Holla, linuxppc-dev, Lina Iyer, Ulf Hansson

"Rafael J. Wysocki" <rjw@rjwysocki.net> writes:

> On Wednesday, May 13, 2015 05:13:27 PM Kevin Hilman wrote:
>> On Wed, May 13, 2015 at 5:16 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>> > On Wednesday, May 13, 2015 03:59:55 PM Kevin Hilman wrote:
>> >> "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
>> >>
>> >> [...]
>> >>
>> >> > Second, quite honestly, I don't see a connection to genpd here.
>> >>
>> >> The connection with genpd is because the *reason* the timer was
>> >> shutdown/stopped is because it shares power with the CPU, which is why
>> >> the timer stops when the CPU hits certain low power states.  IOW, it's
>> >> in the same power domain as the CPU.
>> >
>> > Well, what if you don't have genpd on that system?  Is the problem at hand not
>> > relevant then magically?
>> 
>> Well, if you're not using genpd to model hardware power domain
>> dependencies, then yes you'll definitely need a different solution.
>> 
>> And, as we discussed on IRC.  If you only care about timers, and genpd
>> is not in use, then $SUBJECT series is a fine approach, and I have no
>> objections.  But for SoCs where there are several other things that
>> share power with CPU, we need a more generic, genpd based solution,
>> which it seems we're in agreement on.  And since the two approaches
>> are not mutually exclusive, then I have real objections to applying
>> this series.
>
> I guess a "no" is missing in the last sentence. ;-)

Correct.  I have *no* real objections to applying this series.

Kevin


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-14  0:13                       ` Kevin Hilman
@ 2015-05-14  0:42                         ` Rafael J. Wysocki
  2015-05-14  0:31                           ` Kevin Hilman
  0 siblings, 1 reply; 29+ messages in thread
From: Rafael J. Wysocki @ 2015-05-14  0:42 UTC (permalink / raw)
  To: Kevin Hilman
  Cc: Daniel Lezcano, Preeti U Murthy, Peter Zijlstra, Thomas Gleixner,
	Rafael J. Wysocki, rlippert, linux-pm, Linus Walleij, lkml,
	Ingo Molnar, Sudeep Holla, linuxppc-dev, Lina Iyer, Ulf Hansson

On Wednesday, May 13, 2015 05:13:27 PM Kevin Hilman wrote:
> On Wed, May 13, 2015 at 5:16 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> > On Wednesday, May 13, 2015 03:59:55 PM Kevin Hilman wrote:
> >> "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
> >>
> >> [...]
> >>
> >> > Second, quite honestly, I don't see a connection to genpd here.
> >>
> >> The connection with genpd is because the *reason* the timer was
> >> shutdown/stopped is because it shares power with the CPU, which is why
> >> the timer stops when the CPU hits certain low power states.  IOW, it's
> >> in the same power domain as the CPU.
> >
> > Well, what if you don't have genpd on that system?  Is the problem at hand not
> > relevant then magically?
> 
> Well, if you're not using genpd to model hardware power domain
> dependencies, then yes you'll definitely need a different solution.
> 
> And, as we discussed on IRC.  If you only care about timers, and genpd
> is not in use, then $SUBJECT series is a fine approach, and I have no
> objections.  But for SoCs where there are several other things that
> share power with CPU, we need a more generic, genpd based solution,
> which it seems we're in agreement on.  And since the two approaches
> are not mutually exclusive, then I have real objections to applying
> this series.

I guess a "no" is missing in the last sentence. ;-)

> Acked-by: Kevin Hilman <khilman@linaro.org>

Thanks!

Rafael


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures
  2015-05-13 22:59                   ` Kevin Hilman
  2015-05-14  0:16                     ` Rafael J. Wysocki
@ 2015-05-14  3:59                     ` Preeti U Murthy
  1 sibling, 0 replies; 29+ messages in thread
From: Preeti U Murthy @ 2015-05-14  3:59 UTC (permalink / raw)
  To: Kevin Hilman, Rafael J. Wysocki
  Cc: rlippert, Ulf Hansson, rafael.j.wysocki, linux-pm, peterz,
	linus.walleij, Daniel Lezcano, linux-kernel, mingo, sudeep.holla,
	tglx, linuxppc-dev, Lina Iyer

On 05/14/2015 04:29 AM, Kevin Hilman wrote:
> "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
> 
> [...]
> 
>> Second, quite honestly, I don't see a connection to genpd here.
> 
> The connection with genpd is because the *reason* the timer was
> shutdown/stopped is because it shares power with the CPU, which is why
> the timer stops when the CPU hits certain low power states.  IOW, it's
> in the same power domain as the CPU.
> 
>> What you seem to be saying is "maybe we can eliminate the need to check the
>> return value of tick_broadcast_enter() in the idle loop if we proactively
>> disable the TIMER_STOP idle states of a CPU when we start to use that CPU's
>> timer as a broadcast one".
>>
>> So this seems to be about the timekeeping rather than power domains, because
>> that's where the broadcast thing is done.  So the code setting up the CPU's
>> timer for broadcast would pretty much need to pause cpuidle, go through the
>> CPU's idle states and disable the TIMER_STOP ones.  And do the reverse when the
>> timer is not going to be used for broadcast any more.  
> 
> Or..., modify the timer subsystem to use runtime PM on the timer devices,
> create a genpd that includes the timer device, and use
> pm_genpd_attach_cpuidle() to attach that genpd so that whenever that
> timer is runtime PM active, the deeper C-states cannot be hit.

I think you are missing a point here. If such a solution were possible,
the tick broadcast framework would not have been designed to support
deep cpu idle states. One reason we cannot go this way of course, is not
all archs may support genpd as was pointed out. But the second reason
IMO is that a timer is runtime PM active as long as there is some
deferred work, either in the near or far future.

The point behind the broadcast framework is to let these CPUs go to deeper
idle states when the timers are in the "far" future. We can potentially
save power by doing so and don't need to keep the entire power domain
active just because the timer is supposed to fire 5 minutes from now,
which is precisely what happens if we go the genpd way.

Hence I don't think we can trivially club timers with genpd unless we
have a way to power the timer PM domain down, depending on when it is
supposed to fire, in which case we will merely be replicating the
cpuidle governor code.

Regards
Preeti U Murthy

> 
>> So question is whether or not this is actually really more
>> straightforward than checking the return value of
>> tick_broadcast_enter() in the idle loop after all.
> 
> Unfortunately this problem doesn't only affect timers.
> 
> Daniel's broader point is that $SUBJECT series only handles this for the
> timer, but there's actually a more general problem to solve for *any*
> device that shares a power domain with a CPU (e.g. CPU-local
> timers, interrupt controllers, performance monitoring units, floating
> point units, etc. etc.)
> 
> If we keep adding checks to the idle loop for all those devices, we're
> heading for a mess.  (In fact, this is exactly what CPUidle drivers in
> lots of vendor trees are doing, and it is indeed quite messy, and very
> vendor specific.)
> 
> Also, solving this more general problem was the primary motivation for
> adding the genpd _attach_cpuidle() feature in the first place, so why not
> use that?
> 
> Longer term, IMO, these dependencies between CPUs and all these "extras"
> logic that share a power domain should be modeled by a genpd.  If all
> those devices are using runtime PM, including the CPUs, and they are
> grouped into a genpd, then we can very easily know at the genpd level
> whether or not the CPU could be powered down, and to what level.  This
> longer-term solution is what I want to discuss at LPC this year in my
> "Unify idle management of CPUs and IO devices" topic[1].  ( Also FYI,
> using a genpd to model a CPU and connected logic is part of the
> motivation behind the recent proposals to add support for multiple
> states to genpd by Axel Haslam. )
> 
> Anyways I digress...
> 
> In the short term, while your patches look fine to me, the objection I
> have is that it's only a band-aid fix that handles timers, but none of
> the other "extras" that might share a power rail with the CPU.  So,
> until we have the long-term stuff sorted out, the better
> short-term solution IMO is the _attach_cpuidle() one above.
> 
> Kevin
> 
> [1] http://wiki.linuxplumbersconf.org/2015:energy-aware_scheduling
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 


^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2015-05-14  3:59 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-08  7:35 [PATCH V3] cpuidle: Handle tick_broadcast_enter() failure gracefully Preeti U Murthy
2015-05-08 12:43 ` Sudeep Holla
2015-05-08 14:18 ` Rafael J. Wysocki
2015-05-08 21:51   ` Rafael J. Wysocki
2015-05-09  5:49   ` Preeti U Murthy
2015-05-09 18:46     ` Rafael J. Wysocki
2015-05-09 18:48       ` Rafael J. Wysocki
2015-05-09 20:11     ` Rafael J. Wysocki
2015-05-09 20:33       ` Rafael J. Wysocki
2015-05-09 23:15         ` [PATCH 0/3] cpuidle: updates related to tick_broadcast_enter() failures Rafael J. Wysocki
2015-05-09 23:18           ` [PATCH 1/3] sched / idle: Call idle_set_state() from cpuidle_enter_state() Rafael J. Wysocki
2015-05-09 23:18           ` [PATCH 2/3] sched / idle: Call default_idle_call() " Rafael J. Wysocki
2015-05-09 23:19           ` [PATCH 3/3] cpuidle: Select a different state on tick_broadcast_enter() failures Rafael J. Wysocki
2015-05-11  3:48           ` [PATCH 0/3] cpuidle: updates related to " Preeti U Murthy
2015-05-11  5:21           ` Preeti U Murthy
2015-05-11 23:13             ` Rafael J. Wysocki
2015-05-11 15:13           ` Sudeep Holla
2015-05-11 23:14             ` Rafael J. Wysocki
2015-05-11 17:40           ` Daniel Lezcano
2015-05-11 23:31             ` Rafael J. Wysocki
2015-05-12  8:41               ` Daniel Lezcano
2015-05-12 13:23                 ` Rafael J. Wysocki
2015-05-12 18:04                   ` Daniel Lezcano
2015-05-13 22:59                   ` Kevin Hilman
2015-05-14  0:16                     ` Rafael J. Wysocki
2015-05-14  0:13                       ` Kevin Hilman
2015-05-14  0:42                         ` Rafael J. Wysocki
2015-05-14  0:31                           ` Kevin Hilman
2015-05-14  3:59                     ` Preeti U Murthy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).