[v2] cpuidle: Use nanoseconds as the unit of time
diff mbox series

Message ID 3269796.AzLOQfDnpo@kreacher
State New, archived
Headers show
Series
  • [v2] cpuidle: Use nanoseconds as the unit of time
Related show

Commit Message

Rafael J. Wysocki Nov. 7, 2019, 2:25 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Currently, the cpuidle subsystem uses microseconds as the unit of
time which (among other things) causes the idle loop to incur some
integer division overhead for no clear benefit.

In order to allow cpuidle to measure time in nanoseconds, add two
additional fields, exit_latency_ns and target_residency_ns, to
struct cpuidle_state to represent the exit latency and target
residency of an idle state in nanoseconds, respectively, and
initialize them with the help of the corresponding values in
microseconds provided by drivers.  In addition to that, change
cpuidle_governor_latency_req() to return the idle state exit
latency constraint in nanoseconds.

With that, measure idle state residency (last_residency_ns in
struct cpuidle_device and time_ns in struct cpuidle_state_usage) in
nanoseconds and update the cpuidle core and governors accordingly.

However, the menu governor still computes typical intervals in
microseconds to avoid integer overflows.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

-> v2:
 - Fix the direction of the comparison in cpuidle_governor_latency_req() (was
   "less than", should be "greater than").
 - Use predicted_ns and TICK_NSEC instead of predicted_us and TICK_USEC,
   respectively, in one leftover location in menu_select().
 - Use target_residency_ns and exit_latency_ns when reading the
   "residency" and "latency" sysfs attributes, respectively (in case the
   values in us are not set by the driver).

Changes since RFC:
 - Rework the predicted_us and predicted_ns computation in the
   menu governor (Peter).
 - Rebase on top of the current linux-next with
   https://patchwork.kernel.org/patch/11225501/ applied.

---
 drivers/cpuidle/cpuidle.c            |   36 +++++------
 drivers/cpuidle/driver.c             |   29 ++++++--
 drivers/cpuidle/governor.c           |    7 +-
 drivers/cpuidle/governors/haltpoll.c |    7 --
 drivers/cpuidle/governors/ladder.c   |   25 ++++---
 drivers/cpuidle/governors/menu.c     |  113 +++++++++++++++++------------------
 drivers/cpuidle/governors/teo.c      |   76 +++++++++++------------
 drivers/cpuidle/sysfs.c              |   20 +++++-
 include/linux/cpuidle.h              |    8 +-
 kernel/sched/idle.c                  |    2 
 10 files changed, 172 insertions(+), 151 deletions(-)

Comments

Rafael J. Wysocki Nov. 8, 2019, 1:44 a.m. UTC | #1
On Thu, Nov 7, 2019 at 3:25 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> Currently, the cpuidle subsystem uses microseconds as the unit of
> time which (among other things) causes the idle loop to incur some
> integer division overhead for no clear benefit.
>
> In order to allow cpuidle to measure time in nanoseconds, add two
> additional fields, exit_latency_ns and target_residency_ns, to
> struct cpuidle_state to represent the exit latency and target
> residency of an idle state in nanoseconds, respectively, and
> initialize them with the help of the corresponding values in
> microseconds provided by drivers.  In addition to that, change
> cpuidle_governor_latency_req() to return the idle state exit
> latency constraint in nanoseconds.
>
> With that, measure idle state residency (last_residency_ns in
> struct cpuidle_device and time_ns in struct cpuidle_state_usage) in
> nanoseconds and update the cpuidle core and governors accordingly.
>
> However, the menu governor still computes typical intervals in
> microseconds to avoid integer overflows.

Since this addresses all of the comments received by the RFC version
that was posted over a month ago and I don't see any more issues with
it, I'm tempted to simply queue it up for 5.5 unless somebody sees a
good enough reason why that would be a bad idea.

> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>
> -> v2:
>  - Fix the direction of the comparison in cpuidle_governor_latency_req() (was
>    "less than", should be "greater than").
>  - Use predicted_ns and TICK_NSEC instead of predicted_us and TICK_USEC,
>    respectively, in one leftover location in menu_select().
>  - Use target_residency_ns and exit_latency_ns when reading the
>    "residency" and "latency" sysfs attributes, respectively (in case the
>    values in us are not set by the driver).
>
> Changes since RFC:
>  - Rework the predicted_us and predicted_ns computation in the
>    menu governor (Peter).
>  - Rebase on top of the current linux-next with
>    https://patchwork.kernel.org/patch/11225501/ applied.
>
> ---
>  drivers/cpuidle/cpuidle.c            |   36 +++++------
>  drivers/cpuidle/driver.c             |   29 ++++++--
>  drivers/cpuidle/governor.c           |    7 +-
>  drivers/cpuidle/governors/haltpoll.c |    7 --
>  drivers/cpuidle/governors/ladder.c   |   25 ++++---
>  drivers/cpuidle/governors/menu.c     |  113 +++++++++++++++++------------------
>  drivers/cpuidle/governors/teo.c      |   76 +++++++++++------------
>  drivers/cpuidle/sysfs.c              |   20 +++++-
>  include/linux/cpuidle.h              |    8 +-
>  kernel/sched/idle.c                  |    2
>  10 files changed, 172 insertions(+), 151 deletions(-)
>
> Index: linux-pm/include/linux/cpuidle.h
> ===================================================================
> --- linux-pm.orig/include/linux/cpuidle.h
> +++ linux-pm/include/linux/cpuidle.h
> @@ -35,7 +35,7 @@ struct cpuidle_driver;
>  struct cpuidle_state_usage {
>         unsigned long long      disable;
>         unsigned long long      usage;
> -       unsigned long long      time; /* in US */
> +       u64                     time_ns;
>         unsigned long long      above; /* Number of times it's been too deep */
>         unsigned long long      below; /* Number of times it's been too shallow */
>  #ifdef CONFIG_SUSPEND
> @@ -48,6 +48,8 @@ struct cpuidle_state {
>         char            name[CPUIDLE_NAME_LEN];
>         char            desc[CPUIDLE_DESC_LEN];
>
> +       u64             exit_latency_ns;
> +       u64             target_residency_ns;
>         unsigned int    flags;
>         unsigned int    exit_latency; /* in US */
>         int             power_usage; /* in mW */
> @@ -89,7 +91,7 @@ struct cpuidle_device {
>         ktime_t                 next_hrtimer;
>
>         int                     last_state_idx;
> -       int                     last_residency;
> +       u64                     last_residency_ns;
>         u64                     poll_limit_ns;
>         struct cpuidle_state_usage      states_usage[CPUIDLE_STATE_MAX];
>         struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
> @@ -263,7 +265,7 @@ struct cpuidle_governor {
>
>  #ifdef CONFIG_CPU_IDLE
>  extern int cpuidle_register_governor(struct cpuidle_governor *gov);
> -extern int cpuidle_governor_latency_req(unsigned int cpu);
> +extern s64 cpuidle_governor_latency_req(unsigned int cpu);
>  #else
>  static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
>  {return 0;}
> Index: linux-pm/drivers/cpuidle/driver.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/driver.c
> +++ linux-pm/drivers/cpuidle/driver.c
> @@ -165,16 +165,27 @@ static void __cpuidle_driver_init(struct
>         if (!drv->cpumask)
>                 drv->cpumask = (struct cpumask *)cpu_possible_mask;
>
> -       /*
> -        * Look for the timer stop flag in the different states, so that we know
> -        * if the broadcast timer has to be set up.  The loop is in the reverse
> -        * order, because usually one of the deeper states have this flag set.
> -        */
> -       for (i = drv->state_count - 1; i >= 0 ; i--) {
> -               if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
> +       for (i = 0; i < drv->state_count; i++) {
> +               struct cpuidle_state *s = &drv->states[i];
> +
> +               /*
> +                * Look for the timer stop flag in the different states and if
> +                * it is found, indicate that the broadcast timer has to be set
> +                * up.
> +                */
> +               if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
>                         drv->bctimer = 1;
> -                       break;
> -               }
> +
> +               /*
> +                * The core will use the target residency and exit latency
> +                * values in nanoseconds, but allow drivers to provide them in
> +                * microseconds too.
> +                */
> +               if (s->target_residency > 0)
> +                       s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
> +
> +               if (s->exit_latency > 0)
> +                       s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
>         }
>  }
>
> Index: linux-pm/drivers/cpuidle/cpuidle.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/cpuidle.c
> +++ linux-pm/drivers/cpuidle/cpuidle.c
> @@ -75,24 +75,24 @@ int cpuidle_play_dead(void)
>
>  static int find_deepest_state(struct cpuidle_driver *drv,
>                               struct cpuidle_device *dev,
> -                             unsigned int max_latency,
> +                             u64 max_latency_ns,
>                               unsigned int forbidden_flags,
>                               bool s2idle)
>  {
> -       unsigned int latency_req = 0;
> +       u64 latency_req = 0;
>         int i, ret = 0;
>
>         for (i = 1; i < drv->state_count; i++) {
>                 struct cpuidle_state *s = &drv->states[i];
>
>                 if (dev->states_usage[i].disable ||
> -                   s->exit_latency <= latency_req ||
> -                   s->exit_latency > max_latency ||
> +                   s->exit_latency_ns <= latency_req ||
> +                   s->exit_latency_ns > max_latency_ns ||
>                     (s->flags & forbidden_flags) ||
>                     (s2idle && !s->enter_s2idle))
>                         continue;
>
> -               latency_req = s->exit_latency;
> +               latency_req = s->exit_latency_ns;
>                 ret = i;
>         }
>         return ret;
> @@ -124,7 +124,7 @@ void cpuidle_use_deepest_state(bool enab
>  int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>                                struct cpuidle_device *dev)
>  {
> -       return find_deepest_state(drv, dev, UINT_MAX, 0, false);
> +       return find_deepest_state(drv, dev, U64_MAX, 0, false);
>  }
>
>  #ifdef CONFIG_SUSPEND
> @@ -180,7 +180,7 @@ int cpuidle_enter_s2idle(struct cpuidle_
>          * that interrupts won't be enabled when it exits and allows the tick to
>          * be frozen safely.
>          */
> -       index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
> +       index = find_deepest_state(drv, dev, U64_MAX, 0, true);
>         if (index > 0)
>                 enter_s2idle_proper(drv, dev, index);
>
> @@ -209,7 +209,7 @@ int cpuidle_enter_state(struct cpuidle_d
>          * CPU as a broadcast timer, this call may fail if it is not available.
>          */
>         if (broadcast && tick_broadcast_enter()) {
> -               index = find_deepest_state(drv, dev, target_state->exit_latency,
> +               index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
>                                            CPUIDLE_FLAG_TIMER_STOP, false);
>                 if (index < 0) {
>                         default_idle_call();
> @@ -247,7 +247,7 @@ int cpuidle_enter_state(struct cpuidle_d
>                 local_irq_enable();
>
>         if (entered_state >= 0) {
> -               s64 diff, delay = drv->states[entered_state].exit_latency;
> +               s64 diff, delay = drv->states[entered_state].exit_latency_ns;
>                 int i;
>
>                 /*
> @@ -255,15 +255,13 @@ int cpuidle_enter_state(struct cpuidle_d
>                  * This can be moved to within driver enter routine,
>                  * but that results in multiple copies of same code.
>                  */
> -               diff = ktime_us_delta(time_end, time_start);
> -               if (diff > INT_MAX)
> -                       diff = INT_MAX;
> +               diff = ktime_sub(time_end, time_start);
>
> -               dev->last_residency = (int)diff;
> -               dev->states_usage[entered_state].time += dev->last_residency;
> +               dev->last_residency_ns = diff;
> +               dev->states_usage[entered_state].time_ns += diff;
>                 dev->states_usage[entered_state].usage++;
>
> -               if (diff < drv->states[entered_state].target_residency) {
> +               if (diff < drv->states[entered_state].target_residency_ns) {
>                         for (i = entered_state - 1; i >= 0; i--) {
>                                 if (dev->states_usage[i].disable)
>                                         continue;
> @@ -281,14 +279,14 @@ int cpuidle_enter_state(struct cpuidle_d
>                                  * Update if a deeper state would have been a
>                                  * better match for the observed idle duration.
>                                  */
> -                               if (diff - delay >= drv->states[i].target_residency)
> +                               if (diff - delay >= drv->states[i].target_residency_ns)
>                                         dev->states_usage[entered_state].below++;
>
>                                 break;
>                         }
>                 }
>         } else {
> -               dev->last_residency = 0;
> +               dev->last_residency_ns = 0;
>         }
>
>         return entered_state;
> @@ -381,7 +379,7 @@ u64 cpuidle_poll_time(struct cpuidle_dri
>                 if (dev->states_usage[i].disable)
>                         continue;
>
> -               limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
> +               limit_ns = (u64)drv->states[i].target_residency_ns;
>         }
>
>         dev->poll_limit_ns = limit_ns;
> @@ -552,7 +550,7 @@ static void __cpuidle_unregister_device(
>  static void __cpuidle_device_init(struct cpuidle_device *dev)
>  {
>         memset(dev->states_usage, 0, sizeof(dev->states_usage));
> -       dev->last_residency = 0;
> +       dev->last_residency_ns = 0;
>         dev->next_hrtimer = 0;
>  }
>
> Index: linux-pm/drivers/cpuidle/sysfs.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/sysfs.c
> +++ linux-pm/drivers/cpuidle/sysfs.c
> @@ -273,16 +273,30 @@ static ssize_t show_state_##_name(struct
>         return sprintf(buf, "%s\n", state->_name);\
>  }
>
> -define_show_state_function(exit_latency)
> -define_show_state_function(target_residency)
> +#define define_show_state_time_function(_name) \
> +static ssize_t show_state_##_name(struct cpuidle_state *state, \
> +                                 struct cpuidle_state_usage *state_usage, \
> +                                 char *buf) \
> +{ \
> +       return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \
> +}
> +
> +define_show_state_time_function(exit_latency)
> +define_show_state_time_function(target_residency)
>  define_show_state_function(power_usage)
>  define_show_state_ull_function(usage)
> -define_show_state_ull_function(time)
>  define_show_state_str_function(name)
>  define_show_state_str_function(desc)
>  define_show_state_ull_function(above)
>  define_show_state_ull_function(below)
>
> +static ssize_t show_state_time(struct cpuidle_state *state,
> +                              struct cpuidle_state_usage *state_usage,
> +                              char *buf)
> +{
> +       return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns));
> +}
> +
>  static ssize_t show_state_disable(struct cpuidle_state *state,
>                                   struct cpuidle_state_usage *state_usage,
>                                   char *buf)
> Index: linux-pm/drivers/cpuidle/governor.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/governor.c
> +++ linux-pm/drivers/cpuidle/governor.c
> @@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpu
>   * cpuidle_governor_latency_req - Compute a latency constraint for CPU
>   * @cpu: Target CPU
>   */
> -int cpuidle_governor_latency_req(unsigned int cpu)
> +s64 cpuidle_governor_latency_req(unsigned int cpu)
>  {
>         int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
>         struct device *device = get_cpu_device(cpu);
>         int device_req = dev_pm_qos_raw_resume_latency(device);
>
> -       return device_req < global_req ? device_req : global_req;
> +       if (device_req > global_req)
> +               device_req = global_req;
> +
> +       return (s64)device_req * NSEC_PER_USEC;
>  }
> Index: linux-pm/drivers/cpuidle/governors/ladder.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/governors/ladder.c
> +++ linux-pm/drivers/cpuidle/governors/ladder.c
> @@ -27,8 +27,8 @@ struct ladder_device_state {
>         struct {
>                 u32 promotion_count;
>                 u32 demotion_count;
> -               u32 promotion_time;
> -               u32 demotion_time;
> +               u64 promotion_time_ns;
> +               u64 demotion_time_ns;
>         } threshold;
>         struct {
>                 int promotion_count;
> @@ -68,9 +68,10 @@ static int ladder_select_state(struct cp
>  {
>         struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
>         struct ladder_device_state *last_state;
> -       int last_residency, last_idx = dev->last_state_idx;
> +       int last_idx = dev->last_state_idx;
>         int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
> -       int latency_req = cpuidle_governor_latency_req(dev->cpu);
> +       s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
> +       s64 last_residency;
>
>         /* Special case when user has set very strict latency requirement */
>         if (unlikely(latency_req == 0)) {
> @@ -80,13 +81,13 @@ static int ladder_select_state(struct cp
>
>         last_state = &ldev->states[last_idx];
>
> -       last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
> +       last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;
>
>         /* consider promotion */
>         if (last_idx < drv->state_count - 1 &&
>             !dev->states_usage[last_idx + 1].disable &&
> -           last_residency > last_state->threshold.promotion_time &&
> -           drv->states[last_idx + 1].exit_latency <= latency_req) {
> +           last_residency > last_state->threshold.promotion_time_ns &&
> +           drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
>                 last_state->stats.promotion_count++;
>                 last_state->stats.demotion_count = 0;
>                 if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
> @@ -98,11 +99,11 @@ static int ladder_select_state(struct cp
>         /* consider demotion */
>         if (last_idx > first_idx &&
>             (dev->states_usage[last_idx].disable ||
> -           drv->states[last_idx].exit_latency > latency_req)) {
> +           drv->states[last_idx].exit_latency_ns > latency_req)) {
>                 int i;
>
>                 for (i = last_idx - 1; i > first_idx; i--) {
> -                       if (drv->states[i].exit_latency <= latency_req)
> +                       if (drv->states[i].exit_latency_ns <= latency_req)
>                                 break;
>                 }
>                 ladder_do_selection(dev, ldev, last_idx, i);
> @@ -110,7 +111,7 @@ static int ladder_select_state(struct cp
>         }
>
>         if (last_idx > first_idx &&
> -           last_residency < last_state->threshold.demotion_time) {
> +           last_residency < last_state->threshold.demotion_time_ns) {
>                 last_state->stats.demotion_count++;
>                 last_state->stats.promotion_count = 0;
>                 if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
> @@ -150,9 +151,9 @@ static int ladder_enable_device(struct c
>                 lstate->threshold.demotion_count = DEMOTION_COUNT;
>
>                 if (i < drv->state_count - 1)
> -                       lstate->threshold.promotion_time = state->exit_latency;
> +                       lstate->threshold.promotion_time_ns = state->exit_latency_ns;
>                 if (i > first_idx)
> -                       lstate->threshold.demotion_time = state->exit_latency;
> +                       lstate->threshold.demotion_time_ns = state->exit_latency_ns;
>         }
>
>         return 0;
> Index: linux-pm/drivers/cpuidle/governors/menu.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/governors/menu.c
> +++ linux-pm/drivers/cpuidle/governors/menu.c
> @@ -33,7 +33,7 @@
>  #define INTERVALS (1UL << INTERVAL_SHIFT)
>  #define RESOLUTION 1024
>  #define DECAY 8
> -#define MAX_INTERESTING 50000
> +#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
>
>
>  /*
> @@ -120,14 +120,14 @@ struct menu_device {
>         int             needs_update;
>         int             tick_wakeup;
>
> -       unsigned int    next_timer_us;
> +       u64             next_timer_ns;
>         unsigned int    bucket;
>         unsigned int    correction_factor[BUCKETS];
>         unsigned int    intervals[INTERVALS];
>         int             interval_ptr;
>  };
>
> -static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
> +static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
>  {
>         int bucket = 0;
>
> @@ -140,15 +140,15 @@ static inline int which_bucket(unsigned
>         if (nr_iowaiters)
>                 bucket = BUCKETS/2;
>
> -       if (duration < 10)
> +       if (duration_ns < 10ULL * NSEC_PER_USEC)
>                 return bucket;
> -       if (duration < 100)
> +       if (duration_ns < 100ULL * NSEC_PER_USEC)
>                 return bucket + 1;
> -       if (duration < 1000)
> +       if (duration_ns < 1000ULL * NSEC_PER_USEC)
>                 return bucket + 2;
> -       if (duration < 10000)
> +       if (duration_ns < 10000ULL * NSEC_PER_USEC)
>                 return bucket + 3;
> -       if (duration < 100000)
> +       if (duration_ns < 100000ULL * NSEC_PER_USEC)
>                 return bucket + 4;
>         return bucket + 5;
>  }
> @@ -276,13 +276,13 @@ static int menu_select(struct cpuidle_dr
>                        bool *stop_tick)
>  {
>         struct menu_device *data = this_cpu_ptr(&menu_devices);
> -       int latency_req = cpuidle_governor_latency_req(dev->cpu);
> -       int i;
> -       int idx;
> -       unsigned int interactivity_req;
> +       s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
>         unsigned int predicted_us;
> +       u64 predicted_ns;
> +       u64 interactivity_req;
>         unsigned long nr_iowaiters;
>         ktime_t delta_next;
> +       int i, idx;
>
>         if (data->needs_update) {
>                 menu_update(drv, dev);
> @@ -290,14 +290,14 @@ static int menu_select(struct cpuidle_dr
>         }
>
>         /* determine the expected residency time, round up */
> -       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
> +       data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
>
>         nr_iowaiters = nr_iowait_cpu(dev->cpu);
> -       data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
> +       data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
>
>         if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
> -           ((data->next_timer_us < drv->states[1].target_residency ||
> -             latency_req < drv->states[1].exit_latency) &&
> +           ((data->next_timer_ns < drv->states[1].target_residency_ns ||
> +             latency_req < drv->states[1].exit_latency_ns) &&
>              !dev->states_usage[0].disable)) {
>                 /*
>                  * In this case state[0] will be used no matter what, so return
> @@ -308,18 +308,15 @@ static int menu_select(struct cpuidle_dr
>                 return 0;
>         }
>
> -       /*
> -        * Force the result of multiplication to be 64 bits even if both
> -        * operands are 32 bits.
> -        * Make sure to round up for half microseconds.
> -        */
> -       predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
> -                                        data->correction_factor[data->bucket],
> -                                        RESOLUTION * DECAY);
> -       /*
> -        * Use the lowest expected idle interval to pick the idle state.
> -        */
> -       predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
> +       /* Round up the result for half microseconds. */
> +       predicted_us = div_u64(data->next_timer_ns *
> +                              data->correction_factor[data->bucket] +
> +                              (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
> +                              RESOLUTION * DECAY * NSEC_PER_USEC);
> +       /* Use the lowest expected idle interval to pick the idle state. */
> +       predicted_ns = (u64)min(predicted_us,
> +                               get_typical_interval(data, predicted_us)) *
> +                               NSEC_PER_USEC;
>
>         if (tick_nohz_tick_stopped()) {
>                 /*
> @@ -330,14 +327,15 @@ static int menu_select(struct cpuidle_dr
>                  * the known time till the closest timer event for the idle
>                  * state selection.
>                  */
> -               if (predicted_us < TICK_USEC)
> -                       predicted_us = ktime_to_us(delta_next);
> +               if (predicted_ns < TICK_NSEC)
> +                       predicted_ns = delta_next;
>         } else {
>                 /*
>                  * Use the performance multiplier and the user-configurable
>                  * latency_req to determine the maximum exit latency.
>                  */
> -               interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
> +               interactivity_req = div64_u64(predicted_ns,
> +                                             performance_multiplier(nr_iowaiters));
>                 if (latency_req > interactivity_req)
>                         latency_req = interactivity_req;
>         }
> @@ -356,19 +354,19 @@ static int menu_select(struct cpuidle_dr
>                 if (idx == -1)
>                         idx = i; /* first enabled state */
>
> -               if (s->target_residency > predicted_us) {
> +               if (s->target_residency_ns > predicted_ns) {
>                         /*
>                          * Use a physical idle state, not busy polling, unless
>                          * a timer is going to trigger soon enough.
>                          */
>                         if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
> -                           s->exit_latency <= latency_req &&
> -                           s->target_residency <= data->next_timer_us) {
> -                               predicted_us = s->target_residency;
> +                           s->exit_latency_ns <= latency_req &&
> +                           s->target_residency_ns <= data->next_timer_ns) {
> +                               predicted_ns = s->target_residency_ns;
>                                 idx = i;
>                                 break;
>                         }
> -                       if (predicted_us < TICK_USEC)
> +                       if (predicted_ns < TICK_NSEC)
>                                 break;
>
>                         if (!tick_nohz_tick_stopped()) {
> @@ -378,7 +376,7 @@ static int menu_select(struct cpuidle_dr
>                                  * tick in that case and let the governor run
>                                  * again in the next iteration of the loop.
>                                  */
> -                               predicted_us = drv->states[idx].target_residency;
> +                               predicted_ns = drv->states[idx].target_residency_ns;
>                                 break;
>                         }
>
> @@ -388,13 +386,13 @@ static int menu_select(struct cpuidle_dr
>                          * closest timer event, select this one to avoid getting
>                          * stuck in the shallow one for too long.
>                          */
> -                       if (drv->states[idx].target_residency < TICK_USEC &&
> -                           s->target_residency <= ktime_to_us(delta_next))
> +                       if (drv->states[idx].target_residency_ns < TICK_NSEC &&
> +                           s->target_residency_ns <= delta_next)
>                                 idx = i;
>
>                         return idx;
>                 }
> -               if (s->exit_latency > latency_req)
> +               if (s->exit_latency_ns > latency_req)
>                         break;
>
>                 idx = i;
> @@ -408,12 +406,10 @@ static int menu_select(struct cpuidle_dr
>          * expected idle duration is shorter than the tick period length.
>          */
>         if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
> -            predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
> -               unsigned int delta_next_us = ktime_to_us(delta_next);
> -
> +            predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
>                 *stop_tick = false;
>
> -               if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
> +               if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
>                         /*
>                          * The tick is not going to be stopped and the target
>                          * residency of the state to be returned is not within
> @@ -425,7 +421,7 @@ static int menu_select(struct cpuidle_dr
>                                         continue;
>
>                                 idx = i;
> -                               if (drv->states[i].target_residency <= delta_next_us)
> +                               if (drv->states[i].target_residency_ns <= delta_next)
>                                         break;
>                         }
>                 }
> @@ -461,7 +457,7 @@ static void menu_update(struct cpuidle_d
>         struct menu_device *data = this_cpu_ptr(&menu_devices);
>         int last_idx = dev->last_state_idx;
>         struct cpuidle_state *target = &drv->states[last_idx];
> -       unsigned int measured_us;
> +       u64 measured_ns;
>         unsigned int new_factor;
>
>         /*
> @@ -479,7 +475,7 @@ static void menu_update(struct cpuidle_d
>          * assume the state was never reached and the exit latency is 0.
>          */
>
> -       if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
> +       if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
>                 /*
>                  * The nohz code said that there wouldn't be any events within
>                  * the tick boundary (if the tick was stopped), but the idle
> @@ -489,7 +485,7 @@ static void menu_update(struct cpuidle_d
>                  * have been idle long (but not forever) to help the idle
>                  * duration predictor do a better job next time.
>                  */
> -               measured_us = 9 * MAX_INTERESTING / 10;
> +               measured_ns = 9 * MAX_INTERESTING / 10;
>         } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
>                    dev->poll_time_limit) {
>                 /*
> @@ -499,28 +495,29 @@ static void menu_update(struct cpuidle_d
>                  * the CPU might have been woken up from idle by the next timer.
>                  * Assume that to be the case.
>                  */
> -               measured_us = data->next_timer_us;
> +               measured_ns = data->next_timer_ns;
>         } else {
>                 /* measured value */
> -               measured_us = dev->last_residency;
> +               measured_ns = dev->last_residency_ns;
>
>                 /* Deduct exit latency */
> -               if (measured_us > 2 * target->exit_latency)
> -                       measured_us -= target->exit_latency;
> +               if (measured_ns > 2 * target->exit_latency_ns)
> +                       measured_ns -= target->exit_latency_ns;
>                 else
> -                       measured_us /= 2;
> +                       measured_ns /= 2;
>         }
>
>         /* Make sure our coefficients do not exceed unity */
> -       if (measured_us > data->next_timer_us)
> -               measured_us = data->next_timer_us;
> +       if (measured_ns > data->next_timer_ns)
> +               measured_ns = data->next_timer_ns;
>
>         /* Update our correction ratio */
>         new_factor = data->correction_factor[data->bucket];
>         new_factor -= new_factor / DECAY;
>
> -       if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
> -               new_factor += RESOLUTION * measured_us / data->next_timer_us;
> +       if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
> +               new_factor += RESOLUTION * div64_u64(measured_ns,
> +                                                    data->next_timer_ns);
>         else
>                 /*
>                  * we were idle so long that we count it as a perfect
> @@ -540,7 +537,7 @@ static void menu_update(struct cpuidle_d
>         data->correction_factor[data->bucket] = new_factor;
>
>         /* update the repeating-pattern data */
> -       data->intervals[data->interval_ptr++] = measured_us;
> +       data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
>         if (data->interval_ptr >= INTERVALS)
>                 data->interval_ptr = 0;
>  }
> Index: linux-pm/drivers/cpuidle/governors/teo.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/governors/teo.c
> +++ linux-pm/drivers/cpuidle/governors/teo.c
> @@ -104,7 +104,7 @@ struct teo_cpu {
>         u64 sleep_length_ns;
>         struct teo_idle_state states[CPUIDLE_STATE_MAX];
>         int interval_idx;
> -       unsigned int intervals[INTERVALS];
> +       u64 intervals[INTERVALS];
>  };
>
>  static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
> @@ -117,9 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, te
>  static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
>  {
>         struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
> -       unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
>         int i, idx_hit = -1, idx_timer = -1;
> -       unsigned int measured_us;
> +       u64 measured_ns;
>
>         if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
>                 /*
> @@ -127,23 +126,21 @@ static void teo_update(struct cpuidle_dr
>                  * enough to the closest timer event expected at the idle state
>                  * selection time to be discarded.
>                  */
> -               measured_us = UINT_MAX;
> +               measured_ns = U64_MAX;
>         } else {
> -               unsigned int lat;
> +               u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
>
> -               lat = drv->states[dev->last_state_idx].exit_latency;
> -
> -               measured_us = ktime_to_us(cpu_data->time_span_ns);
> +               measured_ns = cpu_data->time_span_ns;
>                 /*
>                  * The delay between the wakeup and the first instruction
>                  * executed by the CPU is not likely to be worst-case every
>                  * time, so take 1/2 of the exit latency as a very rough
>                  * approximation of the average of it.
>                  */
> -               if (measured_us >= lat)
> -                       measured_us -= lat / 2;
> +               if (measured_ns >= lat_ns)
> +                       measured_ns -= lat_ns / 2;
>                 else
> -                       measured_us /= 2;
> +                       measured_ns /= 2;
>         }
>
>         /*
> @@ -155,9 +152,9 @@ static void teo_update(struct cpuidle_dr
>
>                 cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
>
> -               if (drv->states[i].target_residency <= sleep_length_us) {
> +               if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
>                         idx_timer = i;
> -                       if (drv->states[i].target_residency <= measured_us)
> +                       if (drv->states[i].target_residency_ns <= measured_ns)
>                                 idx_hit = i;
>                 }
>         }
> @@ -193,7 +190,7 @@ static void teo_update(struct cpuidle_dr
>          * Save idle duration values corresponding to non-timer wakeups for
>          * pattern detection.
>          */
> -       cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
> +       cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
>         if (cpu_data->interval_idx > INTERVALS)
>                 cpu_data->interval_idx = 0;
>  }
> @@ -203,11 +200,11 @@ static void teo_update(struct cpuidle_dr
>   * @drv: cpuidle driver containing state data.
>   * @dev: Target CPU.
>   * @state_idx: Index of the capping idle state.
> - * @duration_us: Idle duration value to match.
> + * @duration_ns: Idle duration value to match.
>   */
>  static int teo_find_shallower_state(struct cpuidle_driver *drv,
>                                     struct cpuidle_device *dev, int state_idx,
> -                                   unsigned int duration_us)
> +                                   u64 duration_ns)
>  {
>         int i;
>
> @@ -216,7 +213,7 @@ static int teo_find_shallower_state(stru
>                         continue;
>
>                 state_idx = i;
> -               if (drv->states[i].target_residency <= duration_us)
> +               if (drv->states[i].target_residency_ns <= duration_ns)
>                         break;
>         }
>         return state_idx;
> @@ -232,8 +229,9 @@ static int teo_select(struct cpuidle_dri
>                       bool *stop_tick)
>  {
>         struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
> -       int latency_req = cpuidle_governor_latency_req(dev->cpu);
> -       unsigned int duration_us, hits, misses, early_hits;
> +       s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
> +       u64 duration_ns;
> +       unsigned int hits, misses, early_hits;
>         int max_early_idx, constraint_idx, idx, i;
>         ktime_t delta_tick;
>
> @@ -244,8 +242,8 @@ static int teo_select(struct cpuidle_dri
>
>         cpu_data->time_span_ns = local_clock();
>
> -       cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
> -       duration_us = ktime_to_us(cpu_data->sleep_length_ns);
> +       duration_ns = tick_nohz_get_sleep_length(&delta_tick);
> +       cpu_data->sleep_length_ns = duration_ns;
>
>         hits = 0;
>         misses = 0;
> @@ -262,7 +260,7 @@ static int teo_select(struct cpuidle_dri
>                          * Ignore disabled states with target residencies beyond
>                          * the anticipated idle duration.
>                          */
> -                       if (s->target_residency > duration_us)
> +                       if (s->target_residency_ns > duration_ns)
>                                 continue;
>
>                         /*
> @@ -301,7 +299,7 @@ static int teo_select(struct cpuidle_dri
>                          * shallow for that role.
>                          */
>                         if (!(tick_nohz_tick_stopped() &&
> -                             drv->states[idx].target_residency < TICK_USEC)) {
> +                             drv->states[idx].target_residency_ns < TICK_NSEC)) {
>                                 early_hits = cpu_data->states[i].early_hits;
>                                 max_early_idx = idx;
>                         }
> @@ -315,10 +313,10 @@ static int teo_select(struct cpuidle_dri
>                         misses = cpu_data->states[i].misses;
>                 }
>
> -               if (s->target_residency > duration_us)
> +               if (s->target_residency_ns > duration_ns)
>                         break;
>
> -               if (s->exit_latency > latency_req && constraint_idx > i)
> +               if (s->exit_latency_ns > latency_req && constraint_idx > i)
>                         constraint_idx = i;
>
>                 idx = i;
> @@ -327,7 +325,7 @@ static int teo_select(struct cpuidle_dri
>
>                 if (early_hits < cpu_data->states[i].early_hits &&
>                     !(tick_nohz_tick_stopped() &&
> -                     drv->states[i].target_residency < TICK_USEC)) {
> +                     drv->states[i].target_residency_ns < TICK_NSEC)) {
>                         early_hits = cpu_data->states[i].early_hits;
>                         max_early_idx = i;
>                 }
> @@ -343,7 +341,7 @@ static int teo_select(struct cpuidle_dri
>          */
>         if (hits <= misses && max_early_idx >= 0) {
>                 idx = max_early_idx;
> -               duration_us = drv->states[idx].target_residency;
> +               duration_ns = drv->states[idx].target_residency_ns;
>         }
>
>         /*
> @@ -364,9 +362,9 @@ static int teo_select(struct cpuidle_dri
>                  * the current expected idle duration value.
>                  */
>                 for (i = 0; i < INTERVALS; i++) {
> -                       unsigned int val = cpu_data->intervals[i];
> +                       u64 val = cpu_data->intervals[i];
>
> -                       if (val >= duration_us)
> +                       if (val >= duration_ns)
>                                 continue;
>
>                         count++;
> @@ -378,17 +376,17 @@ static int teo_select(struct cpuidle_dri
>                  * values are in the interesting range.
>                  */
>                 if (count > INTERVALS / 2) {
> -                       unsigned int avg_us = div64_u64(sum, count);
> +                       u64 avg_ns = div64_u64(sum, count);
>
>                         /*
>                          * Avoid spending too much time in an idle state that
>                          * would be too shallow.
>                          */
> -                       if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
> -                               duration_us = avg_us;
> -                               if (drv->states[idx].target_residency > avg_us)
> +                       if (!(tick_nohz_tick_stopped() && avg_ns < TICK_NSEC)) {
> +                               duration_ns = avg_ns;
> +                               if (drv->states[idx].target_residency_ns > avg_ns)
>                                         idx = teo_find_shallower_state(drv, dev,
> -                                                                      idx, avg_us);
> +                                                                      idx, avg_ns);
>                         }
>                 }
>         }
> @@ -398,9 +396,7 @@ static int teo_select(struct cpuidle_dri
>          * expected idle duration is shorter than the tick period length.
>          */
>         if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
> -           duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
> -               unsigned int delta_tick_us = ktime_to_us(delta_tick);
> -
> +           duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
>                 *stop_tick = false;
>
>                 /*
> @@ -409,8 +405,8 @@ static int teo_select(struct cpuidle_dri
>                  * till the closest timer including the tick, try to correct
>                  * that.
>                  */
> -               if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
> -                       idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
> +               if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
> +                       idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
>         }
>
>         return idx;
> @@ -454,7 +450,7 @@ static int teo_enable_device(struct cpui
>         memset(cpu_data, 0, sizeof(*cpu_data));
>
>         for (i = 0; i < INTERVALS; i++)
> -               cpu_data->intervals[i] = UINT_MAX;
> +               cpu_data->intervals[i] = U64_MAX;
>
>         return 0;
>  }
> Index: linux-pm/drivers/cpuidle/governors/haltpoll.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/governors/haltpoll.c
> +++ linux-pm/drivers/cpuidle/governors/haltpoll.c
> @@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidl
>                            struct cpuidle_device *dev,
>                            bool *stop_tick)
>  {
> -       int latency_req = cpuidle_governor_latency_req(dev->cpu);
> +       s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
>
>         if (!drv->state_count || latency_req == 0) {
>                 *stop_tick = false;
> @@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidl
>         return 0;
>  }
>
> -static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
> +static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
>  {
>         unsigned int val;
> -       u64 block_ns = block_us*NSEC_PER_USEC;
>
>         /* Grow cpu_halt_poll_us if
>          * cpu_halt_poll_us < block_ns < guest_halt_poll_us
> @@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpui
>         dev->last_state_idx = index;
>
>         if (index != 0)
> -               adjust_poll_limit(dev, dev->last_residency);
> +               adjust_poll_limit(dev, dev->last_residency_ns);
>  }
>
>  /**
> Index: linux-pm/kernel/sched/idle.c
> ===================================================================
> --- linux-pm.orig/kernel/sched/idle.c
> +++ linux-pm/kernel/sched/idle.c
> @@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_d
>          * update no idle residency and return.
>          */
>         if (current_clr_polling_and_test()) {
> -               dev->last_residency = 0;
> +               dev->last_residency_ns = 0;
>                 local_irq_enable();
>                 return -EBUSY;
>         }
>
>
>
Doug Smythies Nov. 8, 2019, 8:45 a.m. UTC | #2
On 2019.11.07 17:44 Rafael J. Wysocki wrote:
> On Thu, Nov 7, 2019 at 3:25 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>>
>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>
>> Currently, the cpuidle subsystem uses microseconds as the unit of
>> time which (among other things) causes the idle loop to incur some
>> integer division overhead for no clear benefit.
>>
>> In order to allow cpuidle to measure time in nanoseconds, add two
>> additional fields, exit_latency_ns and target_residency_ns, to
>> represent the exit latency and target residency of an idle state
>> in nanoseconds, respectively, to struct cpuidle_state_usage and
>> initialize them with the help of the corresponding values in
>> microseconds provided by drivers.  In addition to that, change
>> cpuidle_governor_latency_req() to return the idle state exit
>> latency constraint in nanoseconds.
>>
>> With that, meeasure idle state residency (last_residency_ns in
>> struct cpuidle_device and time_ns in struct cpuidle_driver) in
>> nanoseconds and update the cpuidle core and governors accordingly.
>>
>> However, the menu governor still computes typical intervals in
>> microseconds to avoid integer overflows.
>
> Since this addresses all of the comments received by the RFC version
> that was posted over a month ago and I don't see any more issues with
> it, I'm tempted to simply queue it up for 5.5 unless somebody sees a
> good enough reason why that would be a bad idea.

Could I please have another day or two?

I did try the RFC version, but not much as I went off on those
teo issues and backtracked pretty quickly.

I have been running this v2 today, with both menu and teo
governors. Acquiring some baseline reference data to compare
to now. The menu governor response seems different (Supporting
information/graphs will come later).
teo just started.

I lost a bunch of time due to being somewhat linux-next challenged.

... Doug
Peter Zijlstra Nov. 8, 2019, 9:39 a.m. UTC | #3
On Fri, Nov 08, 2019 at 02:44:13AM +0100, Rafael J. Wysocki wrote:
> On Thu, Nov 7, 2019 at 3:25 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> >
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > Currently, the cpuidle subsystem uses microseconds as the unit of
> > time which (among other things) causes the idle loop to incur some
> > integer division overhead for no clear benefit.
> >
> > In order to allow cpuidle to measure time in nanoseconds, add two
> > additional fields, exit_latency_ns and target_residency_ns, to
> > represent the exit latency and target residency of an idle state
> > in nanoseconds, respectively, to struct cpuidle_state_usage and
> > initialize them with the help of the corresponding values in
> > microseconds provided by drivers.  In addition to that, change
> > cpuidle_governor_latency_req() to return the idle state exit
> > latency constraint in nanoseconds.
> >
> > With that, meeasure idle state residency (last_residency_ns in
> > struct cpuidle_device and time_ns in struct cpuidle_driver) in
> > nanoseconds and update the cpuidle core and governors accordingly.
> >
> > However, the menu governor still computes typical intervals in
> > microseconds to avoid integer overflows.
> 
> Since this addresses all of the comments received by the RFC version
> that was posted over a month ago and I don't see any more issues with
> it, I'm tempted to simply queue it up for 5.5 unless somebody sees a
> good enough reason why that would be a bad idea.

Nah, have at.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Rafael J. Wysocki Nov. 8, 2019, 9:45 a.m. UTC | #4
On Fri, Nov 8, 2019 at 9:45 AM Doug Smythies <dsmythies@telus.net> wrote:
>
> On 2019.11.07 17:44 Rafael J. Wysocki wrote:
> > On Thu, Nov 7, 2019 at 3:25 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> >>
> >> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>
> >> Currently, the cpuidle subsystem uses microseconds as the unit of
> >> time which (among other things) causes the idle loop to incur some
> >> integer division overhead for no clear benefit.
> >>
> >> In order to allow cpuidle to measure time in nanoseconds, add two
> >> additional fields, exit_latency_ns and target_residency_ns, to
> >> represent the exit latency and target residency of an idle state
> >> in nanoseconds, respectively, to struct cpuidle_state_usage and
> >> initialize them with the help of the corresponding values in
> >> microseconds provided by drivers.  In addition to that, change
> >> cpuidle_governor_latency_req() to return the idle state exit
> >> latency constraint in nanoseconds.
> >>
> >> With that, meeasure idle state residency (last_residency_ns in
> >> struct cpuidle_device and time_ns in struct cpuidle_driver) in
> >> nanoseconds and update the cpuidle core and governors accordingly.
> >>
> >> However, the menu governor still computes typical intervals in
> >> microseconds to avoid integer overflows.
> >
> > Since this addresses all of the comments received by the RFC version
> > that was posted over a month ago and I don't see any more issues with
> > it, I'm tempted to simply queue it up for 5.5 unless somebody sees a
> > good enough reason why that would be a bad idea.
>
> Could I please have another day or two?

Sure, it won't go straight into linux-next anyway. :-)

> I did try the RFC version, but not much as I went off on those
> teo issues and backtracked pretty quickly.
>
> I have been running this v2 today, with both menu and teo
> governors. Acquiring some baseline reference data to compare
> to now. The menu governor response seems different (Supporting
> information/graphs will come later).

That may be good or bad, depending on the way in which it is different. :-)

> teo just started.
>
> I lost a bunch of time due to being somewhat linux-next challenged.

No worries, please take your time!

I very much appreciate the testing work you are doing.

Cheers!
Doug Smythies Nov. 8, 2019, 5:04 p.m. UTC | #5
On 2019.11.08 01:45 Rafael J. Wysocki wrote:
> On Fri, Nov 8, 2019 at 9:45 AM Doug Smythies <dsmythies@telus.net> wrote:
>
...
>> I have been running this v2 today, with both menu and teo
>> governors. Acquiring some baseline reference data to compare
>> to now. The menu governor response seems different (Supporting
>> information/graphs will come later).
>
> That may be good or bad, depending in what way it is different. :-)

My thinking was that the differences should be minimal between
the baseline (linux-next as of 2019.11.07) and the baseline plus your
two patches, because this was a change of units, not of functionality.
Such is the case with the teo governor, but not the menu governor.
I have not tried the ladder or haltpoll governors, and don't intend to.

Now to attempt to isolate the issue in the code, which might take
considerable time.

The test in question was the sweep test developed during the
powernightmare work. It is a crude attempt to sweep through
a preference for all idle states. It is not a great test, but that
does not matter if it reveals something.

Reference:
http://www.smythies.com/~doug/linux/idle/nano-second-conversion/sweep/index.html

Legend:
teo-base : linux-next 2019.11.07
menu-base: linux-next 2019.11.07
teo-v2   : linux-next 2019.11.07 + cpuidle: Consolidate disabled state checks + this v2
menu-v2  : linux-next 2019.11.07 + cpuidle: Consolidate disabled state checks + this v2

Note: I only placed the graphs at that location; there is no link to it
from the parent web pages.

... Doug
Rafael J. Wysocki Nov. 10, 2019, 4:48 p.m. UTC | #6
On Fri, Nov 8, 2019 at 6:04 PM Doug Smythies <dsmythies@telus.net> wrote:
>
> On 2019.11.08 01:45 Rafael J. Wysocki wrote:
> > On Fri, Nov 8, 2019 at 9:45 AM Doug Smythies <dsmythies@telus.net> wrote:
> >
> ...
> >> I have been running this v2 today, with both menu and teo
> >> governors. Acquiring some baseline reference data to compare
> >> to now. The menu governor response seems different (Supporting
> >> information/graphs will come later).
> >
> > That may be good or bad, depending in what way it is different. :-)
>
> My thinking was that the differences should be minimal between
> the baseline (linux-next as of 2019.11.07) and plus your two patches.
> Because this was a change of units, but not functionality.
> Such is the case with the teo governor, but not the menu governor.
> I have not tried the ladder or haltpoll governors, and don't intend to.
>
> Now to attempt to isolate the issue in the code, which might take
> considerable time.

Thanks!

It looks like I have overlooked a unit conversion in menu or done a
unit conversion twice somewhere.
Rafael J. Wysocki Nov. 10, 2019, 5:24 p.m. UTC | #7
On Sunday, November 10, 2019 5:48:21 PM CET Rafael J. Wysocki wrote:
> On Fri, Nov 8, 2019 at 6:04 PM Doug Smythies <dsmythies@telus.net> wrote:
> >
> > On 2019.11.08 01:45 Rafael J. Wysocki wrote:
> > > On Fri, Nov 8, 2019 at 9:45 AM Doug Smythies <dsmythies@telus.net> wrote:
> > >
> > ...
> > >> I have been running this v2 today, with both menu and teo
> > >> governors. Acquiring some baseline reference data to compare
> > >> to now. The menu governor response seems different (Supporting
> > >> information/graphs will come later).
> > >
> > > That may be good or bad, depending in what way it is different. :-)
> >
> > My thinking was that the differences should be minimal between
> > the baseline (linux-next as of 2019.11.07) and plus your two patches.
> > Because this was a change of units, but not functionality.
> > Such is the case with the teo governor, but not the menu governor.
> > I have not tried the ladder or haltpoll governors, and don't intend to.
> >
> > Now to attempt to isolate the issue in the code, which might take
> > considerable time.
> 
> Thanks!
> 
> It looks like I have overlooked a unit conversion in menu or done a
> unit conversion twice somewhere.

I have found a bug, which should be addressed by the patch below.

If it still doesn't reduce the discrepancy, we'll need to look further.

---
 drivers/cpuidle/governors/menu.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-pm/drivers/cpuidle/governors/menu.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governors/menu.c
+++ linux-pm/drivers/cpuidle/governors/menu.c
@@ -516,8 +516,8 @@ static void menu_update(struct cpuidle_d
 	new_factor -= new_factor / DECAY;
 
 	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
-		new_factor += RESOLUTION * div64_u64(measured_ns,
-						     data->next_timer_ns);
+		new_factor += div64_u64(RESOLUTION * measured_ns,
+					data->next_timer_ns);
 	else
 		/*
 		 * we were idle so long that we count it as a perfect
Doug Smythies Nov. 10, 2019, 6:09 p.m. UTC | #8
On 2019.11.10 09:24 Rafael J. Wysocki wrote:
> On Sunday, November 10, 2019 5:48:21 PM CET Rafael J. Wysocki wrote:
>> On Fri, Nov 8, 2019 at 6:04 PM Doug Smythies <dsmythies@telus.net> wrote:
>>>
>>> On 2019.11.08 01:45 Rafael J. Wysocki wrote:
>>>> On Fri, Nov 8, 2019 at 9:45 AM Doug Smythies <dsmythies@telus.net> wrote:
>>>>
>>> ...
>>>>> I have been running this v2 today, with both menu and teo
>>>>> governors. Acquiring some baseline reference data to compare
>>>>> to now. The menu governor response seems different (Supporting
>>>>> information/graphs will come later).
>>>>
>>>> That may be good or bad, depending in what way it is different. :-)
>>>
>>> My thinking was that the differences should be minimal between
>>> the baseline (linux-next as of 2019.11.07) and plus your two patches.
>>> Because this was a change of units, but not functionality.
>>> Such is the case with the teo governor, but not the menu governor.
>>> I have not tried the ladder or haltpoll governors, and don't intend to.
>>>
>>> Now to attempt to isolate the issue in the code, which might take
>>> considerable time.
>> 
>> Thanks!
>> 
>> It looks like I have overlooked a unit conversion in menu or done a
>> unit conversion twice somewhere.
>
> I have found a bug, which should be addressed by the patch below.
>
> If it still doesn't reduce the discrepancy, we'll need to look further.
>
> ---
> drivers/cpuidle/governors/menu.c |    4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> Index: linux-pm/drivers/cpuidle/governors/menu.c
> ===================================================================
> --- linux-pm.orig/drivers/cpuidle/governors/menu.c
> +++ linux-pm/drivers/cpuidle/governors/menu.c
> @@ -516,8 +516,8 @@ static void menu_update(struct cpuidle_d
> 	new_factor -= new_factor / DECAY;
> 
> 	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
> -		new_factor += RESOLUTION * div64_u64(measured_ns,
> -						     data->next_timer_ns);
> +		new_factor += div64_u64(RESOLUTION * measured_ns,
> +					data->next_timer_ns);
> 	else
> 		/*
> 		 * we were idle so long that we count it as a perfect

Yes, that was the exact bit of code I focused on yesterday.
However, my attempted fix was different and made no difference:
the new graph landed exactly on top of the old bad one.
I had defined new_factor as u64 and RESOLUTION as ULL.
Note: I was not aiming for the most efficient code in these attempts,
just trying to get to the answer.

I'll try yours and report back.

... Doug
Doug Smythies Nov. 10, 2019, 10:12 p.m. UTC | #9
On 2019.11.10 10:10 Doug Smythies wrote:
> On 2019.11.10 09:24 Rafael J. Wysocki wrote:
>> On Sunday, November 10, 2019 5:48:21 PM CET Rafael J. Wysocki wrote:
>>
>> I have found a bug, which should be addressed by the patch below.
>>
>> If it still doesn't reduce the discrepancy, we'll need to look further.
>>
>> ---
>> drivers/cpuidle/governors/menu.c |    4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> Index: linux-pm/drivers/cpuidle/governors/menu.c
>> ===================================================================
>> --- linux-pm.orig/drivers/cpuidle/governors/menu.c
>> +++ linux-pm/drivers/cpuidle/governors/menu.c
>> @@ -516,8 +516,8 @@ static void menu_update(struct cpuidle_d
>> 	new_factor -= new_factor / DECAY;
>> 
>> 	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
>> -		new_factor += RESOLUTION * div64_u64(measured_ns,
>> -						     data->next_timer_ns);
>> +		new_factor += div64_u64(RESOLUTION * measured_ns,
>> +					data->next_timer_ns);
>> 	else
>> 		/*
>> 		 * we were idle so long that we count it as a perfect
>
> Yes, that was the exact bit of code I focused on yesterday.
> However, my attempt to fix was different, and made no difference,
> with the new graph being exactly on top of the old bad one.
> I had defined new_factor as u64 and RESOLUTION as ULL.

Your patch does fix the problem.
I now also understand why my attempt did not.

New data added to previous graphs. For those that don't
want to go to the graphs, the nanosecond menu graphs are now
almost identical to the microsecond based one.

http://www.smythies.com/~doug/linux/idle/nano-second-conversion/sweep/index.html

Legend:
teo-base : linux-next 2019.11.07
menu-base: linux-next 2019.11.07
teo-v2   : linux-next 2019.11.07 + cpuidle: Consolidate disabled state checks + this v2
menu-v2  : linux-next 2019.11.07 + cpuidle: Consolidate disabled state checks + this v2
rjw1     : menu-v2 + above patch.

Acked-by and tested-by Doug Smythies <dsmythies@telus.net>

Disclaimer: Only teo and menu, not ladder or haltpoll governors.

Additional suggestions:

Header comments:

> microseconds provided by drivers.  In addition to that, change
> cpuidle_governor_latency_req() to return the idle state exit
> latency constraint in nanoseconds.

Suggest:

microseconds provided by drivers.  Additionally, change
cpuidle_governor_latency_req() to return the idle state exit
latency constraint in nanoseconds.

> With that, meeasure idle state residency (last_residency_ns in
             ^^^^^^^^
Suggest:

Also measure idle state residency (last_residency_ns in

Code:
Suggest deletion of this note:

/*
 * Please note when changing the tuning values:
 * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
 * a scaling operation multiplication may overflow on 32 bit platforms.
 * In that case, #define RESOLUTION as ULL to get 64 bit result:
 * #define RESOLUTION 1024ULL
 *
 * The default values do not overflow.
 */

Because you have managed the extra bit requirements as part of the patch.

... Doug
Rafael J. Wysocki Nov. 11, 2019, 9:29 p.m. UTC | #10
On Sun, Nov 10, 2019 at 11:12 PM Doug Smythies <dsmythies@telus.net> wrote:
>
> On 2019.11.10 10:10 Doug Smythies wrote:
> > On 2019.11.10 09:24 Rafael J. Wysocki wrote:
> >> On Sunday, November 10, 2019 5:48:21 PM CET Rafael J. Wysocki wrote:
> >>
> >> I have found a bug, which should be addressed by the patch below.
> >>
> >> If it still doesn't reduce the discrepancy, we'll need to look further.
> >>
> >> ---
> >> drivers/cpuidle/governors/menu.c |    4 ++--
> >> 1 file changed, 2 insertions(+), 2 deletions(-)
> >>
> >> Index: linux-pm/drivers/cpuidle/governors/menu.c
> >> ===================================================================
> >> --- linux-pm.orig/drivers/cpuidle/governors/menu.c
> >> +++ linux-pm/drivers/cpuidle/governors/menu.c
> >> @@ -516,8 +516,8 @@ static void menu_update(struct cpuidle_d
> >>      new_factor -= new_factor / DECAY;
> >>
> >>      if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
> >> -            new_factor += RESOLUTION * div64_u64(measured_ns,
> >> -                                                 data->next_timer_ns);
> >> +            new_factor += div64_u64(RESOLUTION * measured_ns,
> >> +                                    data->next_timer_ns);
> >>      else
> >>              /*
> >>               * we were idle so long that we count it as a perfect
> >
> > Yes, that was the exact bit of code I focused on yesterday.
> > However, my attempt to fix was different, and made no difference,
> > with the new graph being exactly on top of the old bad one.
> > I had defined new_factor as u64 and RESOLUTION as ULL.
>
> Your patch does fix the problem.
> I now also understand why my attempt did not.
>
> New data added to previous graphs. For those that don't
> want to go to the graphs, the nanosecond menu graphs are now
> almost identical to the microsecond based one.
>
> http://www.smythies.com/~doug/linux/idle/nano-second-conversion/sweep/index.html
>
> Legend:
> teo-base : linux-next 2019.11.07
> menu-base: linux-next 2019.11.07
> teo-v2   : linux-next 2019.11.07 + cpuidle: Consolidate disabled state checks + this v2
> menu-v2  : linux-next 2019.11.07 + cpuidle: Consolidate disabled state checks + this v2
> rjw1     : menu-v2 + above patch.
>
> Acked-by and tested-by Doug Smythies <dsmythies@telus.net>

Thanks a lot!

> Disclaimer: Only teo and menu, not ladder or haltpoll governors.
>
> Additional suggestions:
>
> Header comments:
>
> > microseconds provided by drivers.  In addition to that, change
> > cpuidle_governor_latency_req() to return the idle state exit
> > latency constraint in nanoseconds.
>
> Suggest:
>
> microseconds provided by drivers.  Additionally, change
> cpuidle_governor_latency_req() to return the idle state exit
> latency constraint in nanoseconds.
>
> > With that, meeasure idle state residency (last_residency_ns in
>              ^^^^^^^^
> Suggest:
>
> Also measure idle state residency (last_residency_ns in
>
> Code:
> Suggest deletion of this note:
>
> /*
>  * Please note when changing the tuning values:
>  * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
>  * a scaling operation multiplication may overflow on 32 bit platforms.
>  * In that case, #define RESOLUTION as ULL to get 64 bit result:
>  * #define RESOLUTION 1024ULL
>  *
>  * The default values do not overflow.
>  */
>
> Because you have managed the extra bit requirements as part of the patch.

Good suggestion! :-)

I have folded the fix and the removal of the comment as suggested
above into v2 and applied the resulting patch with tags from you
and Peter.

Also the changelog has been updated as suggested.

Thanks!

Patch
diff mbox series

Index: linux-pm/include/linux/cpuidle.h
===================================================================
--- linux-pm.orig/include/linux/cpuidle.h
+++ linux-pm/include/linux/cpuidle.h
@@ -35,7 +35,7 @@  struct cpuidle_driver;
 struct cpuidle_state_usage {
 	unsigned long long	disable;
 	unsigned long long	usage;
-	unsigned long long	time; /* in US */
+	u64			time_ns;
 	unsigned long long	above; /* Number of times it's been too deep */
 	unsigned long long	below; /* Number of times it's been too shallow */
 #ifdef CONFIG_SUSPEND
@@ -48,6 +48,8 @@  struct cpuidle_state {
 	char		name[CPUIDLE_NAME_LEN];
 	char		desc[CPUIDLE_DESC_LEN];
 
+	u64		exit_latency_ns;
+	u64		target_residency_ns;
 	unsigned int	flags;
 	unsigned int	exit_latency; /* in US */
 	int		power_usage; /* in mW */
@@ -89,7 +91,7 @@  struct cpuidle_device {
 	ktime_t			next_hrtimer;
 
 	int			last_state_idx;
-	int			last_residency;
+	u64			last_residency_ns;
 	u64			poll_limit_ns;
 	struct cpuidle_state_usage	states_usage[CPUIDLE_STATE_MAX];
 	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
@@ -263,7 +265,7 @@  struct cpuidle_governor {
 
 #ifdef CONFIG_CPU_IDLE
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
-extern int cpuidle_governor_latency_req(unsigned int cpu);
+extern s64 cpuidle_governor_latency_req(unsigned int cpu);
 #else
 static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
Index: linux-pm/drivers/cpuidle/driver.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/driver.c
+++ linux-pm/drivers/cpuidle/driver.c
@@ -165,16 +165,27 @@  static void __cpuidle_driver_init(struct
 	if (!drv->cpumask)
 		drv->cpumask = (struct cpumask *)cpu_possible_mask;
 
-	/*
-	 * Look for the timer stop flag in the different states, so that we know
-	 * if the broadcast timer has to be set up.  The loop is in the reverse
-	 * order, because usually one of the deeper states have this flag set.
-	 */
-	for (i = drv->state_count - 1; i >= 0 ; i--) {
-		if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
+	for (i = 0; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+
+		/*
+		 * Look for the timer stop flag in the different states and if
+		 * it is found, indicate that the broadcast timer has to be set
+		 * up.
+		 */
+		if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
 			drv->bctimer = 1;
-			break;
-		}
+
+		/*
+		 * The core will use the target residency and exit latency
+		 * values in nanoseconds, but allow drivers to provide them in
+		 * microseconds too.
+		 */
+		if (s->target_residency > 0)
+			s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
+
+		if (s->exit_latency > 0)
+			s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
 	}
 }
 
Index: linux-pm/drivers/cpuidle/cpuidle.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/cpuidle.c
+++ linux-pm/drivers/cpuidle/cpuidle.c
@@ -75,24 +75,24 @@  int cpuidle_play_dead(void)
 
 static int find_deepest_state(struct cpuidle_driver *drv,
 			      struct cpuidle_device *dev,
-			      unsigned int max_latency,
+			      u64 max_latency_ns,
 			      unsigned int forbidden_flags,
 			      bool s2idle)
 {
-	unsigned int latency_req = 0;
+	u64 latency_req = 0;
 	int i, ret = 0;
 
 	for (i = 1; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
 
 		if (dev->states_usage[i].disable ||
-		    s->exit_latency <= latency_req ||
-		    s->exit_latency > max_latency ||
+		    s->exit_latency_ns <= latency_req ||
+		    s->exit_latency_ns > max_latency_ns ||
 		    (s->flags & forbidden_flags) ||
 		    (s2idle && !s->enter_s2idle))
 			continue;
 
-		latency_req = s->exit_latency;
+		latency_req = s->exit_latency_ns;
 		ret = i;
 	}
 	return ret;
@@ -124,7 +124,7 @@  void cpuidle_use_deepest_state(bool enab
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 			       struct cpuidle_device *dev)
 {
-	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
+	return find_deepest_state(drv, dev, U64_MAX, 0, false);
 }
 
 #ifdef CONFIG_SUSPEND
@@ -180,7 +180,7 @@  int cpuidle_enter_s2idle(struct cpuidle_
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
+	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
 	if (index > 0)
 		enter_s2idle_proper(drv, dev, index);
 
@@ -209,7 +209,7 @@  int cpuidle_enter_state(struct cpuidle_d
 	 * CPU as a broadcast timer, this call may fail if it is not available.
 	 */
 	if (broadcast && tick_broadcast_enter()) {
-		index = find_deepest_state(drv, dev, target_state->exit_latency,
+		index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
 					   CPUIDLE_FLAG_TIMER_STOP, false);
 		if (index < 0) {
 			default_idle_call();
@@ -247,7 +247,7 @@  int cpuidle_enter_state(struct cpuidle_d
 		local_irq_enable();
 
 	if (entered_state >= 0) {
-		s64 diff, delay = drv->states[entered_state].exit_latency;
+		s64 diff, delay = drv->states[entered_state].exit_latency_ns;
 		int i;
 
 		/*
@@ -255,15 +255,13 @@  int cpuidle_enter_state(struct cpuidle_d
 		 * This can be moved to within driver enter routine,
 		 * but that results in multiple copies of same code.
 		 */
-		diff = ktime_us_delta(time_end, time_start);
-		if (diff > INT_MAX)
-			diff = INT_MAX;
+		diff = ktime_sub(time_end, time_start);
 
-		dev->last_residency = (int)diff;
-		dev->states_usage[entered_state].time += dev->last_residency;
+		dev->last_residency_ns = diff;
+		dev->states_usage[entered_state].time_ns += diff;
 		dev->states_usage[entered_state].usage++;
 
-		if (diff < drv->states[entered_state].target_residency) {
+		if (diff < drv->states[entered_state].target_residency_ns) {
 			for (i = entered_state - 1; i >= 0; i--) {
 				if (dev->states_usage[i].disable)
 					continue;
@@ -281,14 +279,14 @@  int cpuidle_enter_state(struct cpuidle_d
 				 * Update if a deeper state would have been a
 				 * better match for the observed idle duration.
 				 */
-				if (diff - delay >= drv->states[i].target_residency)
+				if (diff - delay >= drv->states[i].target_residency_ns)
 					dev->states_usage[entered_state].below++;
 
 				break;
 			}
 		}
 	} else {
-		dev->last_residency = 0;
+		dev->last_residency_ns = 0;
 	}
 
 	return entered_state;
@@ -381,7 +379,7 @@  u64 cpuidle_poll_time(struct cpuidle_dri
 		if (dev->states_usage[i].disable)
 			continue;
 
-		limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
+		limit_ns = (u64)drv->states[i].target_residency_ns;
 	}
 
 	dev->poll_limit_ns = limit_ns;
@@ -552,7 +550,7 @@  static void __cpuidle_unregister_device(
 static void __cpuidle_device_init(struct cpuidle_device *dev)
 {
 	memset(dev->states_usage, 0, sizeof(dev->states_usage));
-	dev->last_residency = 0;
+	dev->last_residency_ns = 0;
 	dev->next_hrtimer = 0;
 }
 
Index: linux-pm/drivers/cpuidle/sysfs.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/sysfs.c
+++ linux-pm/drivers/cpuidle/sysfs.c
@@ -273,16 +273,30 @@  static ssize_t show_state_##_name(struct
 	return sprintf(buf, "%s\n", state->_name);\
 }
 
-define_show_state_function(exit_latency)
-define_show_state_function(target_residency)
+#define define_show_state_time_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, \
+				  struct cpuidle_state_usage *state_usage, \
+				  char *buf) \
+{ \
+	return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \
+}
+
+define_show_state_time_function(exit_latency)
+define_show_state_time_function(target_residency)
 define_show_state_function(power_usage)
 define_show_state_ull_function(usage)
-define_show_state_ull_function(time)
 define_show_state_str_function(name)
 define_show_state_str_function(desc)
 define_show_state_ull_function(above)
 define_show_state_ull_function(below)
 
+static ssize_t show_state_time(struct cpuidle_state *state,
+			       struct cpuidle_state_usage *state_usage,
+			       char *buf)
+{
+	return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns));
+}
+
 static ssize_t show_state_disable(struct cpuidle_state *state,
 				  struct cpuidle_state_usage *state_usage,
 				  char *buf)
Index: linux-pm/drivers/cpuidle/governor.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governor.c
+++ linux-pm/drivers/cpuidle/governor.c
@@ -107,11 +107,14 @@  int cpuidle_register_governor(struct cpu
  * cpuidle_governor_latency_req - Compute a latency constraint for CPU
  * @cpu: Target CPU
  */
-int cpuidle_governor_latency_req(unsigned int cpu)
+s64 cpuidle_governor_latency_req(unsigned int cpu)
 {
 	int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	struct device *device = get_cpu_device(cpu);
 	int device_req = dev_pm_qos_raw_resume_latency(device);
 
-	return device_req < global_req ? device_req : global_req;
+	if (device_req > global_req)
+		device_req = global_req;
+
+	return (s64)device_req * NSEC_PER_USEC;
 }
Index: linux-pm/drivers/cpuidle/governors/ladder.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governors/ladder.c
+++ linux-pm/drivers/cpuidle/governors/ladder.c
@@ -27,8 +27,8 @@  struct ladder_device_state {
 	struct {
 		u32 promotion_count;
 		u32 demotion_count;
-		u32 promotion_time;
-		u32 demotion_time;
+		u64 promotion_time_ns;
+		u64 demotion_time_ns;
 	} threshold;
 	struct {
 		int promotion_count;
@@ -68,9 +68,10 @@  static int ladder_select_state(struct cp
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct ladder_device_state *last_state;
-	int last_residency, last_idx = dev->last_state_idx;
+	int last_idx = dev->last_state_idx;
 	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 last_residency;
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
@@ -80,13 +81,13 @@  static int ladder_select_state(struct cp
 
 	last_state = &ldev->states[last_idx];
 
-	last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
+	last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;
 
 	/* consider promotion */
 	if (last_idx < drv->state_count - 1 &&
 	    !dev->states_usage[last_idx + 1].disable &&
-	    last_residency > last_state->threshold.promotion_time &&
-	    drv->states[last_idx + 1].exit_latency <= latency_req) {
+	    last_residency > last_state->threshold.promotion_time_ns &&
+	    drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -98,11 +99,11 @@  static int ladder_select_state(struct cp
 	/* consider demotion */
 	if (last_idx > first_idx &&
 	    (dev->states_usage[last_idx].disable ||
-	    drv->states[last_idx].exit_latency > latency_req)) {
+	    drv->states[last_idx].exit_latency_ns > latency_req)) {
 		int i;
 
 		for (i = last_idx - 1; i > first_idx; i--) {
-			if (drv->states[i].exit_latency <= latency_req)
+			if (drv->states[i].exit_latency_ns <= latency_req)
 				break;
 		}
 		ladder_do_selection(dev, ldev, last_idx, i);
@@ -110,7 +111,7 @@  static int ladder_select_state(struct cp
 	}
 
 	if (last_idx > first_idx &&
-	    last_residency < last_state->threshold.demotion_time) {
+	    last_residency < last_state->threshold.demotion_time_ns) {
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;
 		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
@@ -150,9 +151,9 @@  static int ladder_enable_device(struct c
 		lstate->threshold.demotion_count = DEMOTION_COUNT;
 
 		if (i < drv->state_count - 1)
-			lstate->threshold.promotion_time = state->exit_latency;
+			lstate->threshold.promotion_time_ns = state->exit_latency_ns;
 		if (i > first_idx)
-			lstate->threshold.demotion_time = state->exit_latency;
+			lstate->threshold.demotion_time_ns = state->exit_latency_ns;
 	}
 
 	return 0;
Index: linux-pm/drivers/cpuidle/governors/menu.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governors/menu.c
+++ linux-pm/drivers/cpuidle/governors/menu.c
@@ -33,7 +33,7 @@ 
 #define INTERVALS (1UL << INTERVAL_SHIFT)
 #define RESOLUTION 1024
 #define DECAY 8
-#define MAX_INTERESTING 50000
+#define MAX_INTERESTING (50000 * NSEC_PER_USEC)
 
 
 /*
@@ -120,14 +120,14 @@  struct menu_device {
 	int             needs_update;
 	int             tick_wakeup;
 
-	unsigned int	next_timer_us;
+	u64		next_timer_ns;
 	unsigned int	bucket;
 	unsigned int	correction_factor[BUCKETS];
 	unsigned int	intervals[INTERVALS];
 	int		interval_ptr;
 };
 
-static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
 {
 	int bucket = 0;
 
@@ -140,15 +140,15 @@  static inline int which_bucket(unsigned
 	if (nr_iowaiters)
 		bucket = BUCKETS/2;
 
-	if (duration < 10)
+	if (duration_ns < 10ULL * NSEC_PER_USEC)
 		return bucket;
-	if (duration < 100)
+	if (duration_ns < 100ULL * NSEC_PER_USEC)
 		return bucket + 1;
-	if (duration < 1000)
+	if (duration_ns < 1000ULL * NSEC_PER_USEC)
 		return bucket + 2;
-	if (duration < 10000)
+	if (duration_ns < 10000ULL * NSEC_PER_USEC)
 		return bucket + 3;
-	if (duration < 100000)
+	if (duration_ns < 100000ULL * NSEC_PER_USEC)
 		return bucket + 4;
 	return bucket + 5;
 }
@@ -276,13 +276,13 @@  static int menu_select(struct cpuidle_dr
 		       bool *stop_tick)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int i;
-	int idx;
-	unsigned int interactivity_req;
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	unsigned int predicted_us;
+	u64 predicted_ns;
+	u64 interactivity_req;
 	unsigned long nr_iowaiters;
 	ktime_t delta_next;
+	int i, idx;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -290,14 +290,14 @@  static int menu_select(struct cpuidle_dr
 	}
 
 	/* determine the expected residency time, round up */
-	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
+	data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
 
 	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
+	data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
-	    ((data->next_timer_us < drv->states[1].target_residency ||
-	      latency_req < drv->states[1].exit_latency) &&
+	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
+	      latency_req < drv->states[1].exit_latency_ns) &&
 	     !dev->states_usage[0].disable)) {
 		/*
 		 * In this case state[0] will be used no matter what, so return
@@ -308,18 +308,15 @@  static int menu_select(struct cpuidle_dr
 		return 0;
 	}
 
-	/*
-	 * Force the result of multiplication to be 64 bits even if both
-	 * operands are 32 bits.
-	 * Make sure to round up for half microseconds.
-	 */
-	predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
-					 data->correction_factor[data->bucket],
-					 RESOLUTION * DECAY);
-	/*
-	 * Use the lowest expected idle interval to pick the idle state.
-	 */
-	predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
+	/* Round up the result for half microseconds. */
+	predicted_us = div_u64(data->next_timer_ns *
+			       data->correction_factor[data->bucket] +
+			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
+			       RESOLUTION * DECAY * NSEC_PER_USEC);
+	/* Use the lowest expected idle interval to pick the idle state. */
+	predicted_ns = (u64)min(predicted_us,
+				get_typical_interval(data, predicted_us)) *
+				NSEC_PER_USEC;
 
 	if (tick_nohz_tick_stopped()) {
 		/*
@@ -330,14 +327,15 @@  static int menu_select(struct cpuidle_dr
 		 * the known time till the closest timer event for the idle
 		 * state selection.
 		 */
-		if (predicted_us < TICK_USEC)
-			predicted_us = ktime_to_us(delta_next);
+		if (predicted_ns < TICK_NSEC)
+			predicted_ns = delta_next;
 	} else {
 		/*
 		 * Use the performance multiplier and the user-configurable
 		 * latency_req to determine the maximum exit latency.
 		 */
-		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
+		interactivity_req = div64_u64(predicted_ns,
+					      performance_multiplier(nr_iowaiters));
 		if (latency_req > interactivity_req)
 			latency_req = interactivity_req;
 	}
@@ -356,19 +354,19 @@  static int menu_select(struct cpuidle_dr
 		if (idx == -1)
 			idx = i; /* first enabled state */
 
-		if (s->target_residency > predicted_us) {
+		if (s->target_residency_ns > predicted_ns) {
 			/*
 			 * Use a physical idle state, not busy polling, unless
 			 * a timer is going to trigger soon enough.
 			 */
 			if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
-			    s->exit_latency <= latency_req &&
-			    s->target_residency <= data->next_timer_us) {
-				predicted_us = s->target_residency;
+			    s->exit_latency_ns <= latency_req &&
+			    s->target_residency_ns <= data->next_timer_ns) {
+				predicted_ns = s->target_residency_ns;
 				idx = i;
 				break;
 			}
-			if (predicted_us < TICK_USEC)
+			if (predicted_ns < TICK_NSEC)
 				break;
 
 			if (!tick_nohz_tick_stopped()) {
@@ -378,7 +376,7 @@  static int menu_select(struct cpuidle_dr
 				 * tick in that case and let the governor run
 				 * again in the next iteration of the loop.
 				 */
-				predicted_us = drv->states[idx].target_residency;
+				predicted_ns = drv->states[idx].target_residency_ns;
 				break;
 			}
 
@@ -388,13 +386,13 @@  static int menu_select(struct cpuidle_dr
 			 * closest timer event, select this one to avoid getting
 			 * stuck in the shallow one for too long.
 			 */
-			if (drv->states[idx].target_residency < TICK_USEC &&
-			    s->target_residency <= ktime_to_us(delta_next))
+			if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+			    s->target_residency_ns <= delta_next)
 				idx = i;
 
 			return idx;
 		}
-		if (s->exit_latency > latency_req)
+		if (s->exit_latency_ns > latency_req)
 			break;
 
 		idx = i;
@@ -408,12 +406,10 @@  static int menu_select(struct cpuidle_dr
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	     predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
-		unsigned int delta_next_us = ktime_to_us(delta_next);
-
+	     predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
 		*stop_tick = false;
 
-		if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
+		if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
 			/*
 			 * The tick is not going to be stopped and the target
 			 * residency of the state to be returned is not within
@@ -425,7 +421,7 @@  static int menu_select(struct cpuidle_dr
 					continue;
 
 				idx = i;
-				if (drv->states[i].target_residency <= delta_next_us)
+				if (drv->states[i].target_residency_ns <= delta_next)
 					break;
 			}
 		}
@@ -461,7 +457,7 @@  static void menu_update(struct cpuidle_d
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	int last_idx = dev->last_state_idx;
 	struct cpuidle_state *target = &drv->states[last_idx];
-	unsigned int measured_us;
+	u64 measured_ns;
 	unsigned int new_factor;
 
 	/*
@@ -479,7 +475,7 @@  static void menu_update(struct cpuidle_d
 	 * assume the state was never reached and the exit latency is 0.
 	 */
 
-	if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+	if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) {
 		/*
 		 * The nohz code said that there wouldn't be any events within
 		 * the tick boundary (if the tick was stopped), but the idle
@@ -489,7 +485,7 @@  static void menu_update(struct cpuidle_d
 		 * have been idle long (but not forever) to help the idle
 		 * duration predictor do a better job next time.
 		 */
-		measured_us = 9 * MAX_INTERESTING / 10;
+		measured_ns = 9 * MAX_INTERESTING / 10;
 	} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
 		   dev->poll_time_limit) {
 		/*
@@ -499,28 +495,29 @@  static void menu_update(struct cpuidle_d
 		 * the CPU might have been woken up from idle by the next timer.
 		 * Assume that to be the case.
 		 */
-		measured_us = data->next_timer_us;
+		measured_ns = data->next_timer_ns;
 	} else {
 		/* measured value */
-		measured_us = dev->last_residency;
+		measured_ns = dev->last_residency_ns;
 
 		/* Deduct exit latency */
-		if (measured_us > 2 * target->exit_latency)
-			measured_us -= target->exit_latency;
+		if (measured_ns > 2 * target->exit_latency_ns)
+			measured_ns -= target->exit_latency_ns;
 		else
-			measured_us /= 2;
+			measured_ns /= 2;
 	}
 
 	/* Make sure our coefficients do not exceed unity */
-	if (measured_us > data->next_timer_us)
-		measured_us = data->next_timer_us;
+	if (measured_ns > data->next_timer_ns)
+		measured_ns = data->next_timer_ns;
 
 	/* Update our correction ratio */
 	new_factor = data->correction_factor[data->bucket];
 	new_factor -= new_factor / DECAY;
 
-	if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
-		new_factor += RESOLUTION * measured_us / data->next_timer_us;
+	if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING)
+		new_factor += RESOLUTION * div64_u64(measured_ns,
+						     data->next_timer_ns);
 	else
 		/*
 		 * we were idle so long that we count it as a perfect
@@ -540,7 +537,7 @@  static void menu_update(struct cpuidle_d
 	data->correction_factor[data->bucket] = new_factor;
 
 	/* update the repeating-pattern data */
-	data->intervals[data->interval_ptr++] = measured_us;
+	data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
 	if (data->interval_ptr >= INTERVALS)
 		data->interval_ptr = 0;
 }
Index: linux-pm/drivers/cpuidle/governors/teo.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governors/teo.c
+++ linux-pm/drivers/cpuidle/governors/teo.c
@@ -104,7 +104,7 @@  struct teo_cpu {
 	u64 sleep_length_ns;
 	struct teo_idle_state states[CPUIDLE_STATE_MAX];
 	int interval_idx;
-	unsigned int intervals[INTERVALS];
+	u64 intervals[INTERVALS];
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -117,9 +117,8 @@  static DEFINE_PER_CPU(struct teo_cpu, te
 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns);
 	int i, idx_hit = -1, idx_timer = -1;
-	unsigned int measured_us;
+	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
 		/*
@@ -127,23 +126,21 @@  static void teo_update(struct cpuidle_dr
 		 * enough to the closest timer event expected at the idle state
 		 * selection time to be discarded.
 		 */
-		measured_us = UINT_MAX;
+		measured_ns = U64_MAX;
 	} else {
-		unsigned int lat;
+		u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
 
-		lat = drv->states[dev->last_state_idx].exit_latency;
-
-		measured_us = ktime_to_us(cpu_data->time_span_ns);
+		measured_ns = cpu_data->time_span_ns;
 		/*
 		 * The delay between the wakeup and the first instruction
 		 * executed by the CPU is not likely to be worst-case every
 		 * time, so take 1/2 of the exit latency as a very rough
 		 * approximation of the average of it.
 		 */
-		if (measured_us >= lat)
-			measured_us -= lat / 2;
+		if (measured_ns >= lat_ns)
+			measured_ns -= lat_ns / 2;
 		else
-			measured_us /= 2;
+			measured_ns /= 2;
 	}
 
 	/*
@@ -155,9 +152,9 @@  static void teo_update(struct cpuidle_dr
 
 		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
 
-		if (drv->states[i].target_residency <= sleep_length_us) {
+		if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
-			if (drv->states[i].target_residency <= measured_us)
+			if (drv->states[i].target_residency_ns <= measured_ns)
 				idx_hit = i;
 		}
 	}
@@ -193,7 +190,7 @@  static void teo_update(struct cpuidle_dr
 	 * Save idle duration values corresponding to non-timer wakeups for
 	 * pattern detection.
 	 */
-	cpu_data->intervals[cpu_data->interval_idx++] = measured_us;
+	cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
 	if (cpu_data->interval_idx > INTERVALS)
 		cpu_data->interval_idx = 0;
 }
@@ -203,11 +200,11 @@  static void teo_update(struct cpuidle_dr
  * @drv: cpuidle driver containing state data.
  * @dev: Target CPU.
  * @state_idx: Index of the capping idle state.
- * @duration_us: Idle duration value to match.
+ * @duration_ns: Idle duration value to match.
  */
 static int teo_find_shallower_state(struct cpuidle_driver *drv,
 				    struct cpuidle_device *dev, int state_idx,
-				    unsigned int duration_us)
+				    u64 duration_ns)
 {
 	int i;
 
@@ -216,7 +213,7 @@  static int teo_find_shallower_state(stru
 			continue;
 
 		state_idx = i;
-		if (drv->states[i].target_residency <= duration_us)
+		if (drv->states[i].target_residency_ns <= duration_ns)
 			break;
 	}
 	return state_idx;
@@ -232,8 +229,9 @@  static int teo_select(struct cpuidle_dri
 		      bool *stop_tick)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
-	unsigned int duration_us, hits, misses, early_hits;
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	u64 duration_ns;
+	unsigned int hits, misses, early_hits;
 	int max_early_idx, constraint_idx, idx, i;
 	ktime_t delta_tick;
 
@@ -244,8 +242,8 @@  static int teo_select(struct cpuidle_dri
 
 	cpu_data->time_span_ns = local_clock();
 
-	cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
-	duration_us = ktime_to_us(cpu_data->sleep_length_ns);
+	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+	cpu_data->sleep_length_ns = duration_ns;
 
 	hits = 0;
 	misses = 0;
@@ -262,7 +260,7 @@  static int teo_select(struct cpuidle_dri
 			 * Ignore disabled states with target residencies beyond
 			 * the anticipated idle duration.
 			 */
-			if (s->target_residency > duration_us)
+			if (s->target_residency_ns > duration_ns)
 				continue;
 
 			/*
@@ -301,7 +299,7 @@  static int teo_select(struct cpuidle_dri
 			 * shallow for that role.
 			 */
 			if (!(tick_nohz_tick_stopped() &&
-			      drv->states[idx].target_residency < TICK_USEC)) {
+			      drv->states[idx].target_residency_ns < TICK_NSEC)) {
 				early_hits = cpu_data->states[i].early_hits;
 				max_early_idx = idx;
 			}
@@ -315,10 +313,10 @@  static int teo_select(struct cpuidle_dri
 			misses = cpu_data->states[i].misses;
 		}
 
-		if (s->target_residency > duration_us)
+		if (s->target_residency_ns > duration_ns)
 			break;
 
-		if (s->exit_latency > latency_req && constraint_idx > i)
+		if (s->exit_latency_ns > latency_req && constraint_idx > i)
 			constraint_idx = i;
 
 		idx = i;
@@ -327,7 +325,7 @@  static int teo_select(struct cpuidle_dri
 
 		if (early_hits < cpu_data->states[i].early_hits &&
 		    !(tick_nohz_tick_stopped() &&
-		      drv->states[i].target_residency < TICK_USEC)) {
+		      drv->states[i].target_residency_ns < TICK_NSEC)) {
 			early_hits = cpu_data->states[i].early_hits;
 			max_early_idx = i;
 		}
@@ -343,7 +341,7 @@  static int teo_select(struct cpuidle_dri
 	 */
 	if (hits <= misses && max_early_idx >= 0) {
 		idx = max_early_idx;
-		duration_us = drv->states[idx].target_residency;
+		duration_ns = drv->states[idx].target_residency_ns;
 	}
 
 	/*
@@ -364,9 +362,9 @@  static int teo_select(struct cpuidle_dri
 		 * the current expected idle duration value.
 		 */
 		for (i = 0; i < INTERVALS; i++) {
-			unsigned int val = cpu_data->intervals[i];
+			u64 val = cpu_data->intervals[i];
 
-			if (val >= duration_us)
+			if (val >= duration_ns)
 				continue;
 
 			count++;
@@ -378,17 +376,17 @@  static int teo_select(struct cpuidle_dri
 		 * values are in the interesting range.
 		 */
 		if (count > INTERVALS / 2) {
-			unsigned int avg_us = div64_u64(sum, count);
+			u64 avg_ns = div64_u64(sum, count);
 
 			/*
 			 * Avoid spending too much time in an idle state that
 			 * would be too shallow.
 			 */
-			if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
-				duration_us = avg_us;
-				if (drv->states[idx].target_residency > avg_us)
+			if (!(tick_nohz_tick_stopped() && avg_ns < TICK_NSEC)) {
+				duration_ns = avg_ns;
+				if (drv->states[idx].target_residency_ns > avg_ns)
 					idx = teo_find_shallower_state(drv, dev,
-								       idx, avg_us);
+								       idx, avg_ns);
 			}
 		}
 	}
@@ -398,9 +396,7 @@  static int teo_select(struct cpuidle_dri
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	    duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
-		unsigned int delta_tick_us = ktime_to_us(delta_tick);
-
+	    duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
 		*stop_tick = false;
 
 		/*
@@ -409,8 +405,8 @@  static int teo_select(struct cpuidle_dri
 		 * till the closest timer including the tick, try to correct
 		 * that.
 		 */
-		if (idx > 0 && drv->states[idx].target_residency > delta_tick_us)
-			idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us);
+		if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
+			idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
 	}
 
 	return idx;
@@ -454,7 +450,7 @@  static int teo_enable_device(struct cpui
 	memset(cpu_data, 0, sizeof(*cpu_data));
 
 	for (i = 0; i < INTERVALS; i++)
-		cpu_data->intervals[i] = UINT_MAX;
+		cpu_data->intervals[i] = U64_MAX;
 
 	return 0;
 }
Index: linux-pm/drivers/cpuidle/governors/haltpoll.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governors/haltpoll.c
+++ linux-pm/drivers/cpuidle/governors/haltpoll.c
@@ -49,7 +49,7 @@  static int haltpoll_select(struct cpuidl
 			   struct cpuidle_device *dev,
 			   bool *stop_tick)
 {
-	int latency_req = cpuidle_governor_latency_req(dev->cpu);
+	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 
 	if (!drv->state_count || latency_req == 0) {
 		*stop_tick = false;
@@ -75,10 +75,9 @@  static int haltpoll_select(struct cpuidl
 	return 0;
 }
 
-static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
+static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
 {
 	unsigned int val;
-	u64 block_ns = block_us*NSEC_PER_USEC;
 
 	/* Grow cpu_halt_poll_us if
 	 * cpu_halt_poll_us < block_ns < guest_halt_poll_us
@@ -115,7 +114,7 @@  static void haltpoll_reflect(struct cpui
 	dev->last_state_idx = index;
 
 	if (index != 0)
-		adjust_poll_limit(dev, dev->last_residency);
+		adjust_poll_limit(dev, dev->last_residency_ns);
 }
 
 /**
Index: linux-pm/kernel/sched/idle.c
===================================================================
--- linux-pm.orig/kernel/sched/idle.c
+++ linux-pm/kernel/sched/idle.c
@@ -104,7 +104,7 @@  static int call_cpuidle(struct cpuidle_d
 	 * update no idle residency and return.
 	 */
 	if (current_clr_polling_and_test()) {
-		dev->last_residency = 0;
+		dev->last_residency_ns = 0;
 		local_irq_enable();
 		return -EBUSY;
 	}