All of lore.kernel.org
 help / color / mirror / Atom feed
* [V2] x86/cpuidle: get accurate C0 value with xenpm tool
@ 2015-05-04  6:27 Huaitong Han
  2015-05-04  8:33 ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Huaitong Han @ 2015-05-04  6:27 UTC (permalink / raw)
  To: jbeulich; +Cc: Huaitong Han, xen-devel

When checking the ACPI C-state function, the sampled C0 value reported
by the xenpm tool can decrease after a 100 second sleep.
C0 is computed as NOW()-C1-C2-C3-C4. When NOW() is sampled during idle
time, it is bigger than the last C-state update time, so the C0 value
is also bigger than the true value. If the error in the second sample
does not make up for the error in the first one, the reported C0 value
decreases.

Signed-off-by: Huaitong Han <huaitong.han@intel.com>

diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index e639c99..e5fffe8 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -254,9 +254,10 @@ static char* acpi_cstate_method_name[] =
 
 static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
 {
-    uint32_t i, idle_usage = 0;
-    uint64_t res, idle_res = 0;
-    u32 usage;
+    uint64_t idle_res = 0, idle_usage = 0, last_state_update_time = 0, now = 0;
+    uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 };
+    uint64_t res[ACPI_PROCESSOR_MAX_POWER] = { 0 };
+    uint32_t i;
     u8 last_state_idx;
 
     printk("==cpu%d==\n", cpu);
@@ -264,28 +265,36 @@ static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
     printk("active state:\t\tC%d\n", last_state_idx);
     printk("max_cstate:\t\tC%d\n", max_cstate);
     printk("states:\n");
-    
+
+    spin_lock_irq(&power->stat_lock);
+    now = NOW();
     for ( i = 1; i < power->count; i++ )
     {
-        spin_lock_irq(&power->stat_lock);	
-        res = tick_to_ns(power->states[i].time);
-        usage = power->states[i].usage;
-        spin_unlock_irq(&power->stat_lock);
+        res[i] = tick_to_ns(power->states[i].time);
+        usage[i] = power->states[i].usage;
+    }
+    last_state_update_time = tick_to_ns(power->last_state_update_tick);
+    spin_unlock_irq(&power->stat_lock);
 
-        idle_usage += usage;
-        idle_res += res;
+    res[last_state_idx] += now - last_state_update_time;
+    usage[last_state_idx] += 1;
+
+    for ( i = 1; i < power->count; i++ )
+    {
+        idle_usage += usage[i];
+        idle_res += res[i];
 
         printk((last_state_idx == i) ? "   *" : "    ");
         printk("C%d:\t", i);
         printk("type[C%d] ", power->states[i].type);
         printk("latency[%03d] ", power->states[i].latency);
-        printk("usage[%08d] ", usage);
+        printk("usage[%"PRIu64"] ", usage[i]);
         printk("method[%5s] ", acpi_cstate_method_name[power->states[i].entry_method]);
-        printk("duration[%"PRId64"]\n", res);
+		printk("duration[%"PRIu64"]\n", res[i]);
     }
     printk((last_state_idx == 0) ? "   *" : "    ");
-    printk("C0:\tusage[%08d] duration[%"PRId64"]\n",
-           idle_usage, NOW() - idle_res);
+    printk("C0:\tusage[%"PRIu64"] duration[%"PRIu64"]\n",
+           usage[0] + idle_usage, res[0] + last_state_update_time - idle_res);
 
     print_hw_residencies(cpu);
 }
@@ -486,6 +495,15 @@ bool_t errata_c6_eoi_workaround(void)
     return (fix_needed && cpu_has_pending_apic_eoi());
 }
 
+void update_last_cx_stat(struct acpi_processor_power *power,
+                         struct acpi_processor_cx *cx, uint64_t ticks)
+{
+	spin_lock(&power->stat_lock);
+	power->last_state = cx;
+	power->last_state_update_tick = ticks;
+	spin_unlock(&power->stat_lock);
+}
+
 void update_idle_stats(struct acpi_processor_power *power,
                        struct acpi_processor_cx *cx,
                        uint64_t before, uint64_t after)
@@ -501,6 +519,8 @@ void update_idle_stats(struct acpi_processor_power *power,
         power->last_residency = tick_to_ns(sleep_ticks) / 1000UL;
         cx->time += sleep_ticks;
     }
+    power->last_state = &power->states[0];
+    power->last_state_update_tick = after;
 
     spin_unlock(&power->stat_lock);
 }
@@ -557,7 +577,6 @@ static void acpi_processor_idle(void)
     if ( (cx->type == ACPI_STATE_C3) && errata_c6_eoi_workaround() )
         cx = power->safe_state;
 
-    power->last_state = cx;
 
     /*
      * Sleep:
@@ -574,6 +593,7 @@ static void acpi_processor_idle(void)
             t1 = cpuidle_get_tick();
             /* Trace cpu idle entry */
             TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, t1, exp, pred);
+            update_last_cx_stat(power, cx, t1);
             /* Invoke C2 */
             acpi_idle_do_entry(cx);
             /* Get end time (ticks) */
@@ -602,7 +622,7 @@ static void acpi_processor_idle(void)
         t1 = cpuidle_get_tick();
         /* Trace cpu idle entry */
         TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, t1, exp, pred);
-
+        update_last_cx_stat(power, cx, t1);
         /*
          * disable bus master
          * bm_check implies we need ARB_DIS
@@ -1171,7 +1191,7 @@ uint32_t pmstat_get_cx_nr(uint32_t cpuid)
 int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
 {
     struct acpi_processor_power *power = processor_powers[cpuid];
-    uint64_t idle_usage = 0, idle_res = 0;
+    uint64_t idle_usage = 0, idle_res = 0, last_state_update_time = 0, now = 0;
     uint64_t usage[ACPI_PROCESSOR_MAX_POWER], res[ACPI_PROCESSOR_MAX_POWER];
     unsigned int i, nr, nr_pc = 0, nr_cc = 0;
 
@@ -1185,7 +1205,6 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
         return 0;
     }
 
-    stat->last = power->last_state ? power->last_state->idx : 0;
     stat->idle_time = get_cpu_idle_time(cpuid);
     nr = min(stat->nr, power->count);
 
@@ -1193,9 +1212,11 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
     if ( pm_idle_save == NULL )
     {
         stat->nr = 2;
+        stat->last = power->last_state ? power->last_state->idx : 0;
 
         usage[1] = idle_usage = 1;
         res[1] = idle_res = stat->idle_time;
+        last_state_update_time = now = NOW();
     }
     else
     {
@@ -1203,13 +1224,19 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
 
         stat->nr = power->count;
 
+        spin_lock_irq(&power->stat_lock);
+        now = NOW();
         for ( i = 1; i < nr; i++ )
         {
-            spin_lock_irq(&power->stat_lock);
             usage[i] = power->states[i].usage;
             res[i] = tick_to_ns(power->states[i].time);
-            spin_unlock_irq(&power->stat_lock);
+        }
+        last_state_update_time = tick_to_ns(power->last_state_update_tick);
+        stat->last = power->last_state ? power->last_state->idx : 0;
+        spin_unlock_irq(&power->stat_lock);
 
+        for( i = 1; i < nr; i++ )
+        {
             idle_usage += usage[i];
             idle_res += res[i];
         }
@@ -1243,7 +1270,10 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
     }
 
     usage[0] = idle_usage;
-    res[0] = NOW() - idle_res;
+    usage[stat->last] += 1;
+
+    res[0] = last_state_update_time - idle_res;
+    res[stat->last] += now - last_state_update_time;
 
     if ( copy_to_guest(stat->triggers, usage, nr) ||
          copy_to_guest(stat->residencies, res, nr) )
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 6dd5822..f07f5ff 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -536,7 +536,6 @@ static void mwait_idle(void)
 		return;
 	}
 
-	power->last_state = cx;
 	eax = cx->address;
 	cstate = ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
 
@@ -554,6 +553,7 @@ static void mwait_idle(void)
 
 	before = cpuidle_get_tick();
 	TRACE_4D(TRC_PM_IDLE_ENTRY, cx->type, before, exp, pred);
+	update_last_cx_stat(power, cx, before);
 
 	if (cpu_is_haltable(cpu))
 		mwait_idle_with_hints(eax, MWAIT_ECX_INTERRUPT_BREAK);
@@ -571,9 +571,6 @@ static void mwait_idle(void)
 	if (!(lapic_timer_reliable_states & (1 << cstate)))
 		lapic_timer_on();
 
-	/* Now back in C0. */
-	power->last_state = &power->states[0];
-
 	sched_tick_resume();
 	cpufreq_dbs_timer_resume();
 
diff --git a/xen/include/asm-x86/cpuidle.h b/xen/include/asm-x86/cpuidle.h
index 4d70677..46e614b 100644
--- a/xen/include/asm-x86/cpuidle.h
+++ b/xen/include/asm-x86/cpuidle.h
@@ -23,6 +23,8 @@ void acpi_dead_idle(void);
 void trace_exit_reason(u32 *irq_traced);
 void update_idle_stats(struct acpi_processor_power *,
                        struct acpi_processor_cx *, uint64_t, uint64_t);
+void update_last_cx_stat(struct acpi_processor_power *,
+                         struct acpi_processor_cx *, uint64_t);
 
 /*
  * vcpu is urgent if vcpu is polling event channel
diff --git a/xen/include/xen/cpuidle.h b/xen/include/xen/cpuidle.h
index b7b9e8c..342f4fe 100644
--- a/xen/include/xen/cpuidle.h
+++ b/xen/include/xen/cpuidle.h
@@ -66,6 +66,7 @@ struct acpi_processor_power
     struct acpi_processor_cx *last_state;
     struct acpi_processor_cx *safe_state;
     void *gdata; /* governor specific data */
+    u64 last_state_update_tick;
     u32 last_residency;
     u32 count;
     spinlock_t stat_lock;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [V2] x86/cpuidle: get accurate C0 value with xenpm tool
  2015-05-04  6:27 [V2] x86/cpuidle: get accurate C0 value with xenpm tool Huaitong Han
@ 2015-05-04  8:33 ` Jan Beulich
  2015-05-04 13:23   ` Han, Huaitong
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2015-05-04  8:33 UTC (permalink / raw)
  To: Huaitong Han; +Cc: xen-devel

>>> On 04.05.15 at 08:27, <huaitong.han@intel.com> wrote:
> When checking the ACPI funciton of C-status, after 100 seconds sleep,
> the sampling value of C0 status from the xenpm tool decreases.
> Because C0=NOW()-C1-C2-C3-C4, when NOW() value is during idle time,
> NOW() value is bigger than last C-status update time, and C0 value
> is also bigger than ture value. if margin of the second error cannot
> make up for margin of the first error, the value of C0 would decrease.

This doesn't seem to explain all the changes done in this patch. And
also please help reviewers by stating (after a --- separator) what
changed compared to the previous version.

> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -254,9 +254,10 @@ static char* acpi_cstate_method_name[] =
>  
>  static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
>  {
> -    uint32_t i, idle_usage = 0;
> -    uint64_t res, idle_res = 0;
> -    u32 usage;
> +    uint64_t idle_res = 0, idle_usage = 0, last_state_update_time = 0, now = 0;

At least the initializer for "now" seems pointless.

> +    uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 };
> +    uint64_t res[ACPI_PROCESSOR_MAX_POWER] = { 0 };
> +    uint32_t i;

"unsigned int" please.

> @@ -264,28 +265,36 @@ static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
>      printk("active state:\t\tC%d\n", last_state_idx);
>      printk("max_cstate:\t\tC%d\n", max_cstate);
>      printk("states:\n");
> -    
> +
> +    spin_lock_irq(&power->stat_lock);
> +    now = NOW();
>      for ( i = 1; i < power->count; i++ )
>      {
> -        spin_lock_irq(&power->stat_lock);	
> -        res = tick_to_ns(power->states[i].time);
> -        usage = power->states[i].usage;
> -        spin_unlock_irq(&power->stat_lock);
> +        res[i] = tick_to_ns(power->states[i].time);
> +        usage[i] = power->states[i].usage;
> +    }
> +    last_state_update_time = tick_to_ns(power->last_state_update_tick);
> +    spin_unlock_irq(&power->stat_lock);

It seems to me that doing the tick_to_ns() conversions inside the
locked region isn't really necessary.

> -        idle_usage += usage;
> -        idle_res += res;
> +    res[last_state_idx] += now - last_state_update_time;
> +    usage[last_state_idx] += 1;

++

> +    for ( i = 1; i < power->count; i++ )
> +    {
> +        idle_usage += usage[i];
> +        idle_res += res[i];
>  
>          printk((last_state_idx == i) ? "   *" : "    ");
>          printk("C%d:\t", i);
>          printk("type[C%d] ", power->states[i].type);
>          printk("latency[%03d] ", power->states[i].latency);
> -        printk("usage[%08d] ", usage);
> +        printk("usage[%"PRIu64"] ", usage[i]);

Why is the "08" being lost here (and below)?

>          printk("method[%5s] ", acpi_cstate_method_name[power->states[i].entry_method]);
> -        printk("duration[%"PRId64"]\n", res);
> +		printk("duration[%"PRIu64"]\n", res[i]);

Bad use of hard tabs.

> @@ -486,6 +495,15 @@ bool_t errata_c6_eoi_workaround(void)
>      return (fix_needed && cpu_has_pending_apic_eoi());
>  }
>  
> +void update_last_cx_stat(struct acpi_processor_power *power,
> +                         struct acpi_processor_cx *cx, uint64_t ticks)
> +{
> +	spin_lock(&power->stat_lock);
> +	power->last_state = cx;
> +	power->last_state_update_tick = ticks;
> +	spin_unlock(&power->stat_lock);
> +}

This at least needs a comment (but better an ASSERT()) that IRQs
need to be off on entry.

> @@ -1171,7 +1191,7 @@ uint32_t pmstat_get_cx_nr(uint32_t cpuid)
>  int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
>  {
>      struct acpi_processor_power *power = processor_powers[cpuid];
> -    uint64_t idle_usage = 0, idle_res = 0;
> +    uint64_t idle_usage = 0, idle_res = 0, last_state_update_time = 0, now = 0;

Again at least "now" appears to pointlessly have an initializer.

> @@ -1203,13 +1224,19 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
>  
>          stat->nr = power->count;
>  
> +        spin_lock_irq(&power->stat_lock);
> +        now = NOW();
>          for ( i = 1; i < nr; i++ )
>          {
> -            spin_lock_irq(&power->stat_lock);
>              usage[i] = power->states[i].usage;
>              res[i] = tick_to_ns(power->states[i].time);
> -            spin_unlock_irq(&power->stat_lock);
> +        }
> +        last_state_update_time = tick_to_ns(power->last_state_update_tick);
> +        stat->last = power->last_state ? power->last_state->idx : 0;
> +        spin_unlock_irq(&power->stat_lock);
>  
> +        for( i = 1; i < nr; i++ )

Coding style (you have the same "for" a few lines up for reference).

> @@ -1243,7 +1270,10 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
>      }
>  
>      usage[0] = idle_usage;
> -    res[0] = NOW() - idle_res;
> +    usage[stat->last] += 1;

++

> @@ -571,9 +571,6 @@ static void mwait_idle(void)
>  	if (!(lapic_timer_reliable_states & (1 << cstate)))
>  		lapic_timer_on();
>  
> -	/* Now back in C0. */
> -	power->last_state = &power->states[0];

Please don't delete the comment.

Jan

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [V2] x86/cpuidle: get accurate C0 value with xenpm tool
  2015-05-04  8:33 ` Jan Beulich
@ 2015-05-04 13:23   ` Han, Huaitong
  2015-05-04 14:00     ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Han, Huaitong @ 2015-05-04 13:23 UTC (permalink / raw)
  To: JBeulich; +Cc: xen-devel

On Mon, 2015-05-04 at 09:33 +0100, Jan Beulich wrote:
> >>> On 04.05.15 at 08:27, <huaitong.han@intel.com> wrote:
> > When checking the ACPI funciton of C-status, after 100 seconds sleep,
> > the sampling value of C0 status from the xenpm tool decreases.
> > Because C0=NOW()-C1-C2-C3-C4, when NOW() value is during idle time,
> > NOW() value is bigger than last C-status update time, and C0 value
> > is also bigger than ture value. if margin of the second error cannot
> > make up for margin of the first error, the value of C0 would decrease.
> 
> This doesn't seem to explain all the changes done in this patch. And
> also please help reviewers by stating (after a --- separator) what
> changed compared to the previous version.
V1: C0 = last_cx_update_time-C1-C2-C3-C4, but last_cx_update_time is in the
past, so the C0 value is stale; NOW-last_update_time should also be accounted for.
V2: C0 = last_cx_update_time-C1-C2-C3-C4, and
C[current_cx_stat] += NOW-last_update_time, so the Cx values are fresh.
> 
> > --- a/xen/arch/x86/acpi/cpu_idle.c
> > +++ b/xen/arch/x86/acpi/cpu_idle.c
> > @@ -254,9 +254,10 @@ static char* acpi_cstate_method_name[] =
> >  
> >  static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
> >  {
> > -    uint32_t i, idle_usage = 0;
> > -    uint64_t res, idle_res = 0;
> > -    u32 usage;
> > +    uint64_t idle_res = 0, idle_usage = 0, last_state_update_time = 0, now = 0;
> 
> At least the initializer for "now" seems pointless.
The variable "now" exists because the NOW() value has to be sampled together
with power->states[i].time; otherwise a calculation error occurs in the next step.
> 
> > +    uint64_t usage[ACPI_PROCESSOR_MAX_POWER] = { 0 };
> > +    uint64_t res[ACPI_PROCESSOR_MAX_POWER] = { 0 };
> > +    uint32_t i;
> 
> "unsigned int" please.
Accepted
> 
> > @@ -264,28 +265,36 @@ static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
> >      printk("active state:\t\tC%d\n", last_state_idx);
> >      printk("max_cstate:\t\tC%d\n", max_cstate);
> >      printk("states:\n");
> > -    
> > +
> > +    spin_lock_irq(&power->stat_lock);
> > +    now = NOW();
> >      for ( i = 1; i < power->count; i++ )
> >      {
> > -        spin_lock_irq(&power->stat_lock);	
> > -        res = tick_to_ns(power->states[i].time);
> > -        usage = power->states[i].usage;
> > -        spin_unlock_irq(&power->stat_lock);
> > +        res[i] = tick_to_ns(power->states[i].time);
> > +        usage[i] = power->states[i].usage;
> > +    }
> > +    last_state_update_time = tick_to_ns(power->last_state_update_tick);
> > +    spin_unlock_irq(&power->stat_lock);
> 
> It seems to me that doing the tick_to_ns() conversions inside the
> locked region isn't really necessary.
Doing the tick_to_ns() conversions inside the locked region is better for keeping
the time values consistent, in case update_idle_stats takes the lock right after
the spin_unlock_irq in print_acpi_power completes.
> 
> > -        idle_usage += usage;
> > -        idle_res += res;
> > +    res[last_state_idx] += now - last_state_update_time;
> > +    usage[last_state_idx] += 1;
> 
> ++
> 
> > +    for ( i = 1; i < power->count; i++ )
> > +    {
> > +        idle_usage += usage[i];
> > +        idle_res += res[i];
> >  
> >          printk((last_state_idx == i) ? "   *" : "    ");
> >          printk("C%d:\t", i);
> >          printk("type[C%d] ", power->states[i].type);
> >          printk("latency[%03d] ", power->states[i].latency);
> > -        printk("usage[%08d] ", usage);
> > +        printk("usage[%"PRIu64"] ", usage[i]);
> 
> Why is the "08" being lost here (and below)?
usage is defined as "unsigned int" in the original code, but since it is a sum of "unsigned int" values, uint64 is better.
usage is the number of Cx transitions and is small most of the time, so "08" would be OK, but it seems the "08" padding is
not needed in the printk.
> 
> >          printk("method[%5s] ", acpi_cstate_method_name[power->states[i].entry_method]);
> > -        printk("duration[%"PRId64"]\n", res);
> > +		printk("duration[%"PRIu64"]\n", res[i]);
> 
> Bad use of hard tabs.
Accepted
> 
> > @@ -486,6 +495,15 @@ bool_t errata_c6_eoi_workaround(void)
> >      return (fix_needed && cpu_has_pending_apic_eoi());
> >  }
> >  
> > +void update_last_cx_stat(struct acpi_processor_power *power,
> > +                         struct acpi_processor_cx *cx, uint64_t ticks)
> > +{
> > +	spin_lock(&power->stat_lock);
> > +	power->last_state = cx;
> > +	power->last_state_update_tick = ticks;
> > +	spin_unlock(&power->stat_lock);
> > +}
> 
> This at least needs a comment (but better an ASSERT()) that IRQs
> need to be off on entry.
Accepted
> 
> > @@ -1171,7 +1191,7 @@ uint32_t pmstat_get_cx_nr(uint32_t cpuid)
> >  int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
> >  {
> >      struct acpi_processor_power *power = processor_powers[cpuid];
> > -    uint64_t idle_usage = 0, idle_res = 0;
> > +    uint64_t idle_usage = 0, idle_res = 0, last_state_update_time = 0, now = 0;
> 
> Again at least "now" appears to pointlessly have an initializer.
> 
> > @@ -1203,13 +1224,19 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
> >  
> >          stat->nr = power->count;
> >  
> > +        spin_lock_irq(&power->stat_lock);
> > +        now = NOW();
> >          for ( i = 1; i < nr; i++ )
> >          {
> > -            spin_lock_irq(&power->stat_lock);
> >              usage[i] = power->states[i].usage;
> >              res[i] = tick_to_ns(power->states[i].time);
> > -            spin_unlock_irq(&power->stat_lock);
> > +        }
> > +        last_state_update_time = tick_to_ns(power->last_state_update_tick);
> > +        stat->last = power->last_state ? power->last_state->idx : 0;
> > +        spin_unlock_irq(&power->stat_lock);
> >  
> > +        for( i = 1; i < nr; i++ )
> 
> Coding style (you have the same "for" a few lines up for reference).
Accepted
> 
> > @@ -1243,7 +1270,10 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
> >      }
> >  
> >      usage[0] = idle_usage;
> > -    res[0] = NOW() - idle_res;
> > +    usage[stat->last] += 1;
> 
> ++
> 
> > @@ -571,9 +571,6 @@ static void mwait_idle(void)
> >  	if (!(lapic_timer_reliable_states & (1 << cstate)))
> >  		lapic_timer_on();
> >  
> > -	/* Now back in C0. */
> > -	power->last_state = &power->states[0];
> 
> Please don't delete the comment.
Accepted, the comment will be added to update_idle_stats.
> 
> Jan

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [V2] x86/cpuidle: get accurate C0 value with xenpm tool
  2015-05-04 13:23   ` Han, Huaitong
@ 2015-05-04 14:00     ` Jan Beulich
  2015-05-05  1:37       ` Han, Huaitong
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2015-05-04 14:00 UTC (permalink / raw)
  To: Huaitong Han; +Cc: xen-devel

>>> On 04.05.15 at 15:23, <huaitong.han@intel.com> wrote:
> On Mon, 2015-05-04 at 09:33 +0100, Jan Beulich wrote:
>> >>> On 04.05.15 at 08:27, <huaitong.han@intel.com> wrote:
>> > --- a/xen/arch/x86/acpi/cpu_idle.c
>> > +++ b/xen/arch/x86/acpi/cpu_idle.c
>> > @@ -254,9 +254,10 @@ static char* acpi_cstate_method_name[] =
>> >  
>> >  static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
>> >  {
>> > -    uint32_t i, idle_usage = 0;
>> > -    uint64_t res, idle_res = 0;
>> > -    u32 usage;
>> > +    uint64_t idle_res = 0, idle_usage = 0, last_state_update_time = 0, now = 0;
>> 
>> At least the initializer for "now" seems pointless.
> variable "now" just because now value should be got with 
> power->states[i].time,
> otherwise calculation error occurs in the next step. 

All understood, and I didn't put the variable's existence in
question, but solely its initializer.

>> > @@ -264,28 +265,36 @@ static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
>> >      printk("active state:\t\tC%d\n", last_state_idx);
>> >      printk("max_cstate:\t\tC%d\n", max_cstate);
>> >      printk("states:\n");
>> > -    
>> > +
>> > +    spin_lock_irq(&power->stat_lock);
>> > +    now = NOW();
>> >      for ( i = 1; i < power->count; i++ )
>> >      {
>> > -        spin_lock_irq(&power->stat_lock);	
>> > -        res = tick_to_ns(power->states[i].time);
>> > -        usage = power->states[i].usage;
>> > -        spin_unlock_irq(&power->stat_lock);
>> > +        res[i] = tick_to_ns(power->states[i].time);
>> > +        usage[i] = power->states[i].usage;
>> > +    }
>> > +    last_state_update_time = tick_to_ns(power->last_state_update_tick);
>> > +    spin_unlock_irq(&power->stat_lock);
>> 
>> It seems to me that doing the tick_to_ns() conversions inside the
>> locked region isn't really necessary.
> doing the tick_to_ns() conversions inside the locked region is better to 
> keep
> time value consistency, in case that the spin_unlock_irq of print_acpi_power 
> finish and
> the spin_lock_irq of update_idle_stats start.

I don't understand this: Why can't you latch the raw tick values into
local variables and do the conversion later? This won't harm accuracy
afaict.

>> > +    for ( i = 1; i < power->count; i++ )
>> > +    {
>> > +        idle_usage += usage[i];
>> > +        idle_res += res[i];
>> >  
>> >          printk((last_state_idx == i) ? "   *" : "    ");
>> >          printk("C%d:\t", i);
>> >          printk("type[C%d] ", power->states[i].type);
>> >          printk("latency[%03d] ", power->states[i].latency);
>> > -        printk("usage[%08d] ", usage);
>> > +        printk("usage[%"PRIu64"] ", usage[i]);
>> 
>> Why is the "08" being lost here (and below)?
> usage is defined as "unsigned int" in original code, but usage is sum of 
> "unsigned int", uint64 is better.
> usage is cx switch times,and it is little in most of the time, 08 is OK, but 
> it seems "08" is no need to
> printk.

Please simply check the old and new output - the question here is
whether readability (perhaps through alignment of fields) is better
with the explicit zero padding. If there's no meaningful difference
I'd be okay with dropping the padding.

>> > @@ -1243,7 +1270,10 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
>> >      }
>> >  
>> >      usage[0] = idle_usage;
>> > -    res[0] = NOW() - idle_res;
>> > +    usage[stat->last] += 1;
>> 
>> ++
>> 
>> > @@ -571,9 +571,6 @@ static void mwait_idle(void)
>> >  	if (!(lapic_timer_reliable_states & (1 << cstate)))
>> >  		lapic_timer_on();
>> >  
>> > -	/* Now back in C0. */
>> > -	power->last_state = &power->states[0];
>> 
>> Please don't delete the comment.
> Accepted,the comment will be added to update_idle_stats.

I think it would better stay here.

Jan

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [V2] x86/cpuidle: get accurate C0 value with xenpm tool
  2015-05-04 14:00     ` Jan Beulich
@ 2015-05-05  1:37       ` Han, Huaitong
  0 siblings, 0 replies; 5+ messages in thread
From: Han, Huaitong @ 2015-05-05  1:37 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

All accepted.

-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Monday, May 4, 2015 10:01 PM
To: Han, Huaitong
Cc: xen-devel@lists.xen.org
Subject: Re: [V2] x86/cpuidle: get accurate C0 value with xenpm tool

>>> On 04.05.15 at 15:23, <huaitong.han@intel.com> wrote:
> On Mon, 2015-05-04 at 09:33 +0100, Jan Beulich wrote:
>> >>> On 04.05.15 at 08:27, <huaitong.han@intel.com> wrote:
>> > --- a/xen/arch/x86/acpi/cpu_idle.c
>> > +++ b/xen/arch/x86/acpi/cpu_idle.c
>> > @@ -254,9 +254,10 @@ static char* acpi_cstate_method_name[] =
>> >  
>> >  static void print_acpi_power(uint32_t cpu, struct 
>> > acpi_processor_power *power)  {
>> > -    uint32_t i, idle_usage = 0;
>> > -    uint64_t res, idle_res = 0;
>> > -    u32 usage;
>> > +    uint64_t idle_res = 0, idle_usage = 0, last_state_update_time 
>> > + = 0, now = 0;
>> 
>> At least the initializer for "now" seems pointless.
> variable "now" just because now value should be got with
> power->states[i].time,
> otherwise calculation error occurs in the next step. 

All understood, and I didn't put the variable's existence in question, but solely its initializer.

>> > @@ -264,28 +265,36 @@ static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
>> >      printk("active state:\t\tC%d\n", last_state_idx);
>> >      printk("max_cstate:\t\tC%d\n", max_cstate);
>> >      printk("states:\n");
>> > -    
>> > +
>> > +    spin_lock_irq(&power->stat_lock);
>> > +    now = NOW();
>> >      for ( i = 1; i < power->count; i++ )
>> >      {
>> > -        spin_lock_irq(&power->stat_lock);	
>> > -        res = tick_to_ns(power->states[i].time);
>> > -        usage = power->states[i].usage;
>> > -        spin_unlock_irq(&power->stat_lock);
>> > +        res[i] = tick_to_ns(power->states[i].time);
>> > +        usage[i] = power->states[i].usage;
>> > +    }
>> > +    last_state_update_time = tick_to_ns(power->last_state_update_tick);
>> > +    spin_unlock_irq(&power->stat_lock);
>> 
>> It seems to me that doing the tick_to_ns() conversions inside the 
>> locked region isn't really necessary.
> doing the tick_to_ns() conversions inside the locked region is better 
> to keep time value consistency, in case that the spin_unlock_irq of 
> print_acpi_power finish and the spin_lock_irq of update_idle_stats 
> start.

I don't understand this: Why can't you latch the raw tick values into local variables and do the conversion later? This won't harm accuracy afaict.
Accepted.


>> > +    for ( i = 1; i < power->count; i++ )
>> > +    {
>> > +        idle_usage += usage[i];
>> > +        idle_res += res[i];
>> >  
>> >          printk((last_state_idx == i) ? "   *" : "    ");
>> >          printk("C%d:\t", i);
>> >          printk("type[C%d] ", power->states[i].type);
>> >          printk("latency[%03d] ", power->states[i].latency);
>> > -        printk("usage[%08d] ", usage);
>> > +        printk("usage[%"PRIu64"] ", usage[i]);
>> 
>> Why is the "08" being lost here (and below)?
> usage is defined as "unsigned int" in original code, but usage is sum 
> of "unsigned int", uint64 is better.
> usage is cx switch times,and it is little in most of the time, 08 is 
> OK, but it seems "08" is no need to printk.

Please simply check the old and new output - the question here is whether readability (perhaps through alignment of fields) is better with the explicit zero padding. If there's no meaningful difference I'd be okay with dropping the padding.
Accepted.
>> > @@ -1243,7 +1270,10 @@ int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
>> >      }
>> >  
>> >      usage[0] = idle_usage;
>> > -    res[0] = NOW() - idle_res;
>> > +    usage[stat->last] += 1;
>> 
>> ++
>> 
>> > @@ -571,9 +571,6 @@ static void mwait_idle(void)
>> >  	if (!(lapic_timer_reliable_states & (1 << cstate)))
>> >  		lapic_timer_on();
>> >  
>> > -	/* Now back in C0. */
>> > -	power->last_state = &power->states[0];
>> 
>> Please don't delete the comment.
> Accepted,the comment will be added to update_idle_stats.

I think it would better stay here.
Accepted.

Jan

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-05-05  1:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-04  6:27 [V2] x86/cpuidle: get accurate C0 value with xenpm tool Huaitong Han
2015-05-04  8:33 ` Jan Beulich
2015-05-04 13:23   ` Han, Huaitong
2015-05-04 14:00     ` Jan Beulich
2015-05-05  1:37       ` Han, Huaitong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.