* [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
@ 2008-12-05  6:22 Wei, Gang
  2008-12-05  8:53 ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-05  6:22 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 655 bytes --]

Originally, the sequence for each cpu is [tsc-save, enter deepC, break-evt, exit deepC, tsc-restore], so systematic error accumulates easily. Once the workloads between cpus are not balanced, the tsc skew between cpus eventually grows bigger and bigger - more than 10 seconds of skew can be observed.

Now, we just keep an initial stamp via cstate_init_stamp during booting/s3 resume, which is based on the platform stime. All cpus need only do a tsc-restore relative to the initial stamp after exiting deepC. The base is fixed, and is the same for all cpus, so accumulated tsc-skew is avoided.

Signed-off-by: Wei Gang <gang.wei@intel.com>

[-- Attachment #2: tsc-skew-20081205-2.patch --]
[-- Type: application/octet-stream, Size: 4105 bytes --]

CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus

Originally, the sequence for each cpu is [tsc-save, enter deepC, break-evt, exit deepC, tsc-restore], so systematic error accumulates easily. Once the workloads between cpus are not balanced, the tsc skew between cpus eventually grows bigger and bigger - more than 10 seconds of skew can be observed.

Now, we just keep an initial stamp via cstate_init_stamp during booting/s3 resume, which is based on the platform stime. All cpus need only do a tsc-restore relative to the initial stamp after exiting deepC. The base is fixed, and is the same for all cpus, so accumulated tsc-skew is avoided.

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r 1b173394f815 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c	Thu Dec 04 16:36:43 2008 +0000
+++ b/xen/arch/x86/acpi/cpu_idle.c	Fri Dec 05 11:36:23 2008 +0800
@@ -317,8 +317,6 @@ static void acpi_processor_idle(void)
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
          * deep C state can't work correctly.
          */
-        /* preparing TSC stop */
-        cstate_save_tsc();
         /* preparing APIC stop */
         lapic_timer_off();
 
diff -r 1b173394f815 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Thu Dec 04 16:36:43 2008 +0000
+++ b/xen/arch/x86/time.c	Fri Dec 05 11:36:23 2008 +0800
@@ -48,11 +48,9 @@ struct time_scale {
 
 struct cpu_time {
     u64 local_tsc_stamp;
-    u64 cstate_tsc_stamp;
     s_time_t stime_local_stamp;
     s_time_t stime_master_stamp;
     struct time_scale tsc_scale;
-    u64 cstate_plt_count_stamp;
 };
 
 struct platform_timesource {
@@ -531,6 +529,10 @@ static u64 plt_stamp;            /* hard
 static u64 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+/* following 2 variables are for deep C state TSC restore usage */
+static u64 initial_tsc_stamp;    /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
 static void plt_overflow(void *unused)
 {
     u64 count;
@@ -644,29 +646,25 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_save_tsc(void)
+static void cstate_init_stamp(void)
 {
-    struct cpu_time *t = &this_cpu(cpu_time);
+    if ( tsc_invariant )
+        return;
+
+    initial_stime_platform_stamp = read_platform_stime();
+    rdtscll(initial_tsc_stamp);
+}
+ 
+void cstate_restore_tsc(void)
+{
+    u64 tsc_delta;
 
     if ( tsc_invariant )
         return;
 
-    t->cstate_plt_count_stamp = plt_src.read_counter();
-    rdtscll(t->cstate_tsc_stamp);
-}
-
-void cstate_restore_tsc(void)
-{
-    struct cpu_time *t = &this_cpu(cpu_time);
-    u64 plt_count_delta, tsc_delta;
-
-    if ( tsc_invariant )
-        return;
-
-    plt_count_delta = (plt_src.read_counter() -
-                       t->cstate_plt_count_stamp) & plt_mask;
-    tsc_delta = scale_delta(plt_count_delta, &plt_scale) * cpu_khz/1000000UL;
-    wrmsrl(MSR_IA32_TSC, t->cstate_tsc_stamp + tsc_delta);
+    tsc_delta = (read_platform_stime() - initial_stime_platform_stamp)
+       * cpu_khz / 1000000UL;
+    wrmsrl(MSR_IA32_TSC, initial_tsc_stamp + tsc_delta);
 }
 
 /***************************************************************************
@@ -1123,6 +1121,8 @@ int __init init_xen_time(void)
     stime_platform_stamp = 0;
     init_platform_timer();
 
+    cstate_init_stamp();
+
     do_settime(get_cmos_time(), 0, NOW());
 
     return 0;
@@ -1243,6 +1243,8 @@ int time_resume(void)
     disable_pit_irq();
 
     init_percpu_time();
+
+    cstate_init_stamp();
 
     do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
 
diff -r 1b173394f815 xen/include/xen/time.h
--- a/xen/include/xen/time.h	Thu Dec 04 16:36:43 2008 +0000
+++ b/xen/include/xen/time.h	Fri Dec 05 11:36:23 2008 +0800
@@ -13,7 +13,6 @@
 #include <asm/time.h>
 
 extern int init_xen_time(void);
-extern void cstate_save_tsc(void);
 extern void cstate_restore_tsc(void);
 
 extern unsigned long cpu_khz;



* Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05  6:22 [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus Wei, Gang
@ 2008-12-05  8:53 ` Keir Fraser
  2008-12-05  9:21   ` Wei, Gang
                     ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Keir Fraser @ 2008-12-05  8:53 UTC (permalink / raw)
  To: Wei, Gang, xen-devel

On 05/12/2008 06:22, "Wei, Gang" <gang.wei@intel.com> wrote:

> Originally, the sequence for each cpu is [tsc-save, entry deepC, break-evt,
> exit deepC, tsc-restore], the system error is quite easy to be accumulated.
> Once the workloads between cpus are not balanced, the tsc skew between cpus
> will eventually become bigger and bigger - more than 10 seconds can be observed.
> 
> Now, we just keep an initial stamp via cstate_init_stamp during the booting/s3
> resuming, which is based on the platform stime. All cpus need only to do
> tsc-restore relative to the initial stamp after exit deepC. The base is fixed,
> and is the same for all cpus, so it can avoid accumulated tsc-skew.
> 
> Signed-off-by: Wei Gang <gang.wei@intel.com>

Synchronising to a start-of-day timestamp and extrapolating with cpu_khz is
not a good idea because of the inherent accumulating inaccuracy in cpu_khz
(it only has a fixed number of bits of precision). So, for example, if
certain CPUs have not deep-C-slept for a long while, they will wander from
your 'true' TSC values and then you will see TSC mismatches when the CPU
does eventually C-sleep, or compared with other CPUs when they do so.

More significantly, cpu_khz is no good as a fixed static estimate of CPU
clock speed when we start playing with P states.

I think your new code structure is correct. That is, work out wakeup TSC
value from read_platform_stime() rather than some saved TSC value. However,
you should extrapolate from that CPU's own t->stime_local_stamp,
t->tsc_scale, and t->local_tsc_stamp. It's probably a pretty simple change
to your new cstate_restore_tsc().
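
I.e., something along these lines (an untested sketch only; ns_to_tsc() is a
made-up helper here, meaning "apply the inverse of t->tsc_scale"):

void cstate_restore_tsc(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    s_time_t stime_delta;
    u64 tsc_delta;

    if ( tsc_invariant )
        return;

    /* System time elapsed since this CPU's last calibration point. */
    stime_delta = read_platform_stime() - t->stime_local_stamp;
    if ( stime_delta < 0 )
        stime_delta = 0;

    /* Convert ns to TSC ticks using this CPU's own scale (made-up helper). */
    tsc_delta = ns_to_tsc(stime_delta, &t->tsc_scale);

    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
}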

You see what I mean?

 -- Keir


* RE: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05  8:53 ` Keir Fraser
@ 2008-12-05  9:21   ` Wei, Gang
  2008-12-05  9:59   ` Tian, Kevin
  2008-12-05 11:30   ` [PATCH] CPUIDLE: revise tsc-save/restore to avoid " Wei, Gang
  2 siblings, 0 replies; 31+ messages in thread
From: Wei, Gang @ 2008-12-05  9:21 UTC (permalink / raw)
  To: Keir Fraser, xen-devel

On Friday, December 05, 2008 4:54 PM, Keir Fraser wrote:
> On 05/12/2008 06:22, "Wei, Gang" <gang.wei@intel.com> wrote:
> 
>> Originally, the sequence for each cpu is [tsc-save, entry deepC, break-evt,
>> exit deepC, tsc-restore], the system error is quite easy to be accumulated.
>> Once the workloads between cpus are not balanced, the tsc skew between cpus
>> will eventually become bigger and bigger - more than 10 seconds can be
>> observed. 
>> 
>> Now, we just keep an initial stamp via cstate_init_stamp during the booting/s3
>> resuming, which is based on the platform stime. All cpus need only to do
>> tsc-restore relative to the initial stamp after exit deepC. The base is
>> fixed, and is the same for all cpus, so it can avoid accumulated tsc-skew.
>> 
>> Signed-off-by: Wei Gang <gang.wei@intel.com>
> 
> Synchronising to a start-of-day timestamp and extrapolating with cpu_khz is
> not a good idea because of the inherent accumulating inaccuracy in cpu_khz
> (it only has a fixed number of bits of precision). So, for example, if
> certain CPUs have not deep-C-slept for a long while, they will wander from
> your 'true' TSC values and then you will see TSC mismatches when the CPU
> does eventually C-sleep, or compared with other CPUs when they do so.
> 
> More significantly, cpu_khz is no good as a fixed static estimate of CPU
> clock speed when we start playing with P states.
> 
> I think your new code structure is correct. That is, work out wakeup TSC
> value from read_platform_stime() rather than some saved TSC value. However,
> you should extrapolate from that CPU's own t->stime_local_stamp,
> t->tsc_scale, and t->local_tsc_stamp. It's probably a pretty simple change
> to your new cstate_restore_tsc().
> 
> You see what I mean?

I tried extrapolating from t->stime_local_stamp, cpu_khz, and t->local_tsc_stamp before I got to the current solution. It still produced accumulating skew, just at a slower rate. I would like to try it again with t->tsc_scale instead of cpu_khz. If it works, it would really be simpler. Allow me some time.

Jimmy

> 
>  -- Keir


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05  8:53 ` Keir Fraser
  2008-12-05  9:21   ` Wei, Gang
@ 2008-12-05  9:59   ` Tian, Kevin
  2008-12-05 10:05     ` Tian, Kevin
  2008-12-05 11:30   ` [PATCH] CPUIDLE: revise tsc-save/restore to avoid " Wei, Gang
  2 siblings, 1 reply; 31+ messages in thread
From: Tian, Kevin @ 2008-12-05  9:59 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang, xen-devel

>From: Keir Fraser
>Sent: Friday, December 05, 2008 4:54 PM
>Synchronising to a start-of-day timestamp and extrapolating 
>with cpu_khz is
>not a good idea because of the inherent accumulating 
>inaccuracy in cpu_khz
>(it only has a fixed number of bits of precision). So, for example, if
>certain CPUs have not deep-C-slept for a long while, they will 
>wander from
>your 'true' TSC values and then you will see TSC mismatches 
>when the CPU
>does eventually C-sleep, or compared with other CPUs when they do so.
>
>More significantly, cpu_khz is no good as a fixed static 
>estimate of CPU
>clock speed when we start playing with P states.

This is not a big issue, since most processors today have a constant
TSC that is immune to P-state changes. Even when a frequency change
does affect the TSC count, I don't think time calibration really
helps if the rescaling happens over a small interval; there is still
a high probability that multiple vcpus of one domain observe time
weirdness, or that a vcpu does when migrating...

>
>I think your new code structure is correct. That is, work out 
>wakeup TSC
>value from read_platform_stime() rather than some saved TSC 
>value. However,
>you should extrapolate from that CPU's own t->stime_local_stamp,
>t->tsc_scale, and t->local_tsc_stamp. It's probably a pretty 
>simple change
>to your new cstate_restore_tsc().

Recovering the TSC based on t->local_tsc_stamp can still accumulate
errors, though at a far slower pace. The problem here is that the
software calculation is always inaccurate compared to what the TSC
would have counted had it not stopped. We tried to measure the
calculation error for each TSC save/restore. The approach is to use
C2 (where the TSC is not stopped) and then apply the restore logic,
comparing the to-be-written value against the real TSC count read by
rdtsc. We can observe a drift of 2k-3k cycles when chipset link ASPM
is enabled, and hundreds of cycles when ASPM is disabled. As the idle
percentage on every processor is different, the net effect is not
only a TSC slower than the real one (had it not stopped), but also an
increasing drift among cpus. The former normally just makes TOD run
faster or slower, but the latter can cause severe guest problems such
as softlockup warnings or DMA timeouts when a vcpu is migrated
between cpus with a big TSC/NOW() skew. Recovering the TSC based on
the local calibration stamp reduces the error accumulation pace from
per-cstate-entry/exit to per-calibration (ms -> s), but after a long
run multiple cpus can still observe big skews.
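
For reference, the measurement amounts to something like the following
sketch (illustrative only, not code from the tree; it just reuses the
fields and helpers of the original save/restore path quoted in the
patch):

/* Illustrative probe: after a C2 exit, where the TSC kept counting,
 * compare what the old restore path would have written with the live
 * counter. */
static void measure_restore_drift(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    u64 plt_count_delta, tsc_delta, predicted, actual;

    plt_count_delta = (plt_src.read_counter() -
                       t->cstate_plt_count_stamp) & plt_mask;
    tsc_delta = scale_delta(plt_count_delta, &plt_scale) * cpu_khz / 1000000UL;
    predicted = t->cstate_tsc_stamp + tsc_delta; /* value restore would write */

    rdtscll(actual);                             /* TSC never stopped in C2 */
    printk("TSC restore drift: %ld cycles\n", (long)(actual - predicted));
}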

To avoid accumulating errors, the best way is to align to an
absolute platform timer without relying on the last stamp. One
drawback, as you said, is intermittent skew when a cpu doesn't
enter idle for a long time; but this is also true for the
offset-counting approach above.

Then, if we agree on always aligning the TSC to the absolute platform
timer counter, it makes no difference whether we use cpu_khz or the
local tsc_scale, since both are scale factors calculated over a small
period to represent the underlying crystal frequency.

Thanks,
Kevin


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05  9:59   ` Tian, Kevin
@ 2008-12-05 10:05     ` Tian, Kevin
  2008-12-05 10:13       ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Tian, Kevin @ 2008-12-05 10:05 UTC (permalink / raw)
  To: Tian, Kevin, 'Keir Fraser', Wei, Gang, xen-devel

>From: Tian, Kevin
>Sent: Friday, December 05, 2008 6:00 PM
>
>Then if we agree always aligning TSC to absolute platform timer
>counter, it doesn't make difference to use cpu_khz or local tsc_scale
>since both are using scale factor calculated within a small period
>to represent the underlying crystal frequency.
>

Let me take back the above words. As you said, cpu_khz has lower
accuracy because the lowest bits are cut off.

Thanks,
Kevin


* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 10:05     ` Tian, Kevin
@ 2008-12-05 10:13       ` Keir Fraser
  2008-12-05 11:50         ` Tian, Kevin
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-05 10:13 UTC (permalink / raw)
  To: Tian, Kevin, Wei, Gang, xen-devel

On 05/12/2008 10:05, "Tian, Kevin" <kevin.tian@intel.com> wrote:

>> From: Tian, Kevin
>> Sent: Friday, December 05, 2008 6:00 PM
>> 
>> Then if we agree always aligning TSC to absolute platform timer
>> counter, it doesn't make difference to use cpu_khz or local tsc_scale
>> since both are using scale factor calculated within a small period
>> to represent the underlying crystal frequency.
>> 
> 
> Let me hold back above words. As you said, cpu_khz has lower accuracy
> by cutting down lowest bits.

Yes. Also bear in mind that absolute ongoing synchronisation between TSCs
*does not matter*. Xen will happily synchronise system time on top of
(slowly enough, constantly enough) diverging TSCs, and of course HVM VCPUs
re-set their guest TSC offset when moving between host CPUs.

What *does* matter is the possibility of warping a host TSC value on wake
from deep sleep, compared with its value if the sleep had never happened. In
this case, system time will be wrong (since we haven't been through a
calibration step since waking up) and HVM timers will be wrong. And using
start-of-day timestamp plus cpu_khz makes this more likely. The correct
thing to do is obey the most recent set of local calibration values.

 -- Keir


* RE: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05  8:53 ` Keir Fraser
  2008-12-05  9:21   ` Wei, Gang
  2008-12-05  9:59   ` Tian, Kevin
@ 2008-12-05 11:30   ` Wei, Gang
  2008-12-05 11:47     ` Keir Fraser
  2008-12-05 13:06     ` Keir Fraser
  2 siblings, 2 replies; 31+ messages in thread
From: Wei, Gang @ 2008-12-05 11:30 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

> I tried extrapolating from t->stime_local_stamp, cpu_khz, and
> t->local_tsc_stamp before I got into the current solution. It would still
> bring accumulating skew, but in a slower increasing speed. I would like to
> try it again with t->tsc_scale instead of cpu_khz. If it works, it would
> really be simpler. Allow me some time.    

The patch below should be what you expected. It still shows continuously increasing tsc skew, and if I pin all domains' vcpus on 1 pcpu, the skew increases faster.

diff -r 1b173394f815 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c	Thu Dec 04 16:36:43 2008 +0000
+++ b/xen/arch/x86/acpi/cpu_idle.c	Fri Dec 05 19:06:06 2008 +0800
@@ -317,8 +317,6 @@ static void acpi_processor_idle(void)
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
          * deep C state can't work correctly.
          */
-        /* preparing TSC stop */
-        cstate_save_tsc();
         /* preparing APIC stop */
         lapic_timer_off();
 
diff -r 1b173394f815 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Thu Dec 04 16:36:43 2008 +0000
+++ b/xen/arch/x86/time.c	Fri Dec 05 19:06:06 2008 +0800
@@ -48,11 +48,9 @@ struct time_scale {
 
 struct cpu_time {
     u64 local_tsc_stamp;
-    u64 cstate_tsc_stamp;
     s_time_t stime_local_stamp;
     s_time_t stime_master_stamp;
     struct time_scale tsc_scale;
-    u64 cstate_plt_count_stamp;
 };
 
 struct platform_timesource {
@@ -149,6 +147,32 @@ static inline u64 scale_delta(u64 delta,
 #endif
 
     return product;
+}
+
+/*
+ * Scale a 32-bit time delta by dividing by a 32-bit fraction and shifting
+ * (the inverse of scale_delta()), yielding a 64-bit result.
+ */
+static inline u64 scale_time(u32 time_delta, struct time_scale *scale)
+{
+    u64 td64 = time_delta;
+    u64 quotient, remainder;
+
+    td64 <<= 32;
+
+    quotient = td64 / scale->mul_frac;
+    remainder = td64 % scale->mul_frac;
+
+    if ( scale->shift < 0 )
+    {
+        quotient <<= -scale->shift;
+        remainder <<= -scale->shift;
+        quotient += remainder / scale->mul_frac;
+    }
+    else
+        quotient >>= scale->shift;
+
+    return quotient;
 }
 
 /*
@@ -644,29 +668,19 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_save_tsc(void)
+void cstate_restore_tsc(void)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
+    u64 tsc_delta;
+    s_time_t stime_delta;
 
     if ( tsc_invariant )
         return;
 
-    t->cstate_plt_count_stamp = plt_src.read_counter();
-    rdtscll(t->cstate_tsc_stamp);
-}
-
-void cstate_restore_tsc(void)
-{
-    struct cpu_time *t = &this_cpu(cpu_time);
-    u64 plt_count_delta, tsc_delta;
-
-    if ( tsc_invariant )
-        return;
-
-    plt_count_delta = (plt_src.read_counter() -
-                       t->cstate_plt_count_stamp) & plt_mask;
-    tsc_delta = scale_delta(plt_count_delta, &plt_scale) * cpu_khz/1000000UL;
-    wrmsrl(MSR_IA32_TSC, t->cstate_tsc_stamp + tsc_delta);
+    stime_delta = read_platform_stime() - t->stime_local_stamp;
+    ASSERT(stime_delta < 0x100000000UL);
+    tsc_delta = scale_time((u32)stime_delta, &t->tsc_scale);
+    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
 }
 
 /***************************************************************************
diff -r 1b173394f815 xen/include/xen/time.h
--- a/xen/include/xen/time.h	Thu Dec 04 16:36:43 2008 +0000
+++ b/xen/include/xen/time.h	Fri Dec 05 19:06:06 2008 +0800
@@ -13,7 +13,6 @@
 #include <asm/time.h>
 
 extern int init_xen_time(void);
-extern void cstate_save_tsc(void);
 extern void cstate_restore_tsc(void);
 
 extern unsigned long cpu_khz;

Jimmy


* Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 11:30   ` [PATCH] CPUIDLE: revise tsc-save/restore to avoid " Wei, Gang
@ 2008-12-05 11:47     ` Keir Fraser
  2008-12-05 11:53       ` Tian, Kevin
  2008-12-05 13:06     ` Keir Fraser
  1 sibling, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-05 11:47 UTC (permalink / raw)
  To: Wei, Gang; +Cc: xen-devel

On 05/12/2008 11:30, "Wei, Gang" <gang.wei@intel.com> wrote:

>> I tried extrapolating from t->stime_local_stamp, cpu_khz, and
>> t->local_tsc_stamp before I got into the current solution. It would still
>> bring accumulating skew, but in a slower increasing speed. I would like to
>> try it again with t->tsc_scale instead of cpu_khz. If it works, it would
>> really be simpler. Allow me some time.
> 
> Below patch should be what you expected. It will still bring continuously
> increasing tsc skew. If I pin all domains' vcpus on 1 pcpu, the skew is
> increasing faster.

It looks about right to me, and better than using cpu_khz as in the current
code and the original patch. How much skew does it introduce? Is the skew a
problem? Bearing in mind that from the point of view of HVM guests, TSC
rates will be all over the place anyway if we are using P states (and the
host TSC is not invariant, which is actually the only time your code is
enabled anyway).

 -- Keir


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 10:13       ` Keir Fraser
@ 2008-12-05 11:50         ` Tian, Kevin
  2008-12-05 12:36           ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Tian, Kevin @ 2008-12-05 11:50 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang, xen-devel

>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] 
>Sent: Friday, December 05, 2008 6:13 PM
>On 05/12/2008 10:05, "Tian, Kevin" <kevin.tian@intel.com> wrote:
>
>>> From: Tian, Kevin
>>> Sent: Friday, December 05, 2008 6:00 PM
>>> 
>>> Then if we agree always aligning TSC to absolute platform timer
>>> counter, it doesn't make difference to use cpu_khz or local 
>tsc_scale
>>> since both are using scale factor calculated within a small period
>>> to represent the underlying crystal frequency.
>>> 
>> 
>> Let me hold back above words. As you said, cpu_khz has lower accuracy
>> by cutting down lowest bits.
>
>Yes. Also bear in mind that absolute ongoing synchronisation 
>between TSCs
>*does not matter*. Xen will happily synchronise system time on top of
>(slowly enough, constantly enough) diverging TSCs, and of 
>course HVM VCPUs
>re-set their guest TSC offset when moving between host CPUs.

We took measurements for the following cases (4 idle UP hvm rhel5 guests on 2 cores):

a) disable deep C-state
b) enable deep C-state, with original tsc save/restore at each C-state entry/exit
c) enable deep C-state, and restore TSC based on local calibration stamp
    and tsc scale
d) enable deep C-state, and restore TSC based on monotonic platform stime
    and cpu_khz

	system time skew	TSC skew
a)	hundred ns		several us
b)	accumulating larger	accumulating larger
c) 	dozens of us		accumulating larger
d)	hundred ns		several us

Large system time skew can impact both pv and hvm domains. A pv
domain will complain that time went backward when a vcpu migrates to
a cpu with a slower NOW(). An hvm domain will see delayed vpt
expiration when migrating to a slower cpu, or, vice versa, missed
ticks accounted by xen for some timer modes. Both c) and d) keep the
skew within a stable range.

Large TSC skew is normally OK for a pv domain, since xen time stamps
are synced at gettimeofday and at timer interrupts within the pv
guest. Possibly impacted are user processes which use rdtsc directly.
However, large TSC skew is really bad for an hvm guest, especially
when the guest TSC offset is never adjusted at vcpu migration. It
causes the guest to catch up missing ticks in a batch, which results
in softlockup warnings or DMA timeouts. Thus with c) we can still
observe guest complaints after a long enough run.

I'm not sure whether the guest TSC offset can be adjusted accurately,
since you first need to get the TSC skew among cores, which may
require issuing IPIs and adds extra overhead. It just gets really
messy to handle an accumulating TSC skew for an hvm guest.

That's why we go with option d), which exposes the same level of
constraints as the disabled case. It is not a perfect solution, but
it shows more stable results than the others.

>
>What *does* matter is the possibility of warping a host TSC 
>value on wake
>from deep sleep, compared with its value if the sleep had 
>never happened. In
>this case, system time will be wrong (since we haven't been through a
>calibration step since waking up) and HVM timers will be 
>wrong. And using
>start-of-day timestamp plus cpu_khz makes this more likely. The correct
>thing to do is obey the most recent set of local calibration values.
>

I assume you meant S3 by "deep sleep"? If so, I don't think it is an
issue. A sane dom0 S3 flow only happens after the other domains have
been notified of the virtual S3 event, and thus, after waking up from
dom0 S3, every domain will resume its timekeeping sub-system.

Thanks,
Kevin


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 11:47     ` Keir Fraser
@ 2008-12-05 11:53       ` Tian, Kevin
  0 siblings, 0 replies; 31+ messages in thread
From: Tian, Kevin @ 2008-12-05 11:53 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang; +Cc: xen-devel

>From: Keir Fraser
>Sent: Friday, December 05, 2008 7:48 PM
>
>It looks about right to me, and better than using cpu_khz as 
>in the current
>code and the original patch. How much skew does it introduce? 
>Is the skew a
>problem? Bearing in mind that from the point of view of HVM guests, TSC
>rates will be all over the place anyway if we are using P 
>states (and the
>host TSC is not invariant, which is actually the only time your code is
>enabled anyway).
>

'invariant' here means a new feature whereby the tsc keeps running
even when the clock is stopped on deep C-state entry. For example,
the latest Intel Core i7 supports that feature. For P-states a
different term, "constant tsc", is used, which has nothing to do
with C-states. Also, as I wrote earlier, constant tsc is supported
by many processors nowadays.

Thanks,
Kevin


* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 11:50         ` Tian, Kevin
@ 2008-12-05 12:36           ` Keir Fraser
  2008-12-12  3:36             ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-05 12:36 UTC (permalink / raw)
  To: Tian, Kevin, Wei, Gang, xen-devel

On 05/12/2008 11:50, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> That's why we go with option d) which really exposes same level
> of constraints compared to disabled case. This is not perfect
> solution, but it shows more stable result than others.

It does depend on constant_tsc though. That's fine, but I think then it
should be implemented as an alternative method selected only when you detect
constant_tsc at runtime. The fallback should be something like Gang Wei
posted most recently.

Also don't use cpu_khz as it's way less accurate than you need to put up
with. Create a 'struct time_scale' that will convert from nanoseconds to TSC
ticks. I think a reciprocal function for time_scale's would be useful here
(and in Gang Wei's last patch as well) as then you can take the existing
initial TSC->ns scale and flip it.
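
Roughly like this, untested (assumes the existing struct time_scale fields, a
u32 mul_frac and an int shift, with scale_delta() computing
((delta << shift) * mul_frac) >> 32, a negative shift meaning a right shift):

static struct time_scale scale_reciprocal(struct time_scale scale)
{
    struct time_scale reciprocal;
    u64 dividend = 1ULL << 31;

    /* Forward factor is mul_frac * 2^(shift-32); build its inverse. */
    reciprocal.shift = 1 - scale.shift;
    while ( dividend >= scale.mul_frac )  /* keep the quotient below 2^32 */
    {
        dividend >>= 1;
        reciprocal.shift++;
    }
    reciprocal.mul_frac = (u32)((dividend << 32) / scale.mul_frac);

    return reciprocal;
}

Then the restore path can do sys_to_tsc = scale_reciprocal(t->tsc_scale) and
tsc_delta = scale_delta(stime_delta, &sys_to_tsc).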

The other concern I have is that, even with a much more accurate scale
factor, CPUs which do not enter deep-C (because they are running busy loops
for example) will *slowly* wander from the time line determined by your
scale factor (due to unavoidable imprecision in the scale factor). This
could build up to be noticeable over days. It might be an idea therefore to
occasionally force a TSC resync on any CPU that hasn't had it resynced for
other reasons, just to zap any accumulating imprecision that has crept in.
It'd only need to be done maybe once a minute or maybe even much less than
that.

 -- Keir


* Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 11:30   ` [PATCH] CPUIDLE: revise tsc-save/restore to avoid " Wei, Gang
  2008-12-05 11:47     ` Keir Fraser
@ 2008-12-05 13:06     ` Keir Fraser
  2008-12-05 14:47       ` Keir Fraser
  1 sibling, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-05 13:06 UTC (permalink / raw)
  To: Wei, Gang; +Cc: Tian, Kevin, xen-devel

On 05/12/2008 11:30, "Wei, Gang" <gang.wei@intel.com> wrote:

>> I tried extrapolating from t->stime_local_stamp, cpu_khz, and
>> t->local_tsc_stamp before I got into the current solution. It would still
>> bring accumulating skew, but in a slower increasing speed. I would like to
>> try it again with t->tsc_scale instead of cpu_khz. If it works, it would
>> really be simpler. Allow me some time.
> 
> Below patch should be what you expected. It will still bring continuously
> increasing tsc skew. If I pin all domains' vcpus on 1 pcpu, the skew is
> increasing faster.

I applied a modified version of this as c/s 18873. Primarily I removed
scale_time() and replaced with a simpler reciprocal function which is
applied to scale factors to invert them. Please take a look.

As I replied to Kevin, you can still have a scheme based on exploiting
constant_tsc if you like, but it needs to be enabled only when you really
have constant_tsc, and also subject to other comments I made in that email.

 -- Keir


* Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 13:06     ` Keir Fraser
@ 2008-12-05 14:47       ` Keir Fraser
  2008-12-06  7:36         ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-05 14:47 UTC (permalink / raw)
  To: Wei, Gang; +Cc: Tian, Kevin, xen-devel

On 05/12/2008 13:06, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

>> Below patch should be what you expected. It will still bring continuously
>> increasing tsc skew. If I pin all domains' vcpus on 1 pcpu, the skew is
>> increasing faster.
> 
> I applied a modified version of this as c/s 18873. Primarily I removed
> scale_time() and replaced with a simpler reciprocal function which is applied
> to scale factors to invert them. Please take a look.

I should have tested the reciprocal function before checkin. A fixed (and
now rather less simple) version is in c/s 18875.

 -- Keir


* RE: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 14:47       ` Keir Fraser
@ 2008-12-06  7:36         ` Wei, Gang
  0 siblings, 0 replies; 31+ messages in thread
From: Wei, Gang @ 2008-12-06  7:36 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Tian, Kevin, xen-devel

On Friday, December 05, 2008 10:48 PM, Keir Fraser wrote:
> On 05/12/2008 13:06, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
>>> Below patch should be what you expected. It will still bring continuously
>>> increasing tsc skew. If I pin all domains' vcpus on 1 pcpu, the skew is
>>> increasing faster.
>> 
>> I applied a modified version of this as c/s 18873. Primarily I removed
>> scale_time() and replaced with a simpler reciprocal function which is applied
>> to scale factors to invert them. Please take a look.
> 
> I should have tested the reciprocal function before checkin. A fixed (and
> now rather less simple) version is in c/s 18875.

Your fix is absolutely right. Now we at least have the tsc skew increasing at a far slower rate. Thanks, and I will consider how to make my original patch work for the constant_tsc case only.

Jimmy


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-05 12:36           ` Keir Fraser
@ 2008-12-12  3:36             ` Wei, Gang
  2008-12-12  9:31               ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-12  3:36 UTC (permalink / raw)
  To: Keir Fraser, Tian, Kevin, xen-devel

On Friday, December 05, 2008 8:36 PM, Keir Fraser wrote:
> It does depend on constant_tsc though. That's fine, but I think then it
> should be implemented as an alternative method selected only when you detect
> constant_tsc at runtime. The fallback should be something like Gang Wei
> posted most recently.
> 
> Also don't use cpu_khz as it's way less accurate than you need to put up
> with. Create a 'struct time_scale' that will convert from nanoseconds to TSC
> ticks. I think a reciprocal function for time_scale's would be useful here
> (and in Gang Wei's last patch as well) as then you can take the existing
> initial TSC->ns scale and flip it.
> 
> The other concern I have is that, even with a much more accurate scale
> factor, CPUs which do not enter deep-C (because they are running busy loops
> for example) will *slowly* wander from the time line determined by your
> scale factor (due to unavoidable imprecision in the scale factor). This
> could build up to be noticeable over days. It might be an idea therefore to
> occasionally force a TSC resync on any CPU that hasn't had it resynced for
> other reasons, just to zap any accumulating imprecision that has crept in.
> It'd only need to be done maybe once a minute or maybe even much less than
> that.

I am updating the original patch for the constant_tsc case. There are some questions.

The first question is: can we just replace the per-second tsc_scale calibration with a constant tsc_scale plus per-second tsc restoring? For constant_tsc it seems meaningless to do the tsc_scale calibration work, and to keep the tsc skew as small as possible it is better to resync the tsc every second rather than once a minute or less often.

The second question is: how can we make a more accurate tsc_scale and use it for all local NOW() calculations and tsc restoring? The current initial tsc->ns scale is based on the PIT, but the PIT is not necessarily the platform timer source. Is it practical to run the tsc_scale calibration for a few seconds and take the calibrated tsc_scale as the global, fixed one?

Jimmy


* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-12  3:36             ` Wei, Gang
@ 2008-12-12  9:31               ` Keir Fraser
  2008-12-13  4:01                 ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-12  9:31 UTC (permalink / raw)
  To: Wei, Gang, Tian, Kevin, xen-devel

On 12/12/2008 03:36, "Wei, Gang" <gang.wei@intel.com> wrote:

> I am updating the original patch for constant_tsc case. There are some
> questions.
> 
> The first question is, can we just replace the per-second tsc_scale
> calibration with constant tsc_scale & per-second tsc restoring? For
> constant_tsc, it seems meaningless to do the tsc_scale calibration works. And
> to make the tsc_skew as small as possible, it is better to do the tsc resync
> per-second other than once a minute or even less.

If you mean have the same tsc_scale in all per_cpu(cpu_time).tsc_scale, then
yes. And you can do your once-a-second work in time_calibration() -- you can
put an if-else in there.

I thought our tsc_skew was looking quite good now with the updated
scale_reciprocal(), by the way. How much more drift is there for you to
wring out?

> The second question is, how can we make a more accurate tsc_scale and use it
> for all local NOW() calculation & tsc restoring? Current initial tsc->ns scale
> is based on PIT, but it is not certain to be the platform timer source. Is it
> practical to make tsc_scale calibration working for seconds and take the
> calibrated tsc_scale as the global and fixed one?

You don't want a platform source, right? So what does it matter? The more
important question is: how much inaccuracy is built in to the existing
tsc->ns scale factor compared with real wallclock progress of time? And how
easily can that be improved? The fact that PIT may wander relative to HPET,
for example.... Who cares?

 -- Keir


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-12  9:31               ` Keir Fraser
@ 2008-12-13  4:01                 ` Wei, Gang
  2008-12-13  9:51                   ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-13  4:01 UTC (permalink / raw)
  To: Keir Fraser, Tian, Kevin, xen-devel

On Friday, December 12, 2008 5:32 PM, Keir Fraser wrote:
> On 12/12/2008 03:36, "Wei, Gang" <gang.wei@intel.com> wrote:
> 
>> I am updating the original patch for constant_tsc case. There are some
>> questions. 
>> 
>> The first question is, can we just replace the per-second tsc_scale
>> calibration with constant tsc_scale & per-second tsc restoring? For
>> constant_tsc, it seems meaningless to do the tsc_scale calibration works. And
>> to make the tsc_skew as small as possible, it is better to do the tsc resync
>> per-second other than once a minute or even less.
> 
> If you mean have the same tsc_scale in all per_cpu(cpu_time).tsc_scale, then
> yes. And you can do your once-a-second work in time_calibration() -- you can
> put an if-else in there.

Yes, I will do it this way.

> 
> I thought our tsc_skew was looking quite good now with the updated
> scale_reciprocal(), by the way. How much more drift is there for you to
> wring out?

My tsc_skew testing method is:
1. specify "cpuidle hpetbroadcast cpufreq=xen" on the grub xen line.
2. start 4 UP rhel5u1 hvm guests
3. 'xm vcpu-pin ...' to pin all vcpus of dom0 & all guests to cpu1 (2 pcpus total in my box)
4. run 'while true; do find . /; done' in several guests
5. trigger the 't' keyhandler from time to time to watch the time skew & cycle skew.

I observed the cycle skew easily exceeding ten seconds within hours.

> 
>> The second question is, how can we make a more accurate tsc_scale and use it
>> for all local NOW() calculation & tsc restoring? Current initial tsc->ns
>> scale is based on PIT, but it is not certain to be the platform timer
>> source. Is it practical to make tsc_scale calibration working for seconds
>> and take the calibrated tsc_scale as the global and fixed one?
> 
> You don't want a platform source, right? So what does it matter? The more
> important question is: how much inaccuracy is built in to the existing
> tsc->ns scale factor compared with real wallclock progress of time? And how
> easily can that be improved? The fact that PIT may wander relative to HPET,
> for example.... Who cares?

In my box, the existing initial tsc->ns scale is:
    .mul_frac=ca1761ff, .shift=-1 (2533507879 ticks/sec)
and the stable calibrated tsc->ns scale in the nopm case is:
    .mul_frac=ca191f43, .shift=-1 (2533422531 ticks/sec)
Don't you think the inaccuracy is a little big (85348 ticks/sec)? But it is not really easy to improve. I will keep using the existing scale for the moment. We can keep looking for a better way to improve it.
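
(For reference, the ticks/sec figures follow from the scale itself: with the
tsc->ns conversion being roughly ns = ticks * mul_frac * 2^(shift - 32), the
tick rate is 10^9 * 2^(32 - shift) / mul_frac, e.g. 10^9 * 2^33 / 0xca1761ff
~= 2533507879 ticks/sec.)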

Jimmy


* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-13  4:01                 ` Wei, Gang
@ 2008-12-13  9:51                   ` Keir Fraser
  2008-12-13 15:27                     ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-13  9:51 UTC (permalink / raw)
  To: Wei, Gang, Tian, Kevin, xen-devel

On 13/12/2008 04:01, "Wei, Gang" <gang.wei@intel.com> wrote:

>> You don't want a platform source, right? So what does it matter? The more
>> important question is: how much inaccuracy is built in to the existing
>> tsc->ns scale factor compared with real wallclock progress of time? And how
>> easily can that be improved? The fact that PIT may wander relative to HPET,
>> for example.... Who cares?
> 
> In my box, the existing initial tsc->ns scale:
>     .mul_frac=ca1761ff, .shift=-1 (2533507879 ticks/sec)
> and the stable calibrated  tsc->ns scale in nopm case:
>     .mul_frac=ca191f43, .shift=-1(2533422531 ticks/sec)
> Don't you think the inaccuracy is a little big (85348 ticks/sec)? But It is
> not really easy to improve it. I will keep using the existing scale for this
> moment. We can keep looking whether we can find a better way to improve it.

The difference is 33 parts per million. The actual frequency of the timer
crystal will likely deviate from its stated frequency by more than that. I
don't think there's anything you can do here (beyond allowing tsc_scale to
be adjusted periodically by ntp algorithms in dom0).
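
(Worked out: the relative error is 85348 / 2533507879, which is about 3.4e-5,
i.e. roughly 33-34 parts per million.)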

By the way, c/s 18102 (subsequently reverted) may be interesting for you in
implementing the TSC-and-no-platform-timer mode. I'm not sure how much of it
will really be applicable, but it might be worth a look at least.

 -- Keir


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-13  9:51                   ` Keir Fraser
@ 2008-12-13 15:27                     ` Wei, Gang
  2008-12-13 15:41                       ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-13 15:27 UTC (permalink / raw)
  To: Keir Fraser, Tian, Kevin, xen-devel

On Saturday, December 13, 2008 5:51 PM, Keir Fraser wrote:
> By the way, c/s 18102 (subsequently reverted) may be interesting for you in
> implementing the TSC-and-no-platform-timer mode. I'm not sure how much of it
> will really be applicable, but it might be worth a look at least.

c/s 18102 looks like an incomplete patch which wants to reintroduce the TSC as the platform timer only when the tsc is always running at a constant rate. I guess you still want it because TSC access has lower cost, right? Anyway, it has little to do with my current goal: handling a tsc that stops in deep C-states but runs at a constant rate.

Jimmy


* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-13 15:27                     ` Wei, Gang
@ 2008-12-13 15:41                       ` Keir Fraser
  2008-12-15  3:06                         ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-13 15:41 UTC (permalink / raw)
  To: Wei, Gang, Tian, Kevin, xen-devel




On 13/12/2008 15:27, "Wei, Gang" <gang.wei@intel.com> wrote:

> On Saturday, December 13, 2008 5:51 PM, Keir Fraser wrote:
>> By the way, c/s 18102 (subsequently reverted) may be interesting for you in
>> implementing the TSC-and-no-platform-timer mode. I'm not sure how much of it
>> will really be applicable, but it might be worth a look at least.
> 
> c/s 18102 looks like an incomplete patch which wants to reintroduce TSC as the
> plt only when the tsc is always running & at a constant rate. I guess you
> still want it because TSC access has less cost, right? Anyway, it indeed has
> less to do with my current goal: handling the deepC-stop tsc which runs at
> constant rate.

You don't need to take anything from that c/s if it's not useful.

 -- Keir


* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-13 15:41                       ` Keir Fraser
@ 2008-12-15  3:06                         ` Wei, Gang
  2008-12-15  9:40                           ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-15  3:06 UTC (permalink / raw)
  To: Keir Fraser, xen-devel; +Cc: Tian, Kevin

[-- Attachment #1: Type: text/plain, Size: 5827 bytes --]

Here is the updated patch for constant-tsc case. -Jimmy

CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus

Originally, the sequence for each cpu is [tsc-save, enter deepC, break-evt, exit deepC, tsc-restore], so systematic error accumulates easily. Once the workloads between cpus are not balanced, the tsc skew between cpus eventually grows bigger and bigger - more than 10 seconds of skew can be observed.

Then we removed the tsc-save step and did the tsc-restore after exiting deepC based only on the percpu t->stime_master_stamp, t->tsc_scale, & t->local_tsc_stamp. That makes the accumulation slower, but cannot eliminate it.

Now, for the constant-tsc case, we just keep an initial stamp via cstate_init_stamp during booting/s3 resume, which is based on the platform stime. All cpus need only do a tsc-restore relative to the initial stamp after exiting deepC. The base and tsc->ns scale are fixed and the same for all cpus, so accumulated tsc-skew is avoided. BTW, bypass the percpu tsc scale calibration for the constant-tsc case.

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r 045f70d1acdb xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/time.c	Mon Dec 15 10:35:11 2008 +0800
@@ -69,8 +69,11 @@ static DEFINE_PER_CPU(struct cpu_time, c
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
 
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
+/* TSC will not stop during deep C state? */
+static bool_t tsc_nostop;
+/* TSC will be constant rate, independent with P/T state? */
+static int constant_tsc = 0;
+boolean_param("const_tsc", constant_tsc);
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -551,6 +554,10 @@ static u64 plt_stamp;            /* hard
 static u64 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+/* following 2 variables are for deep C state TSC restore usage */
+static u64 initial_tsc_stamp;    /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
 static void plt_overflow(void *unused)
 {
     u64 count;
@@ -664,25 +671,41 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+static void cstate_init_stamp(void)
+{
+    if ( tsc_nostop || !constant_tsc )
+        return;
+
+    initial_stime_platform_stamp = read_platform_stime();
+    rdtscll(initial_tsc_stamp);
+}
+
+static inline void __restore_tsc(s_time_t plt_stime)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( tsc_nostop )
         return;
 
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
+    stime_delta = plt_stime - 
+        (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp);
+
     if ( stime_delta < 0 )
         stime_delta = 0;
 
     tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
 
-    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
+    wrmsrl(MSR_IA32_TSC, 
+        (constant_tsc ? initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta);
 }
 
+void cstate_restore_tsc(void)
+{
+    __restore_tsc(read_platform_stime());
+}
 /***************************************************************************
  * CMOS Timer functions
  ***************************************************************************/
@@ -960,6 +983,18 @@ static void local_time_calibration(void)
            curr_master_stime - curr_local_stime);
 #endif
 
+    if ( constant_tsc )
+    {
+        local_irq_disable();
+        t->local_tsc_stamp    = curr_tsc;
+        t->stime_local_stamp  = curr_master_stime;
+        t->stime_master_stamp = curr_master_stime;
+        local_irq_enable();
+
+        update_vcpu_system_time(current);
+        goto out;
+    }
+
     /* Local time warps forward if it lags behind master time. */
     if ( curr_local_stime < curr_master_stime )
         curr_local_stime = curr_master_stime;
@@ -1082,6 +1117,8 @@ static void time_calibration_rendezvous(
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
+    __restore_tsc(r->master_stime);
+
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1162,23 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
+    /* for recent intel x86 model, the tsc increments at a constant rate */
+    if ( (current_cpu_data.x86 == 0xf && current_cpu_data.x86_model >= 0x03) ||
+         (current_cpu_data.x86 == 0x6 && current_cpu_data.x86_model >= 0x0e) )
+    {
+        int cpu;
+
+        constant_tsc = 1;
+
+        for_each_cpu(cpu)
+        {
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+        }
+    }
+
+    /* Does the TSC keep running during deep C states? */
     if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+        tsc_nostop = 1;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1139,6 +1190,8 @@ int __init init_xen_time(void)
 
     stime_platform_stamp = NOW();
     init_platform_timer();
+
+    cstate_init_stamp();
 
     init_percpu_time();
 
@@ -1260,6 +1313,8 @@ int time_resume(void)
     disable_pit_irq();
 
     init_percpu_time();
+
+    cstate_init_stamp();
 
     do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
 

[-- Attachment #2: tsc-skew-20081213-1.patch --]
[-- Type: application/octet-stream, Size: 5768 bytes --]

CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus

Originally, the sequence for each cpu is [tsc-save, enter deepC, break-evt, exit deepC, tsc-restore], so systematic error accumulates easily. Once the workloads between cpus are not balanced, the tsc skew between cpus eventually grows bigger and bigger - more than 10 seconds of skew can be observed.

Then we removed the tsc-save step and did the tsc-restore after exiting deepC based only on the percpu t->stime_master_stamp, t->tsc_scale, & t->local_tsc_stamp. That makes the accumulation slower, but cannot eliminate it.

Now, for the constant-tsc case, we just keep an initial stamp via cstate_init_stamp during booting/s3 resume, which is based on the platform stime. All cpus need only do a tsc-restore relative to the initial stamp after exiting deepC. The base and tsc->ns scale are fixed and the same for all cpus, so accumulated tsc-skew is avoided. BTW, bypass the percpu tsc scale calibration for the constant-tsc case.

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r 045f70d1acdb xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/time.c	Mon Dec 15 10:35:11 2008 +0800
@@ -69,8 +69,11 @@ static DEFINE_PER_CPU(struct cpu_time, c
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
 
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
+/* TSC will not stop during deep C state? */
+static bool_t tsc_nostop;
+/* TSC will be constant rate, independent with P/T state? */
+static int constant_tsc = 0;
+boolean_param("const_tsc", constant_tsc);
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -551,6 +554,10 @@ static u64 plt_stamp;            /* hard
 static u64 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+/* following 2 variables are for deep C state TSC restore usage */
+static u64 initial_tsc_stamp;    /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
 static void plt_overflow(void *unused)
 {
     u64 count;
@@ -664,25 +671,41 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+static void cstate_init_stamp(void)
+{
+    if ( tsc_nostop || !constant_tsc )
+        return;
+
+    initial_stime_platform_stamp = read_platform_stime();
+    rdtscll(initial_tsc_stamp);
+}
+
+static inline void __restore_tsc(s_time_t plt_stime)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( tsc_nostop )
         return;
 
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
+    stime_delta = plt_stime - 
+        (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp);
+
     if ( stime_delta < 0 )
         stime_delta = 0;
 
     tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
 
-    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
+    wrmsrl(MSR_IA32_TSC, 
+        (constant_tsc ? initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta);
 }
 
+void cstate_restore_tsc(void)
+{
+    __restore_tsc(read_platform_stime());
+}
 /***************************************************************************
  * CMOS Timer functions
  ***************************************************************************/
@@ -960,6 +983,18 @@ static void local_time_calibration(void)
            curr_master_stime - curr_local_stime);
 #endif
 
+    if ( constant_tsc )
+    {
+        local_irq_disable();
+        t->local_tsc_stamp    = curr_tsc;
+        t->stime_local_stamp  = curr_master_stime;
+        t->stime_master_stamp = curr_master_stime;
+        local_irq_enable();
+
+        update_vcpu_system_time(current);
+        goto out;
+    }
+
     /* Local time warps forward if it lags behind master time. */
     if ( curr_local_stime < curr_master_stime )
         curr_local_stime = curr_master_stime;
@@ -1082,6 +1117,8 @@ static void time_calibration_rendezvous(
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
+    __restore_tsc(r->master_stime);
+
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1162,23 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
+    /* for recent intel x86 model, the tsc increments at a constant rate */
+    if ( (current_cpu_data.x86 == 0xf && current_cpu_data.x86_model >= 0x03) ||
+         (current_cpu_data.x86 == 0x6 && current_cpu_data.x86_model >= 0x0e) )
+    {
+        int cpu;
+
+        constant_tsc = 1;
+
+        for_each_cpu(cpu)
+        {
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+        }
+    }
+
+    /* Does the TSC keep running during deep C states? */
     if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+        tsc_nostop = 1;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1139,6 +1190,8 @@ int __init init_xen_time(void)
 
     stime_platform_stamp = NOW();
     init_platform_timer();
+
+    cstate_init_stamp();
 
     init_percpu_time();
 
@@ -1260,6 +1313,8 @@ int time_resume(void)
     disable_pit_irq();
 
     init_percpu_time();
+
+    cstate_init_stamp();
 
     do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
 



* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-15  3:06                         ` Wei, Gang
@ 2008-12-15  9:40                           ` Wei, Gang
  2008-12-15 12:03                             ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-15  9:40 UTC (permalink / raw)
  To: Keir Fraser, xen-devel; +Cc: Tian, Kevin

[-- Attachment #1: Type: text/plain, Size: 193 bytes --]

I have redone the constant_tsc & tsc_nostop check part and am posting it again.

Jimmy

On Monday, December 15, 2008 11:07 AM, Wei, Gang wrote:
> Here is the updated patch for constant-tsc case. -Jimmy

[-- Attachment #2: tsc-skew-20081215-2.patch --]
[-- Type: application/octet-stream, Size: 7228 bytes --]

CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus

Originally, the sequence for each cpu was [tsc-save, enter deep C, break event, exit deep C, tsc-restore], so systematic error accumulated easily. Once the workloads between cpus are not balanced, the tsc skew between cpus eventually grows larger and larger - more than 10 seconds of skew has been observed.

We then removed the tsc-save step and did the tsc-restore after exiting deep C based only on the per-cpu t->stime_master_stamp, t->tsc_scale & t->local_tsc_stamp. That slows the accumulation, but cannot eliminate it.

Now, for the constant-tsc case, we just keep an initial stamp via cstate_init_stamp during boot/S3 resume, based on the platform stime. After exiting deep C, each cpu only needs to do a tsc-restore relative to that initial stamp. The base and the tsc->ns scale are fixed and the same for all cpus, so accumulated tsc skew is avoided. Additionally, distinguish the constant-rate property of the TSC from the does-not-stop-in-C-state property, and bypass the per-cpu tsc scale calibration in the constant-tsc case.
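
To make the fixed-base arithmetic concrete, the restore boils down to roughly the following (a minimal sketch only, reusing names from the diff below; it is not itself part of the posted patch):

    /* Sketch: rebase a stopped TSC against one system-wide
     * (initial_tsc_stamp, initial_stime_platform_stamp) pair captured
     * at boot / S3 resume, instead of against per-cpu stamps. */
    static void restore_tsc_sketch(void)
    {
        struct cpu_time *t = &this_cpu(cpu_time);
        struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
        s_time_t stime_delta = read_platform_stime() - initial_stime_platform_stamp;

        if ( stime_delta < 0 )
            stime_delta = 0;

        /* Same base and same scale on every cpu, so no accumulated skew. */
        wrmsrl(MSR_IA32_TSC,
               initial_tsc_stamp + scale_delta(stime_delta, &sys_to_tsc));
    }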

Signed-off-by: Wei Gang <gang.wei@intel.com>

diff -r 045f70d1acdb xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/cpu/amd.c	Mon Dec 15 17:35:00 2008 +0800
@@ -461,8 +461,10 @@ static void __devinit init_amd(struct cp
 
 	if (cpuid_eax(0x80000000) >= 0x80000007) {
 		c->x86_power = cpuid_edx(0x80000007);
-		if (c->x86_power & (1<<8))
+		if (c->x86_power & (1<<8)) {
 			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+			set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+		}
 	}
 
 #ifdef CONFIG_X86_HT
diff -r 045f70d1acdb xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/cpu/intel.c	Mon Dec 15 17:35:00 2008 +0800
@@ -218,6 +218,10 @@ static void __devinit init_intel(struct 
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+	if (cpuid_edx(0x80000007) & (1u<<8)) {
+		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+	}
 
 	start_vmx();
 }
diff -r 045f70d1acdb xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/time.c	Mon Dec 15 17:35:00 2008 +0800
@@ -69,8 +69,10 @@ static DEFINE_PER_CPU(struct cpu_time, c
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
 
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
+/* TSC will not stop during deep C state? */
+static bool_t tsc_nostop;
+/* TSC will be constant rate, independent with P/T state? */
+static int constant_tsc = 0;
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -551,6 +553,10 @@ static u64 plt_stamp;            /* hard
 static u64 plt_stamp;            /* hardware-width platform counter stamp   */
 static struct timer plt_overflow_timer;
 
+/* following 2 variables are for deep C state TSC restore usage */
+static u64 initial_tsc_stamp;    /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
 static void plt_overflow(void *unused)
 {
     u64 count;
@@ -664,25 +670,41 @@ static void init_platform_timer(void)
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+static void cstate_init_stamp(void)
+{
+    if ( tsc_nostop || !constant_tsc )
+        return;
+
+    initial_stime_platform_stamp = read_platform_stime();
+    rdtscll(initial_tsc_stamp);
+}
+
+static inline void __restore_tsc(s_time_t plt_stime)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
     struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( tsc_nostop )
         return;
 
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
+    stime_delta = plt_stime - 
+        (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp);
+
     if ( stime_delta < 0 )
         stime_delta = 0;
 
     tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
 
-    wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
+    wrmsrl(MSR_IA32_TSC, 
+        (constant_tsc ? initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta);
 }
 
+void cstate_restore_tsc(void)
+{
+    __restore_tsc(read_platform_stime());
+}
 /***************************************************************************
  * CMOS Timer functions
  ***************************************************************************/
@@ -960,6 +982,18 @@ static void local_time_calibration(void)
            curr_master_stime - curr_local_stime);
 #endif
 
+    if ( constant_tsc )
+    {
+        local_irq_disable();
+        t->local_tsc_stamp    = curr_tsc;
+        t->stime_local_stamp  = curr_master_stime;
+        t->stime_master_stamp = curr_master_stime;
+        local_irq_enable();
+
+        update_vcpu_system_time(current);
+        goto out;
+    }
+
     /* Local time warps forward if it lags behind master time. */
     if ( curr_local_stime < curr_master_stime )
         curr_local_stime = curr_master_stime;
@@ -1082,6 +1116,8 @@ static void time_calibration_rendezvous(
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
+    __restore_tsc(r->master_stime);
+
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1161,20 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
-    if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+    /* for recent intel x86 model, the tsc increments at a constant rate */
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        int cpu;
+
+        constant_tsc = 1;
+
+        for_each_cpu ( cpu )
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+    }
+
+    /* Is TSC not stop during deep C state ? */
+    if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
+        tsc_nostop = 1;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1139,6 +1186,8 @@ int __init init_xen_time(void)
 
     stime_platform_stamp = NOW();
     init_platform_timer();
+
+    cstate_init_stamp();
 
     init_percpu_time();
 
@@ -1260,6 +1309,8 @@ int time_resume(void)
     disable_pit_irq();
 
     init_percpu_time();
+
+    cstate_init_stamp();
 
     do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
 
diff -r 045f70d1acdb xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/include/asm-x86/cpufeature.h	Mon Dec 15 17:35:00 2008 +0800
@@ -74,6 +74,7 @@
 #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
 #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_NOSTOP_TSC	(3*32+ 9) /* TSC does not stop in C states */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-15  9:40                           ` Wei, Gang
@ 2008-12-15 12:03                             ` Keir Fraser
  2008-12-15 13:28                               ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Keir Fraser @ 2008-12-15 12:03 UTC (permalink / raw)
  To: Wei, Gang, xen-devel; +Cc: Tian, Kevin

[-- Attachment #1: Type: text/plain, Size: 475 bytes --]

On 15/12/2008 09:40, "Wei, Gang" <gang.wei@intel.com> wrote:

> Redo the constant_tsc & tsc_nostop check part and post it again.
> 
> Jimmy
> 
> On Monday, December 15, 2008 11:07 AM, Wei, Gang wrote:
>> Here is the updated patch for constant-tsc case. -Jimmy

I applied the bits outside time.c. For time.c itself, how about the simpler
attached alternative? Does it work well? :-)

Notice I don't reset TSCs at all (except when !NOSTOP_TSC and after deep
sleep).

 -- Keir


[-- Attachment #2: tsc-patch --]
[-- Type: application/octet-stream, Size: 3138 bytes --]

diff -r f827181eadd4 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Mon Dec 15 11:37:14 2008 +0000
+++ b/xen/arch/x86/time.c	Mon Dec 15 12:00:01 2008 +0000
@@ -69,9 +69,6 @@ static DEFINE_PER_CPU(struct cpu_time, c
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
 
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
-
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
  * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
@@ -672,7 +669,7 @@ void cstate_restore_tsc(void)
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
         return;
 
     stime_delta = read_platform_stime() - t->stime_master_stamp;
@@ -901,6 +898,7 @@ void do_settime(unsigned long secs, unsi
 /* Per-CPU communication between rendezvous IRQ and softirq handler. */
 struct cpu_calibration {
     u64 local_tsc_stamp;
+    u64 master_tsc_stamp;
     s_time_t stime_local_stamp;
     s_time_t stime_master_stamp;
 };
@@ -940,6 +938,18 @@ static void local_time_calibration(void)
 
     /* The overall calibration scale multiplier. */
     u32 calibration_mul_frac;
+
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        /* Atomically read cpu_calibration struct and write cpu_time struct. */
+        local_irq_disable();
+        t->local_tsc_stamp    = c->master_tsc_stamp;
+        t->stime_local_stamp  = c->stime_master_stamp;
+        t->stime_master_stamp = c->stime_master_stamp;
+        local_irq_enable();
+        update_vcpu_system_time(current);
+        goto out;
+    }
 
     prev_tsc          = t->local_tsc_stamp;
     prev_local_stime  = t->stime_local_stamp;
@@ -1059,6 +1069,7 @@ struct calibration_rendezvous {
     cpumask_t cpu_calibration_map;
     atomic_t nr_cpus;
     s_time_t master_stime;
+    u64 master_tsc_stamp;
 };
 
 static void time_calibration_rendezvous(void *_r)
@@ -1072,6 +1083,7 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
+        rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_stime /then/ signal */
         atomic_inc(&r->nr_cpus);
     }
@@ -1084,6 +1096,7 @@ static void time_calibration_rendezvous(
     }
 
     rdtscll(c->local_tsc_stamp);
+    c->master_tsc_stamp = r->master_tsc_stamp;
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1126,9 +1139,13 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
-    if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+    /* If we have constant TSCs then scale factor can be shared. */
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        int cpu;
+        for_each_cpu ( cpu )
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+    }
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-15 12:03                             ` Keir Fraser
@ 2008-12-15 13:28                               ` Wei, Gang
  2008-12-15 16:02                                 ` Keir Fraser
  0 siblings, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-15 13:28 UTC (permalink / raw)
  To: Keir Fraser, xen-devel; +Cc: Tian, Kevin

On Monday, December 15, 2008 8:03 PM, Keir Fraser wrote:
> On 15/12/2008 09:40, "Wei, Gang" <gang.wei@intel.com> wrote:
> 
>> Redo the constant_tsc & tsc_nostop check part and post it again.
> 
> I applied the bits outside time.c. For time.c itself, how about the simpler
> attached alternative? Does it work well? :-)

Although it looks simpler and workable, in practice it doesn't work.

The phenomena:

A) xm vcpu-list shows weird numbers, and cpu 1 looks deadlocked.

[root@lke_linux_mv ~]# xm vcpu-list
Name                                ID  VCPU   CPU State   Time(s) CPU Affinity
Domain-0                             0     0     0   r--     124.8 any cpu
Domain-0                             0     1     0   ---  7281029792.3 any cpu
LinuxHVMDomain_1                     1     0     0   ---      37.5 any cpu
LinuxHVMDomain_2                     3     0     0   ---      45.5 any cpu
LinuxHVMDomain_3                     4     0     1   r--       4.1 any cpu
LinuxHVMDomain_4                     2     0     0   ---      41.9 any cpu

The dump info of cpu1:

(XEN) Xen call trace:
(XEN)    [<ffff828c8010ca71>] __dump_execstate+0x1/0x60
(XEN)    [<ffff828c8014f66d>] smp_call_function_interrupt+0x7d/0xd0
(XEN)    [<ffff828c8013b41a>] call_function_interrupt+0x2a/0x30
(XEN)    [<ffff828c80151538>] get_s_time+0x28/0x70
(XEN)    [<ffff828c8017fa0a>] hvm_get_guest_time+0x3a/0x90
(XEN)    [<ffff828c8017d620>] pmt_timer_callback+0x0/0x80
(XEN)    [<ffff828c8017d4fc>] pmt_update_time+0x1c/0x70
(XEN)    [<ffff828c8017d620>] pmt_timer_callback+0x0/0x80
(XEN)    [<ffff828c8017d649>] pmt_timer_callback+0x29/0x80
(XEN)    [<ffff828c80118ebc>] execute_timer+0x2c/0x50
(XEN)    [<ffff828c80118ffa>] timer_softirq_action+0x11a/0x2d0
(XEN)    [<ffff828c801175b0>] do_softirq+0x70/0x80
(XEN)    [<ffff828c80188915>] vmx_asm_do_vmentry+0xd2/0xdd
(XEN)
(XEN) *** Dumping CPU1 guest state: ***
(XEN) ----[ Xen-3.4-unstable  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    1
(XEN) RIP:    f000:[<000000000000054f>]
(XEN) RFLAGS: 0000000000000202   CONTEXT: hvm guest
(XEN) rax: 00000000000001f7   rbx: 0000000000000000   rcx: 0000000000000000
(XEN) rdx: 00000000000001f7   rsi: 00000000000058b0   rdi: 0000000000003400
(XEN) rbp: 0000000000001f7a   rsp: 0000000000001f78   r8:  0000000000000000
(XEN) r9:  0000000000000000   r10: 0000000000000000   r11: 0000000000000000
(XEN) r12: 0000000000000000   r13: 0000000000000000   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 0000000000000010   cr4: 0000000000000000
(XEN) cr3: 0000000000000000   cr2: 0000000000000000
(XEN) ds: 0000   es: 7000   fs: 0000   gs: 0000   ss: 0000   cs: f000
(XEN)

B) the tsc skew becomes larger and larger

(XEN) Synced stime skew: max=54879663ns avg=54529587ns samples=93 current=54879663ns
(XEN) Synced cycles skew: max=113678958 avg=112788173 samples=93 current=113678958
(XEN) Synced stime skew: max=54881161ns avg=54533327ns samples=94 current=54881161ns
(XEN) Synced cycles skew: max=113682743 avg=112797689 samples=94 current=113682743
(XEN) Synced stime skew: max=54882749ns avg=54537006ns samples=95 current=54882749ns
(XEN) Synced cycles skew: max=113686776 avg=112807048 samples=95 current=113686776

> 
> Notice I don't reset TSCs at all (except when !NOSTOP_TSC and after deep
> sleep).
> 
>  -- Keir

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-15 13:28                               ` Wei, Gang
@ 2008-12-15 16:02                                 ` Keir Fraser
  2008-12-16  0:29                                   ` Tian, Kevin
  2008-12-16  2:26                                   ` Wei, Gang
  0 siblings, 2 replies; 31+ messages in thread
From: Keir Fraser @ 2008-12-15 16:02 UTC (permalink / raw)
  To: Wei, Gang, xen-devel; +Cc: Tian, Kevin

On 15/12/2008 13:28, "Wei, Gang" <gang.wei@intel.com> wrote:

>>> Redo the constant_tsc & tsc_nostop check part and post it again.
>> 
>> I applied the bits outside time.c. For time.c itself, how about the simpler
>> attached alternative? Does it work well? :-)
> 
> Although it looks simpler and workable, in practice it doesn't work.

Weird. I wonder if CPU TSCs aren't as synced as we'd like, and we're getting
a -ve TSC delta in get_s_time(). Perhaps setting the TSC MSR to
r->master_tsc_stamp in time_calibration_rendezvous() would avoid that.

 -- Keir
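
The failure mode suspected here is worth spelling out. A hedged sketch of the relevant arithmetic in get_s_time() (names follow xen/arch/x86/time.c; the body is paraphrased from memory, not quoted from the tree):

    /* With the draft above, t->local_tsc_stamp is cpu0's TSC at the
     * rendezvous. If this cpu's TSC lags behind it (because it stopped
     * in a deep C-state), the unsigned subtraction wraps to a huge
     * value and local time leaps far into the future - plausibly the
     * bogus 7281029792.3s cpu time seen in 'xm vcpu-list' above. */
    s_time_t get_s_time_sketch(void)
    {
        struct cpu_time *t = &this_cpu(cpu_time);
        u64 tsc, delta;

        rdtscll(tsc);
        delta = tsc - t->local_tsc_stamp;    /* wraps if tsc < stamp */
        return t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
    }

Writing the TSC MSR up to r->master_tsc_stamp in the rendezvous, as suggested, guarantees the local TSC can never be behind the stamp it is compared against.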

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-15 16:02                                 ` Keir Fraser
@ 2008-12-16  0:29                                   ` Tian, Kevin
  2008-12-16  0:44                                     ` Tian, Kevin
  2008-12-16  2:26                                   ` Wei, Gang
  1 sibling, 1 reply; 31+ messages in thread
From: Tian, Kevin @ 2008-12-16  0:29 UTC (permalink / raw)
  To: 'Keir Fraser', Wei, Gang, xen-devel

>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] 
>Sent: Tuesday, December 16, 2008 12:02 AM
>
>On 15/12/2008 13:28, "Wei, Gang" <gang.wei@intel.com> wrote:
>
>>>> Redo the constant_tsc & tsc_nostop check part and post it again.
>>> 
>>> I applied the bits outside time.c. For time.c itself, how about the simpler
>>> attached alternative? Does it work well? :-)
>> 
>> Although it looks simpler and workable, in practice it doesn't work.
>
>Weird. I wonder if CPU TSCs aren't as synced as we'd like, and 
>we're getting
>a -ve TSC delta in get_s_time(). Perhaps setting the TSC MSR to
>r->master_tsc_stamp in time_calibration_rendezvous() would avoid that.
>

I'm not sure why you don't want to adjust the TSC. Whether the cpu
TSCs are synced or not doesn't matter if the TSC stops whenever an
individual core enters a deep C-state. As long as the cpus spend
different amounts of time in deep C-states, you always end up with
ever-increasing TSC skew, as the rough numbers below illustrate.
Whatever you do on the Xen side without adjusting the TSC only helps
consumers of Xen system time, e.g. pv guests; for hvm guests the TSC
skew always causes problems.
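
A rough back-of-the-envelope illustration of the point (the 10% duty
cycle below is invented purely for illustration):

    skew growth rate = fraction of wall-clock time a core's TSC is stopped
    example          : one core sits in a TSC-stopping C-state 10% of the
                       time while another core stays busy
    after  60 s      : 0.10 * 60 s  = 6 s of TSC skew
    after 120 s      : 0.10 * 120 s = 12 s - already beyond the "more than
                       10 seconds" reported at the start of this thread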

I think no software approach can cleanly solve this TSC skew without
hardware assistance such as an always-running TSC. Since Jimmy's
approach works far better than the previous one (yes, we know it is
weaker when one cpu stays out of deep C-states for a long time),
isn't it still worth slipping in? In the meantime, IMO your change
may also be wanted, since there is little need for periodic time
calibration once the TSC is known to be constant-rate. But that's a
separate issue from the one we are trying to solve here. :-)

Thanks,
Kevin

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-16  0:29                                   ` Tian, Kevin
@ 2008-12-16  0:44                                     ` Tian, Kevin
  2008-12-16  0:58                                       ` Tian, Kevin
  0 siblings, 1 reply; 31+ messages in thread
From: Tian, Kevin @ 2008-12-16  0:44 UTC (permalink / raw)
  To: Tian, Kevin, 'Keir Fraser', Wei, Gang, xen-devel

I forgot to add the comment below; I read your patch too quickly.

It's supposed to work, and maybe some sequencing issue defeats the
effect you want to achieve. For example, at least the lines within
init_xen_time look useless, since calibrate_tsc_ap happens later and
will overwrite the AP tsc_scale. Anyway, this looks like the right
direction, and we'll do some debugging to find the exact reason.

Thanks,
Kevin

>From: Tian, Kevin
>Sent: Tuesday, December 16, 2008 8:29 AM
>
>>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] 
>>Sent: Tuesday, December 16, 2008 12:02 AM
>>
>>On 15/12/2008 13:28, "Wei, Gang" <gang.wei@intel.com> wrote:
>>
>>>>> Redo the constant_tsc & tsc_nostop check part and post it again.
>>>> 
>>>> I applied the bits outside time.c. For time.c itself, how about the simpler
>>>> attached alternative? Does it work well? :-)
>>> 
>>> Although it looks simpler and workable, in practice it doesn't work.
>>
>>Weird. I wonder if CPU TSCs aren't as synced as we'd like, and 
>>we're getting
>>a -ve TSC delta in get_s_time(). Perhaps setting the TSC MSR to
>>r->master_tsc_stamp in time_calibration_rendezvous() would avoid that.
>>
>
>I'm not sure why you don't want to adjust TSC. Whether cpu TSCs
>are sync-ed or not doesn't make sence if TSC will stop when
>individual core enters deep C-state. As long as multiple cpus get
>difference chance in deep C-state, finally you always has increasing
>TSC skews. Whatever you can do in Xen side w/o adjusting TSC,
>it only helps those aware of xen system time, e.g. pv guest. However
>for hvm guest, TSC skew always causes problem.
>
>I think no software approach can cleanly solve this TSC skew, unless
>with hardware assistance like always running tsc. Since Jimmy's
>approach can work far better than previous one, (yes, we know some
>weakness when one cpu doesn't enter deep C-state for a long time),
>it's still worthy of slipping in? In the meantime, IMO your change can
>be also required, since there's no much need for periodic time calib-
>ration upon a constant tsc feature. But it's a seperate issue as we
>aim to solve. :-)
>
>Thanks,
>Kevin

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-16  0:44                                     ` Tian, Kevin
@ 2008-12-16  0:58                                       ` Tian, Kevin
  0 siblings, 0 replies; 31+ messages in thread
From: Tian, Kevin @ 2008-12-16  0:58 UTC (permalink / raw)
  To: Tian, Kevin, 'Keir Fraser', Wei, Gang, xen-devel

Again, that was another wrong comment from me, since the sequence is
actually correct. Not sure why my mind is so muddled on such a good
morning. :-( Jimmy is now trying your suggestion to sync the TSC MSR
in the time calibration softirq.

Thanks,
Kevin

>From: Tian, Kevin 
>Sent: Tuesday, December 16, 2008 8:45 AM
>To: Tian, Kevin; 'Keir Fraser'; Wei, Gang; 
>
>Forgot below comment and I read your patch too quickly.
>
>It's supposed to work, and maybe some sequence issue reverts
>the effect you want to achieve. For example, at least the lines
>within init_xen_time is useless, since calibrate_tsc_ap happens
>later which will update AP tsc_scale. Anyway, this looks in a
>right direction, and we'll do some debug to find the exact reason.
>
>Thanks,
>Kevin
>
>>From: Tian, Kevin
>>Sent: Tuesday, December 16, 2008 8:29 AM
>>
>>>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] 
>>>Sent: Tuesday, December 16, 2008 12:02 AM
>>>
>>>On 15/12/2008 13:28, "Wei, Gang" <gang.wei@intel.com> wrote:
>>>
>>>>>> Redo the constant_tsc & tsc_nostop check part and post it again.
>>>>> 
>>>>> I applied the bits outside time.c. For time.c itself, how about the simpler
>>>>> attached alternative? Does it work well? :-)
>>>> 
>>>> Although it looks simpler and workable, in practice it doesn't work.
>>>
>>>Weird. I wonder if CPU TSCs aren't as synced as we'd like, and 
>>>we're getting
>>>a -ve TSC delta in get_s_time(). Perhaps setting the TSC MSR to
>>>r->master_tsc_stamp in time_calibration_rendezvous() would avoid that.
>>>
>>
>>I'm not sure why you don't want to adjust TSC. Whether cpu TSCs
>>are sync-ed or not doesn't make sence if TSC will stop when
>>individual core enters deep C-state. As long as multiple cpus get
>>difference chance in deep C-state, finally you always has increasing
>>TSC skews. Whatever you can do in Xen side w/o adjusting TSC,
>>it only helps those aware of xen system time, e.g. pv guest. However
>>for hvm guest, TSC skew always causes problem.
>>
>>I think no software approach can cleanly solve this TSC skew, unless
>>with hardware assistance like always running tsc. Since Jimmy's
>>approach can work far better than previous one, (yes, we know some
>>weakness when one cpu doesn't enter deep C-state for a long time),
>>it's still worthy of slipping in? In the meantime, IMO your change can
>>be also required, since there's no much need for periodic time calib-
>>ration upon a constant tsc feature. But it's a seperate issue as we
>>aim to solve. :-)
>>
>>Thanks,
>>Kevin
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus
  2008-12-15 16:02                                 ` Keir Fraser
  2008-12-16  0:29                                   ` Tian, Kevin
@ 2008-12-16  2:26                                   ` Wei, Gang
  2008-12-16  8:00                                     ` Re: [PATCH] CPUIDLE: revise tsc-save/restore toavoid " Jan Beulich
  1 sibling, 1 reply; 31+ messages in thread
From: Wei, Gang @ 2008-12-16  2:26 UTC (permalink / raw)
  To: Keir Fraser, xen-devel; +Cc: Tian, Kevin

[-- Attachment #1: Type: text/plain, Size: 1519 bytes --]

On Tuesday, December 16, 2008 12:02 AM, Keir Fraser wrote:
> On 15/12/2008 13:28, "Wei, Gang" <gang.wei@intel.com> wrote:
> 
>>>> Redo the constant_tsc & tsc_nostop check part and post it again.
>>> 
>>> I applied the bits outside time.c. For time.c itself, how about the simpler
>>> attached alternative? Does it work well? :-)
>> 
>> Although it looks simpler and workable, in practice it doesn't work.
> 
> Weird. I wonder if CPU TSCs aren't as synced as we'd like, and we're getting
> a -ve TSC delta in get_s_time(). Perhaps setting the TSC MSR to
> r->master_tsc_stamp in time_calibration_rendezvous() would avoid that.

I added a conditional wrmsrl as you suggested, and meanwhile removed the unnecessary c->master_tsc_stamp. It works fine now.

The 't' key outputs below were taken in an extreme case: pin all dom0 & guest vcpus on cpu1 and run 'while true; do a=1; done' within one guest.
The largest stime skew is ~40us, and the largest cycles skew is ~100,000 ticks (~40us). In the normal idle case the skew is quite small (~xxx ns).

(XEN) Synced stime skew: max=39662ns avg=8038ns samples=850 current=326ns
(XEN) Synced cycles skew: max=100475 avg=20368 samples=850 current=825
...
(XEN) Synced stime skew: max=39750ns avg=16958ns samples=3954 current=30667ns
(XEN) Synced cycles skew: max=100708 avg=42967 samples=3954 current=77696
...
(XEN) Synced stime skew: max=39750ns avg=17318ns samples=4544 current=22981ns
(XEN) Synced cycles skew: max=100708 avg=43880 samples=4544 current=58225

Jimmy

[-- Attachment #2: tsc-2.patch --]
[-- Type: application/octet-stream, Size: 2997 bytes --]

diff -r ec4cc8ee132a xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Dec 16 09:09:09 2008 +0800
+++ b/xen/arch/x86/time.c	Tue Dec 16 10:03:48 2008 +0800
@@ -68,9 +68,6 @@ static DEFINE_PER_CPU(struct cpu_time, c
 /* Calibrate all CPUs to platform timer every EPOCH. */
 #define EPOCH MILLISECS(1000)
 static struct timer calibration_timer;
-
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -671,7 +668,7 @@ void cstate_restore_tsc(void)
     s_time_t stime_delta;
     u64 tsc_delta;
 
-    if ( tsc_invariant )
+    if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
         return;
 
     stime_delta = read_platform_stime() - t->stime_master_stamp;
@@ -940,6 +937,18 @@ static void local_time_calibration(void)
     /* The overall calibration scale multiplier. */
     u32 calibration_mul_frac;
 
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        /* Atomically read cpu_calibration struct and write cpu_time struct. */
+        local_irq_disable();
+        t->local_tsc_stamp    = c->local_tsc_stamp;
+        t->stime_local_stamp  = c->stime_master_stamp;
+        t->stime_master_stamp = c->stime_master_stamp;
+        local_irq_enable();
+        update_vcpu_system_time(current);
+        goto out;
+    }
+
     prev_tsc          = t->local_tsc_stamp;
     prev_local_stime  = t->stime_local_stamp;
     prev_master_stime = t->stime_master_stamp;
@@ -1058,6 +1067,7 @@ struct calibration_rendezvous {
     cpumask_t cpu_calibration_map;
     atomic_t nr_cpus;
     s_time_t master_stime;
+    u64 master_tsc_stamp;
 };
 
 static void time_calibration_rendezvous(void *_r)
@@ -1071,6 +1081,7 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
+        rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_stime /then/ signal */
         atomic_inc(&r->nr_cpus);
     }
@@ -1082,6 +1093,8 @@ static void time_calibration_rendezvous(
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1138,13 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    /* Is TSC invariant during deep C state? */
-    if ( cpuid_edx(0x80000007) & (1u<<8) )
-        tsc_invariant = 1;
+    /* If we have constant TSCs then scale factor can be shared. */
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    {
+        int cpu;
+        for_each_cpu ( cpu )
+            per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+    }
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore toavoid big tsc skew between cpus
  2008-12-16  2:26                                   ` Wei, Gang
@ 2008-12-16  8:00                                     ` Jan Beulich
  2008-12-16  8:09                                       ` Wei, Gang
  0 siblings, 1 reply; 31+ messages in thread
From: Jan Beulich @ 2008-12-16  8:00 UTC (permalink / raw)
  To: Gang Wei; +Cc: Kevin Tian, xen-devel, Keir Fraser

>>> "Wei, Gang" <gang.wei@intel.com> 16.12.08 03:26 >>>
>Below 't' key outputs are gotten in the extreme case: pin all dom0 & guest vcpus on cpu1 & execute cmd 'while true; do a=1; done'
>within one guest.
>The largest stime skew is ~40us, largest cycles skew is ~100,000 ticks(~40us). The normal idle case skew is quite small (~xxxns).
>
>(XEN) Synced stime skew: max=39662ns avg=8038ns samples=850 current=326ns
>(XEN) Synced cycles skew: max=100475 avg=20368 samples=850 current=825
>...
>(XEN) Synced stime skew: max=39750ns avg=16958ns samples=3954 current=30667ns
>(XEN) Synced cycles skew: max=100708 avg=42967 samples=3954 current=77696
>...
>(XEN) Synced stime skew: max=39750ns avg=17318ns samples=4544 current=22981ns
>(XEN) Synced cycles skew: max=100708 avg=43880 samples=4544 current=58225

Is the average continuing to grow over longer periods of time? I would have
expected it to converge rather than keep increasing, as evidence that this is
a long-term solution.

Jan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Re: [PATCH] CPUIDLE: revise tsc-save/restore toavoid big tsc skew between cpus
  2008-12-16  8:00                                     ` Re: [PATCH] CPUIDLE: revise tsc-save/restore toavoid " Jan Beulich
@ 2008-12-16  8:09                                       ` Wei, Gang
  0 siblings, 0 replies; 31+ messages in thread
From: Wei, Gang @ 2008-12-16  8:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tian, Kevin, xen-devel, Keir Fraser

On Tuesday, December 16, 2008 4:00 PM, Jan Beulich wrote:
>>>> "Wei, Gang" <gang.wei@intel.com> 16.12.08 03:26 >>>
>> Below 't' key outputs are gotten in the extreme case: pin all dom0 & guest
>> vcpus on cpu1 & execute cmd 'while true; do a=1; done' within one guest. The
>> largest stime skew is ~40us, largest cycles skew is ~100,000 ticks(~40us).
>> The normal idle case skew is quite small (~xxxns).  
>> 
>> (XEN) Synced stime skew: max=39662ns avg=8038ns samples=850 current=326ns
>> (XEN) Synced cycles skew: max=100475 avg=20368 samples=850 current=825 ...
>> (XEN) Synced stime skew: max=39750ns avg=16958ns samples=3954 current=30667ns
>> (XEN) Synced cycles skew: max=100708 avg=42967 samples=3954 current=77696 ...
>> (XEN) Synced stime skew: max=39750ns avg=17318ns samples=4544 current=22981ns
>> (XEN) Synced cycles skew: max=100708 avg=43880 samples=4544 current=58225
> 
> Is the average continuing to grow over larger periods of time? I would have
> expected it to converge rather than increase as a proof of it being a long
> term solution.

The average appears to converge on my box. We can do more tests after it gets in.

Jimmy

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2008-12-16  8:09 UTC | newest]

Thread overview: 31+ messages
2008-12-05  6:22 [PATCH] CPUIDLE: revise tsc-save/restore to avoid big tsc skew between cpus Wei, Gang
2008-12-05  8:53 ` Keir Fraser
2008-12-05  9:21   ` Wei, Gang
2008-12-05  9:59   ` Tian, Kevin
2008-12-05 10:05     ` Tian, Kevin
2008-12-05 10:13       ` Keir Fraser
2008-12-05 11:50         ` Tian, Kevin
2008-12-05 12:36           ` Keir Fraser
2008-12-12  3:36             ` Wei, Gang
2008-12-12  9:31               ` Keir Fraser
2008-12-13  4:01                 ` Wei, Gang
2008-12-13  9:51                   ` Keir Fraser
2008-12-13 15:27                     ` Wei, Gang
2008-12-13 15:41                       ` Keir Fraser
2008-12-15  3:06                         ` Wei, Gang
2008-12-15  9:40                           ` Wei, Gang
2008-12-15 12:03                             ` Keir Fraser
2008-12-15 13:28                               ` Wei, Gang
2008-12-15 16:02                                 ` Keir Fraser
2008-12-16  0:29                                   ` Tian, Kevin
2008-12-16  0:44                                     ` Tian, Kevin
2008-12-16  0:58                                       ` Tian, Kevin
2008-12-16  2:26                                   ` Wei, Gang
2008-12-16  8:00                                     ` Re: [PATCH] CPUIDLE: revise tsc-save/restore toavoid " Jan Beulich
2008-12-16  8:09                                       ` Wei, Gang
2008-12-05 11:30   ` [PATCH] CPUIDLE: revise tsc-save/restore to avoid " Wei, Gang
2008-12-05 11:47     ` Keir Fraser
2008-12-05 11:53       ` Tian, Kevin
2008-12-05 13:06     ` Keir Fraser
2008-12-05 14:47       ` Keir Fraser
2008-12-06  7:36         ` Wei, Gang
