All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] [PATCH] use "reliable" tsc properly when available, but verify
@ 2009-09-28 20:19 Dan Magenheimer
  2009-09-28 21:01 ` Keir Fraser
  0 siblings, 1 reply; 12+ messages in thread
From: Dan Magenheimer @ 2009-09-28 20:19 UTC (permalink / raw)
  To: Xen-Devel (E-mail)

[-- Attachment #1: Type: text/plain, Size: 11555 bytes --]

(This is compile-tested only.  Review requested especially
where marked with FIXME.  Also, code that does write_tsc
in smpboot.c may also need patching.  And Linux does
a touch_nmi_watchdog()... does Xen need to do
anything similar?)

Most modern Intel and AMD processors and servers have
fully synchronized, non-stop TSCs that don't even stop
in C3 state.  Recent upstream Linux kernels test a cpuid
bit and record this capability as
X86_FEATURE_TSC_RELIABLE.  According to Intel, all
recent Intel processors AND the systems built on them
have this property.  According to AMD, many recent AMD
processors (and all recent server processors) have
this property but apparently some of the systems built
on them do not.

So we trust but verify... if the cpuid bit is set
we assume a reliable tsc, but use the Linux boottime
check_tsc_warp algorithm to periodically verify that
the tsc hasn't skewed.  If it has, we fall back to Xen
managing (and periodically writing) the TSC.

The check_tsc_warp algorithm is CPU-intensive, so
we test it on a decaying schedule, at 1sec, 2sec,
4sec, 8sec, 16sec, 32sec, etc.

Also correct mis-spelling of NOSTOP to NONSTOP to
match the Linux spelling.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>

diff -r 1e33261a814f xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/Makefile	Mon Sep 28 13:26:11 2009 -0600
@@ -45,6 +45,7 @@ obj-y += string.o
 obj-y += string.o
 obj-y += sysctl.o
 obj-y += time.o
+obj-y += tsc_sync.o
 obj-y += trace.o
 obj-y += traps.o
 obj-y += usercopy.o
diff -r 1e33261a814f xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/cpu/amd.c	Mon Sep 28 13:26:11 2009 -0600
@@ -463,7 +463,9 @@ static void __devinit init_amd(struct cp
 		c->x86_power = cpuid_edx(0x80000007);
 		if (c->x86_power & (1<<8)) {
 			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-			set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+			set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+                        if ( c->x86 != 0x11 )
+			    set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
 		}
 	}
 
diff -r 1e33261a814f xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/cpu/intel.c	Mon Sep 28 13:26:11 2009 -0600
@@ -226,7 +226,8 @@ static void __devinit init_intel(struct 
 		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
 	if (cpuid_edx(0x80000007) & (1u<<8)) {
 		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-		set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+		set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+		set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
 	}
 	if ((c->cpuid_level >= 0x00000006) &&
 	    (cpuid_eax(0x00000006) & (1u<<2)))
diff -r 1e33261a814f xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/time.c	Mon Sep 28 13:26:11 2009 -0600
@@ -698,7 +698,7 @@ void cstate_restore_tsc(void)
     s_time_t stime_delta;
     u64 new_tsc;
 
-    if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
+    if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
     stime_delta = read_platform_stime() - t->stime_master_stamp;
@@ -1100,6 +1100,11 @@ struct calibration_rendezvous {
     u64 master_tsc_stamp;
 };
 
+static void (*rendezvous_func) (void *info);
+static int tsc_reliable = 0;
+static unsigned long tsc_max_warp = 0;
+static unsigned long tsc_verify_decay = 0;
+
 static void time_calibration_tsc_rendezvous(void *_r)
 {
     int i;
@@ -1180,6 +1185,50 @@ static void time_calibration_std_rendezv
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
 }
 
+static void time_verify_tsc_calibration_rendezvous(void *_r)
+{
+    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+    struct calibration_rendezvous *r = _r;
+    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
+
+    /* check_tsc_warp is VERY expensive so test only on log2 intervals */
+    tsc_verify_decay++;
+    if ( !(tsc_verify_decay & (tsc_verify_decay-1)) )
+    {
+        if ( smp_processor_id() == 0 )
+        {
+            while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
+                mb();
+            check_tsc_warp(cpu_khz, &tsc_max_warp);
+            atomic_inc(&r->semaphore);
+        }
+        else
+        {
+            atomic_inc(&r->semaphore);
+            while ( atomic_read(&r->semaphore) < total_cpus )
+                mb();
+            check_tsc_warp(cpu_khz, &tsc_max_warp);
+            atomic_inc(&r->semaphore);
+            while ( atomic_read(&r->semaphore) > total_cpus )
+                mb();
+        }
+    }
+
+    if ( tsc_max_warp && smp_processor_id() == 0 )
+    {
+        printk("TSC warp detected (%lu cycles), disabling reliable TSC\n",
+                tsc_max_warp);
+        tsc_reliable = -1;
+        rendezvous_func = time_calibration_tsc_rendezvous;
+    }
+
+    rdtscll(c->local_tsc_stamp);
+    c->stime_local_stamp = get_s_time();
+    c->stime_master_stamp = r->master_stime;
+
+    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+}
+
 static void time_calibration(void *unused)
 {
     struct calibration_rendezvous r = {
@@ -1188,11 +1237,7 @@ static void time_calibration(void *unuse
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
-    on_selected_cpus(&r.cpu_calibration_map,
-                     opt_consistent_tscs
-                     ? time_calibration_tsc_rendezvous
-                     : time_calibration_std_rendezvous,
-                     &r, 1);
+    on_selected_cpus(&r.cpu_calibration_map, rendezvous_func, &r, 1);
 }
 
 void init_percpu_time(void)
@@ -1219,16 +1264,19 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    if ( !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-        opt_consistent_tscs = 0;
-
-    /* If we have constant TSCs then scale factor can be shared. */
-    if ( opt_consistent_tscs )
+    /* If we have reliable TSCs then scale factor can be shared. */
+    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
     {
         int cpu;
         for_each_possible_cpu ( cpu )
             per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+        rendezvous_func = time_verify_tsc_calibration_rendezvous;
+        tsc_reliable = 1;
     }
+    else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        rendezvous_func = time_calibration_tsc_rendezvous;
+    else
+        rendezvous_func = time_calibration_std_rendezvous;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1463,6 +1511,13 @@ static void dump_softtsc(unsigned char k
     struct domain *d;
     int domcnt = 0;
 
+    if ( tsc_reliable > 0 )
+        printk("TSC is reliable\n");
+    else if ( tsc_reliable < 0 )
+        printk("Hardware determined TSC reliable, verification failed with "
+               "warp = %lu cycles\n", tsc_max_warp);
+    else
+        printk("TSC is not reliable\n");
     for_each_domain ( d )
     {
         if ( !d->arch.vtsc )
diff -r 1e33261a814f xen/arch/x86/tsc_sync.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/tsc_sync.c	Mon Sep 28 13:26:11 2009 -0600
@@ -0,0 +1,93 @@
+/*
+ * check TSC synchronization.
+ *
+ * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
+ * Modified for Xen by Dan Magenheimer, Oracle Corp.
+ *
+ * We check whether all boot CPUs have their TSC's synchronized,
+ * print a warning if not and turn off the TSC clock-source.
+ *
+ * The warp-check is point-to-point between two CPUs, the CPU
+ * initiating the bootup is the 'source CPU', the freshly booting
+ * CPU is the 'target CPU'.
+ *
+ * Only two CPUs may participate - they can enter in any order.
+ * ( The serial nature of the boot logic and the CPU hotplug lock
+ *   protects against more than 2 CPUs entering this code. )
+ */
+#include <xen/config.h>
+#include <xen/spinlock.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+
+/* FIXME Are these OK for Xen? Xen has no _raw_spin_lock() */
+#define rdtsc_barrier  mb
+#define raw_spinlock_t spinlock_t
+#define __raw_spin_lock spin_lock
+#define __raw_spin_unlock spin_unlock
+#define __RAW_SPIN_LOCK_UNLOCKED  SPIN_LOCK_UNLOCKED
+
+/*
+ * We use a raw spinlock in this exceptional case, because
+ * we want to have the fastest, inlined, non-debug version
+ * of a critical section, to be able to prove TSC time-warps:
+ */
+static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+
+static __cpuinitdata cycles_t last_tsc;
+
+/*
+ * TSC-warp measurement loop running on both CPUs:
+ */
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
+{
+	cycles_t start, now, prev, end;
+	int i;
+
+	rdtsc_barrier();
+	start = get_cycles();
+	rdtsc_barrier();
+	/*
+	 * The measurement runs for 2 msecs:
+	 */
+	end = start + tsc_khz * 2ULL;
+	now = start;
+
+	for (i = 0; ; i++) {
+		/*
+		 * We take the global lock, measure TSC, save the
+		 * previous TSC that was measured (possibly on
+		 * another CPU) and update the previous TSC timestamp.
+		 */
+		__raw_spin_lock(&sync_lock);
+		prev = last_tsc;
+		rdtsc_barrier();
+		now = get_cycles();
+		rdtsc_barrier();
+		last_tsc = now;
+		__raw_spin_unlock(&sync_lock);
+
+		/*
+		 * Be nice every now and then (and also check whether
+		 * measurement is done [we also insert a 10 million
+		 * loops safety exit, so we dont lock up in case the
+		 * TSC readout is totally broken]):
+		 */
+		if (unlikely(!(i & 7))) {
+			if (now > end || i > 10000000)
+				break;
+			cpu_relax();
+			/*touch_nmi_watchdog();*/
+		}
+		/*
+		 * Outside the critical section we can now see whether
+		 * we saw a time-warp of the TSC going backwards:
+		 */
+		if (unlikely(prev > now)) {
+			__raw_spin_lock(&sync_lock);
+			if ( *max_warp > prev - now )
+				*max_warp = prev - now;
+			__raw_spin_unlock(&sync_lock);
+		}
+	}
+}
diff -r 1e33261a814f xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/include/asm-x86/cpufeature.h	Mon Sep 28 13:26:11 2009 -0600
@@ -74,9 +74,10 @@
 #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
 #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_NOSTOP_TSC	(3*32+ 9) /* TSC does not stop in C states */
+#define X86_FEATURE_NONSTOP_TSC	(3*32+ 9) /* TSC does not stop in C states */
 #define X86_FEATURE_ARAT	(3*32+ 10) /* Always running APIC timer */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_TSC_RELIABLE (3*32+12) /* TSC is known to be reliable */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff -r 1e33261a814f xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/include/asm-x86/time.h	Mon Sep 28 13:26:11 2009 -0600
@@ -43,4 +43,6 @@ uint64_t ns_to_acpi_pm_tick(uint64_t ns)
 
 void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs);
 
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp);
+
 #endif /* __X86_TIME_H__ */

[-- Attachment #2: tsc-reliable.patch --]
[-- Type: application/octet-stream, Size: 9991 bytes --]

diff -r 1e33261a814f xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/Makefile	Mon Sep 28 13:26:11 2009 -0600
@@ -45,6 +45,7 @@ obj-y += string.o
 obj-y += string.o
 obj-y += sysctl.o
 obj-y += time.o
+obj-y += tsc_sync.o
 obj-y += trace.o
 obj-y += traps.o
 obj-y += usercopy.o
diff -r 1e33261a814f xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/cpu/amd.c	Mon Sep 28 13:26:11 2009 -0600
@@ -463,7 +463,9 @@ static void __devinit init_amd(struct cp
 		c->x86_power = cpuid_edx(0x80000007);
 		if (c->x86_power & (1<<8)) {
 			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-			set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+			set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+                        if ( c->x86 != 0x11 )
+			    set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
 		}
 	}
 
diff -r 1e33261a814f xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/cpu/intel.c	Mon Sep 28 13:26:11 2009 -0600
@@ -226,7 +226,8 @@ static void __devinit init_intel(struct 
 		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
 	if (cpuid_edx(0x80000007) & (1u<<8)) {
 		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-		set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+		set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+		set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
 	}
 	if ((c->cpuid_level >= 0x00000006) &&
 	    (cpuid_eax(0x00000006) & (1u<<2)))
diff -r 1e33261a814f xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/time.c	Mon Sep 28 13:26:11 2009 -0600
@@ -698,7 +698,7 @@ void cstate_restore_tsc(void)
     s_time_t stime_delta;
     u64 new_tsc;
 
-    if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
+    if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
     stime_delta = read_platform_stime() - t->stime_master_stamp;
@@ -1100,6 +1100,11 @@ struct calibration_rendezvous {
     u64 master_tsc_stamp;
 };
 
+static void (*rendezvous_func) (void *info);
+static int tsc_reliable = 0;
+static unsigned long tsc_max_warp = 0;
+static unsigned long tsc_verify_decay = 0;
+
 static void time_calibration_tsc_rendezvous(void *_r)
 {
     int i;
@@ -1180,6 +1185,50 @@ static void time_calibration_std_rendezv
     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
 }
 
+static void time_verify_tsc_calibration_rendezvous(void *_r)
+{
+    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+    struct calibration_rendezvous *r = _r;
+    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
+
+    /* check_tsc_warp is VERY expensive so test only on log2 intervals */
+    tsc_verify_decay++;
+    if ( !(tsc_verify_decay & (tsc_verify_decay-1)) )
+    {
+        if ( smp_processor_id() == 0 )
+        {
+            while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
+                mb();
+            check_tsc_warp(cpu_khz, &tsc_max_warp);
+            atomic_inc(&r->semaphore);
+        }
+        else
+        {
+            atomic_inc(&r->semaphore);
+            while ( atomic_read(&r->semaphore) < total_cpus )
+                mb();
+            check_tsc_warp(cpu_khz, &tsc_max_warp);
+            atomic_inc(&r->semaphore);
+            while ( atomic_read(&r->semaphore) > total_cpus )
+                mb();
+        }
+    }
+
+    if ( tsc_max_warp && smp_processor_id() == 0 )
+    {
+        printk("TSC warp detected (%lu cycles), disabling reliable TSC\n",
+                tsc_max_warp);
+        tsc_reliable = -1;
+        rendezvous_func = time_calibration_tsc_rendezvous;
+    }
+
+    rdtscll(c->local_tsc_stamp);
+    c->stime_local_stamp = get_s_time();
+    c->stime_master_stamp = r->master_stime;
+
+    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+}
+
 static void time_calibration(void *unused)
 {
     struct calibration_rendezvous r = {
@@ -1188,11 +1237,7 @@ static void time_calibration(void *unuse
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
-    on_selected_cpus(&r.cpu_calibration_map,
-                     opt_consistent_tscs
-                     ? time_calibration_tsc_rendezvous
-                     : time_calibration_std_rendezvous,
-                     &r, 1);
+    on_selected_cpus(&r.cpu_calibration_map, rendezvous_func, &r, 1);
 }
 
 void init_percpu_time(void)
@@ -1219,16 +1264,19 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
-    if ( !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-        opt_consistent_tscs = 0;
-
-    /* If we have constant TSCs then scale factor can be shared. */
-    if ( opt_consistent_tscs )
+    /* If we have reliable TSCs then scale factor can be shared. */
+    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
     {
         int cpu;
         for_each_possible_cpu ( cpu )
             per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+        rendezvous_func = time_verify_tsc_calibration_rendezvous;
+        tsc_reliable = 1;
     }
+    else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        rendezvous_func = time_calibration_tsc_rendezvous;
+    else
+        rendezvous_func = time_calibration_std_rendezvous;
 
     open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
 
@@ -1463,6 +1511,13 @@ static void dump_softtsc(unsigned char k
     struct domain *d;
     int domcnt = 0;
 
+    if ( tsc_reliable > 0 )
+        printk("TSC is reliable\n");
+    else if ( tsc_reliable < 0 )
+        printk("Hardware determined TSC reliable, verification failed with "
+               "warp = %lu cycles\n", tsc_max_warp);
+    else
+        printk("TSC is not reliable\n");
     for_each_domain ( d )
     {
         if ( !d->arch.vtsc )
diff -r 1e33261a814f xen/arch/x86/tsc_sync.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/tsc_sync.c	Mon Sep 28 13:26:11 2009 -0600
@@ -0,0 +1,93 @@
+/*
+ * check TSC synchronization.
+ *
+ * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
+ * Modified for Xen by Dan Magenheimer, Oracle Corp.
+ *
+ * We check whether all boot CPUs have their TSC's synchronized,
+ * print a warning if not and turn off the TSC clock-source.
+ *
+ * The warp-check is point-to-point between two CPUs, the CPU
+ * initiating the bootup is the 'source CPU', the freshly booting
+ * CPU is the 'target CPU'.
+ *
+ * Only two CPUs may participate - they can enter in any order.
+ * ( The serial nature of the boot logic and the CPU hotplug lock
+ *   protects against more than 2 CPUs entering this code. )
+ */
+#include <xen/config.h>
+#include <xen/spinlock.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+
+/* FIXME Are these OK for Xen? Xen has no _raw_spin_lock() */
+#define rdtsc_barrier  mb
+#define raw_spinlock_t spinlock_t
+#define __raw_spin_lock spin_lock
+#define __raw_spin_unlock spin_unlock
+#define __RAW_SPIN_LOCK_UNLOCKED  SPIN_LOCK_UNLOCKED
+
+/*
+ * We use a raw spinlock in this exceptional case, because
+ * we want to have the fastest, inlined, non-debug version
+ * of a critical section, to be able to prove TSC time-warps:
+ */
+static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+
+static __cpuinitdata cycles_t last_tsc;
+
+/*
+ * TSC-warp measurement loop running on both CPUs:
+ */
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
+{
+	cycles_t start, now, prev, end;
+	int i;
+
+	rdtsc_barrier();
+	start = get_cycles();
+	rdtsc_barrier();
+	/*
+	 * The measurement runs for 2 msecs:
+	 */
+	end = start + tsc_khz * 2ULL;
+	now = start;
+
+	for (i = 0; ; i++) {
+		/*
+		 * We take the global lock, measure TSC, save the
+		 * previous TSC that was measured (possibly on
+		 * another CPU) and update the previous TSC timestamp.
+		 */
+		__raw_spin_lock(&sync_lock);
+		prev = last_tsc;
+		rdtsc_barrier();
+		now = get_cycles();
+		rdtsc_barrier();
+		last_tsc = now;
+		__raw_spin_unlock(&sync_lock);
+
+		/*
+		 * Be nice every now and then (and also check whether
+		 * measurement is done [we also insert a 10 million
+		 * loops safety exit, so we dont lock up in case the
+		 * TSC readout is totally broken]):
+		 */
+		if (unlikely(!(i & 7))) {
+			if (now > end || i > 10000000)
+				break;
+			cpu_relax();
+			/*touch_nmi_watchdog();*/
+		}
+		/*
+		 * Outside the critical section we can now see whether
+		 * we saw a time-warp of the TSC going backwards:
+		 */
+		if (unlikely(prev > now)) {
+			__raw_spin_lock(&sync_lock);
+			if ( *max_warp > prev - now )
+				*max_warp = prev - now;
+			__raw_spin_unlock(&sync_lock);
+		}
+	}
+}
diff -r 1e33261a814f xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/include/asm-x86/cpufeature.h	Mon Sep 28 13:26:11 2009 -0600
@@ -74,9 +74,10 @@
 #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
 #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_NOSTOP_TSC	(3*32+ 9) /* TSC does not stop in C states */
+#define X86_FEATURE_NONSTOP_TSC	(3*32+ 9) /* TSC does not stop in C states */
 #define X86_FEATURE_ARAT	(3*32+ 10) /* Always running APIC timer */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_TSC_RELIABLE (3*32+12) /* TSC is known to be reliable */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff -r 1e33261a814f xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h	Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/include/asm-x86/time.h	Mon Sep 28 13:26:11 2009 -0600
@@ -43,4 +43,6 @@ uint64_t ns_to_acpi_pm_tick(uint64_t ns)
 
 void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs);
 
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp);
+
 #endif /* __X86_TIME_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-28 20:19 [RFC] [PATCH] use "reliable" tsc properly when available, but verify Dan Magenheimer
@ 2009-09-28 21:01 ` Keir Fraser
  2009-09-28 21:06   ` Keir Fraser
  0 siblings, 1 reply; 12+ messages in thread
From: Keir Fraser @ 2009-09-28 21:01 UTC (permalink / raw)
  To: Dan Magenheimer, Xen-Devel (E-mail)

On 28/09/2009 21:19, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> So we trust but verify... if the cpuid bit is set
> we assume a reliable tsc, but use the Linux boottime
> check_tsc_warp algorithm to periodically verify that
> the tsc hasn't skewed.  If it has, we fall back to Xen
> managing (and periodically writing) the TSC.
> 
> The check_tsc_warp algorithm is CPU-intensive, so
> we test it on a decaying schedule, at 1sec, 2sec,
> 4sec, 8sec, 16sec, 32sec, etc.

Surely it should be sufficient to check TSCs for consistency across all CPUs
periodically, and against the chosen platform timer, and ensure none are
drifting? An operation which would not require us to loop for 2ms and would
provide rather more useful information than an ad-hoc multi-CPU
race-to-update-a-shared-variable-an-arbitrary-and-large-number-of-times.

I wouldn't take anything like this algorithm.

 -- Keir

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-28 21:01 ` Keir Fraser
@ 2009-09-28 21:06   ` Keir Fraser
  2009-09-28 22:05     ` Dan Magenheimer
  0 siblings, 1 reply; 12+ messages in thread
From: Keir Fraser @ 2009-09-28 21:06 UTC (permalink / raw)
  To: Dan Magenheimer, Xen-Devel (E-mail)

On 28/09/2009 22:01, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> 
> Surely it should be sufficient to check TSCs for consistency across all CPUs
> periodically, and against the chosen platform timer, and ensure none are
> drifting? An operation which would not require us to loop for 2ms and would
> provide rather more useful information than an ad-hoc multi-CPU
> race-to-update-a-shared-variable-an-arbitrary-and-large-number-of-times.

I should add, not only is the algorithm stupid and slow, but it doesn't even
check for exactly what RELIABLE_TSC guarantees -- constant-rate TSCs. This
would be useless on a single-CPU system, for example, or perhaps more
practically a single-socket system where all TSCs skewed together due to
package-wide power management. In the latter case TSCs would not skew
relative to each other, even though they could 'skew' relative to wallclock
(represented in Xen by the platform timer).

 -- Keir

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-28 21:06   ` Keir Fraser
@ 2009-09-28 22:05     ` Dan Magenheimer
  2009-09-29  7:43       ` Keir Fraser
  0 siblings, 1 reply; 12+ messages in thread
From: Dan Magenheimer @ 2009-09-28 22:05 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail)

> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Surely it should be sufficient to check TSCs for consistency 
> across all CPUs
> periodically, and against the chosen platform timer, and 
> ensure none are
> drifting? An operation which would not require us to loop for 
> 2ms and would
> provide rather more useful information than an ad-hoc multi-CPU
> race-to-update-a-shared-variable-an-arbitrary-and-large-number
> -of-times.
> 
> I wouldn't take anything like this algorithm.

The algorithm ensures that the skew between any two
processors is sufficiently small so that it is unobservable
by any app (e.g. smaller than "a cache bounce").  I'm
not sure it is possible to "check for consistency across
all CPUs" and get that guarantee any other way... unless
there is some easy way to measure the minimum cost of a cache
bounce.

I'm not sure why Linux chooses to run the test
for 20ms but I think it is because it is only running
it once at boottime so it has to eat up some time to
give the tsc's a chance to skew sufficiently.  If we
are running it more than once (and Xen hasn't written
to the tsc's recently), it's probably sufficient to
run it for far fewer iterations, but given all the
possible CPU race conditions due to caches and pipelining
and such, I'm not sure how many iterations is enough.

Note that upstream Linux NEVER writes to TSC anymore.
If the check_tsc_warp test fails, tsc is simply marked
as an unreliable mechanism other than for interpolating
within a jiffie.  If OS's had some intrinsic to describe
this "reliable vs unreliable TSC" to apps, lots of troubles
could have been avoided.  But that's roughly what I
am trying to do with pvrdtscp so I'm trying to be very
sure that when Xen says it is, TSC is both reliable and
continues to be reliable.  (Though maybe once at boottime 
is sufficient.)

Which points out another alternative:  check_tsc_warp
need only be run if one or more domains have tsc_native
enabled AND have some mechanism (such as pvrdtscp or
a userland hypercall) to ask Xen if the TSC is reliable
or not.

But since this might be minutes/hours/days after Xen
boots, I'd still like to avoid Xen mucking around
using write_tsc in the meantime as it may be "fixing"
something that ain't broke.

> I should add, not only is the algorithm stupid and slow, but 
> it doesn't even
> check for exactly what RELIABLE_TSC guarantees -- 
> constant-rate TSCs. This
> would be useless on a single-CPU system, for example, or perhaps more
> practically a single-socket system where all TSCs skewed 
> together due to
> package-wide power management. In the latter case TSCs would not skew
> relative to each other, even though they could 'skew' 
> relative to wallclock
> (represented in Xen by the platform timer).

It's only checking for TSC skew relative to other
processors in an SMP system.  What's important to
an app is that time (as measured by sampling the
TSC on random processors) never goes backwards.
That IS what RELIABLE_TSC is supposed to guarantee.
I agree that check_tsc_warp doesn't test for skew
relative to a platform timer (though I suspect
they are driven from the same crystal) and need not
be run on a single-CPU system.

Dan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-28 22:05     ` Dan Magenheimer
@ 2009-09-29  7:43       ` Keir Fraser
  2009-09-29 15:51         ` Dan Magenheimer
  0 siblings, 1 reply; 12+ messages in thread
From: Keir Fraser @ 2009-09-29  7:43 UTC (permalink / raw)
  To: Dan Magenheimer, Xen-Devel (E-mail)

On 28/09/2009 23:05, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> Note that upstream Linux NEVER writes to TSC anymore.
> If the check_tsc_warp test fails, tsc is simply marked
> as an unreliable mechanism other than for interpolating
> within a jiffie.  If OS's had some intrinsic to describe
> this "reliable vs unreliable TSC" to apps, lots of troubles
> could have been avoided.  But that's roughly what I
> am trying to do with pvrdtscp so I'm trying to be very
> sure that when Xen says it is, TSC is both reliable and
> continues to be reliable.  (Though maybe once at boottime
> is sufficient.)

Given that TSC is now emulated, who cares what the underlying CPUs say about
TSC reliability. Xen emulates the TSC with its own system time, and even
explicitly checks that the returned TSC value is monotonically increasing.
So TSC is always 'reliable' for guests, regardless of host TSC behaviour.
So on this count, too, the patch is a reject.

 -- Keir

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29  7:43       ` Keir Fraser
@ 2009-09-29 15:51         ` Dan Magenheimer
  2009-09-29 17:17           ` Keir Fraser
  0 siblings, 1 reply; 12+ messages in thread
From: Dan Magenheimer @ 2009-09-29 15:51 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail)

> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> 
> Given that TSC is now emulated, who cares what the underlying 
> CPUs say about
> TSC reliability. Xen emulates the TSC with its own system 
> time, and even
> explicitly checks that the returned TSC value is 
> monotonically increasing.
> So TSC is alweays 'reliable' for guests, regardless of host 
> TSC behaviour.
> So on this count, too, the patch is a reject.

Ouch.  I thought RFC was "request for comment", not
"request for castration" ;-) ;-)

Let me clarify my intent:

I remain very much committed to emulated-tsc
("correctness") as the default for unmodified apps
even if there is a significant loss of performance.
Call this Phase I

BUT I am continuing to work on how an "aware" app
(or an "aware" OS) in a constrained environment can
obtain both correctness AND performance.  Call this
Phase II.  Pvrdtscp and Xiantao's scaling are
different approaches to Phase II, for pvm and
hvm respectively. Vsyscall+pvclock also if a PV
OS can be made aware whether tsc_native is enabled
or not.

This proposed patch is really only important for Phase
II, but given all the confusion around whether tsc is
reliable/safe/constant/nonstop on various machines,
I think it might be good to get code into Xen --
sooner rather than later -- that "measures" this
so we can confirm if the promises made by processor
and system vendors are (or are not) being delivered.

Does that make more sense?

Thanks,
Dan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29 15:51         ` Dan Magenheimer
@ 2009-09-29 17:17           ` Keir Fraser
  2009-09-29 18:10             ` Dan Magenheimer
  0 siblings, 1 reply; 12+ messages in thread
From: Keir Fraser @ 2009-09-29 17:17 UTC (permalink / raw)
  To: Dan Magenheimer, Xen-Devel (E-mail)

On 29/09/2009 16:51, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> This proposed patch is really only important for Phase
> II, but given all the confusion around whether tsc is
> reliable/safe/constant/nonstop on various machines,
> I think it might be good to get code into Xen --
> sooner rather than later -- that "measures" this
> so we can confirm if the promises made by processor
> and system vendors are (or are not) being delivered.
> 
> Does that make more sense?

That makes it a nice-to-have. But not nice enough to have all CPUs groove
off for 2ms every once in a while, imo.

 -- Keir

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29 17:17           ` Keir Fraser
@ 2009-09-29 18:10             ` Dan Magenheimer
  2009-09-29 18:21               ` Keir Fraser
  0 siblings, 1 reply; 12+ messages in thread
From: Dan Magenheimer @ 2009-09-29 18:10 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail)

> > Does that make more sense?
> 
> That makes it a nice-to-have. But not nice enough to have all 
> CPUs groove
> off for 2ms every once in a while, imo.

OK, rechecking may be too paranoid.  I will rewrite to
run once at boottime and resubmit. (Is 20ms at boottime
OK? 20ms matches Linux.)

> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Monday, September 21, 2009 12:17 PM
>
> Well, if it is at least true for 99% of systems, then it 
> might be worth
> enabling constant_tsc support by default, and detect TSC divergence at
> runtime and disable dynamically. I think that's what Linux 
> does (i.e., it
> has a fallback at runtime if its TSC assumptions turn out to 
> be wrong).

Looking more closely at the upstream Linux code though,
any processor deemed to have X86_FEATURE_TSC_RELIABLE
NEVER runs check_tsc_warp and never validates TSC (except
in the unusual circumstance that the kernel was compiled
with NUMAQ enabled).  If this feature is set,
Linux just uses tsc as the best clocksource and afaict
has no fallback.

I'd feel better running check_tsc_warp (at least) once
on ALL processors, even if X86_FEATURE_TSC_RELIABLE
is set.  Is that OK?  Or should we just trust it under
exactly the same circumstances as Linux does?

Thanks,
Dan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29 18:10             ` Dan Magenheimer
@ 2009-09-29 18:21               ` Keir Fraser
  2009-09-29 18:45                 ` Dan Magenheimer
  0 siblings, 1 reply; 12+ messages in thread
From: Keir Fraser @ 2009-09-29 18:21 UTC (permalink / raw)
  To: Dan Magenheimer, Xen-Devel (E-mail)

On 29/09/2009 19:10, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> I'd feel better running check_tsc_warp (at least) once
> on ALL processors, even if X86_FEATURE_TSC_RELIABLE
> is set.  Is that OK?  Or should we just trust it under
> exactly the same circumstances as Linux does?

Well, what does trusting it imply for us right now? Do you have another
patch in mind, even if we decide against check_tsc_warp?

I mean, I'm not unhappy with our TSC management code as it is. I'm not
greatly excited to mess with it yet again.

 -- Keir

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29 18:21               ` Keir Fraser
@ 2009-09-29 18:45                 ` Dan Magenheimer
  2009-09-29 19:03                   ` Keir Fraser
  0 siblings, 1 reply; 12+ messages in thread
From: Dan Magenheimer @ 2009-09-29 18:45 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail)

> > I'd feel better running check_tsc_warp (at least) once
> > on ALL processors, even if X86_FEATURE_TSC_RELIABLE
> > is set.  Is that OK?  Or should we just trust it under
> > exactly the same circumstances as Linux does?
> 
> Well, what does trusting it imply for us right now? Do you 
> have another
> patch in mind, even if we decide against check_tsc_warp?
> 
> I mean, I'm not unhappy with our TSC management code as it is. I'm not
> greatly excited to mess with it yet again.

Well, if it is indeed reliable, there's no reason that
Xen should write_tsc, though it might be interesting
to check_tsc_warp on various hardware before and after
Xen does write_tsc (to see if Xen is breaking anything).

But I could certainly code the patch to just "measure
and report" for now and we could revisit Xen's use of
write_tsc at a later time if that's your preference.

I definitely have more patches in mind for pvrdtsc but
may not get back to them for a few weeks, so was
hoping to validate tsc reliability on some machines
in the meantime.

Thanks,
Dan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29 18:45                 ` Dan Magenheimer
@ 2009-09-29 19:03                   ` Keir Fraser
  2009-09-29 19:34                     ` Dan Magenheimer
  0 siblings, 1 reply; 12+ messages in thread
From: Keir Fraser @ 2009-09-29 19:03 UTC (permalink / raw)
  To: Dan Magenheimer, Xen-Devel (E-mail)

On 29/09/2009 19:45, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> But I could certainly code the patch to just "measure
> and report" for now and we could revisit Xen's use of
> write_tsc at a later time if that's your preference.
> 
> I definitely have more patches in mind for pvrdtsc but
> may not get back to them for a few weeks, so was
> hoping to validate tsc reliability on some machines
> in the meantime.

Who's actually going to go looking for this data in their logs and report it
to you? I think we should leave this until plans are more fully formed.

 -- Keir

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [RFC] [PATCH] use "reliable" tsc properly when available, but verify
  2009-09-29 19:03                   ` Keir Fraser
@ 2009-09-29 19:34                     ` Dan Magenheimer
  0 siblings, 0 replies; 12+ messages in thread
From: Dan Magenheimer @ 2009-09-29 19:34 UTC (permalink / raw)
  To: Keir Fraser, Xen-Devel (E-mail)

> Who's actually going to go looking for this data in their 
> logs and report it to you?

I was thinking about posting an appeal on xen-devel
and xen-users asking anyone using a recent xen-unstable
hypervisor to run

# xm debug-key s
# xm dmesg | tail
# cat /proc/cpuinfo | grep "model name"

and post or send the results.

And of course I was going to do the same on any
machines I can get my hands on.

> I think we should leave this until plans are more 
> fully formed.

OK, then I will move forward with the assumption that
what the processor and server vendors say is correct
(that tsc IS really reliable if the cpuid bit says
it is) and ignore Jeremy's cautions and my own
paranoia.

Thanks,
Dan

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2009-09-29 19:34 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-28 20:19 [RFC] [PATCH] use "reliable" tsc properly when available, but verify Dan Magenheimer
2009-09-28 21:01 ` Keir Fraser
2009-09-28 21:06   ` Keir Fraser
2009-09-28 22:05     ` Dan Magenheimer
2009-09-29  7:43       ` Keir Fraser
2009-09-29 15:51         ` Dan Magenheimer
2009-09-29 17:17           ` Keir Fraser
2009-09-29 18:10             ` Dan Magenheimer
2009-09-29 18:21               ` Keir Fraser
2009-09-29 18:45                 ` Dan Magenheimer
2009-09-29 19:03                   ` Keir Fraser
2009-09-29 19:34                     ` Dan Magenheimer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.