All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
@ 2011-04-14  7:18 Jan Beulich
  2011-04-14  7:25 ` Keir Fraser
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Jan Beulich @ 2011-04-14  7:18 UTC (permalink / raw)
  To: xen-devel; +Cc: winston.l.wang

This means suppressing the uses in time_calibration_tsc_rendezvous(),
cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
hang of Linux Dom0 when loading processor.ko on such systems that
have support for C states above C1.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
     hpet_disable_legacy_broadcast();
 }
 
+bool_t cpuidle_using_deep_cstate(void)
+{
+    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
+}
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -41,6 +41,7 @@
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
 #include <asm/mtrr.h>
+#include <asm/time.h>
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
@@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
     ;
 }
 
+/*
+ * TSC's upper 32 bits can't be written in earlier CPUs (before
+ * Prescott), there is no way to resync one AP against BP.
+ */
+bool_t disable_tsc_sync;
+
 static atomic_t tsc_count;
 static uint64_t tsc_value;
 static cpumask_t tsc_sync_cpu_mask;
@@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
 {
     unsigned int i;
 
+    if ( disable_tsc_sync )
+        return;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
          !cpu_isset(slave, tsc_sync_cpu_mask) )
         return;
@@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
 {
     unsigned int i;
 
+    if ( disable_tsc_sync )
+        return;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
          !cpu_isset(slave, tsc_sync_cpu_mask) )
         return;
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -21,6 +21,7 @@
 #include <xen/smp.h>
 #include <xen/irq.h>
 #include <xen/softirq.h>
+#include <xen/cpuidle.h>
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
 #include <xen/guest_access.h>
@@ -1385,6 +1386,9 @@ void init_percpu_time(void)
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
+    u64 tsc, tmp;
+    const char *what = NULL;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
     {
         /*
@@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
             setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
     }
 
+    /*
+     * On certain older Intel CPUs writing the TSC MSR clears the upper
+     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
+     *
+     * Additionally, AMD specifies that being able to write the TSC MSR
+     * is not an architectural feature (but, other than their manual says,
+     * also cannot be determined from CPUID bits).
+     */
+    rdtscll(tsc);
+    if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
+    {
+        u64 tmp2;
+
+        rdtscll(tmp2);
+        write_tsc(tsc | (1ULL << 32));
+        rdtscll(tmp);
+        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
+            what = "only partially";
+    }
+    else
+        what = "not";
+    if ( what )
+    {
+        printk(XENLOG_WARNING "TSC %s writable\n", what);
+
+        /* time_calibration_tsc_rendezvous() must not be used */
+        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+            setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
+
+        /* cstate_restore_tsc() must not be used (or do nothing) */
+        if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
+            cpuidle_disable_deep_cstate();
+
+        /* synchronize_tsc_slave() must do nothing */
+        disable_tsc_sync = 1;
+    }
+    else
+        write_tsc(tsc + 4 * (s32)(tmp - tsc));
+
     /* If we have constant-rate TSCs then scale factor can be shared. */
     if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
     {
@@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
      * XXX dom0 may rely on RTC interrupt delivery, so only enable
      * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
      */
-    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
+    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
     {
         hpet_broadcast_setup();
         if ( !hpet_broadcast_is_available() )
--- a/xen/include/asm-x86/setup.h
+++ b/xen/include/asm-x86/setup.h
@@ -4,7 +4,6 @@
 #include <xen/multiboot.h>
 
 extern bool_t early_boot;
-extern s8 xen_cpuidle;
 extern unsigned long xenheap_initial_phys_start;
 
 void init_done(void);
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -24,6 +24,8 @@
 
 typedef u64 cycles_t;
 
+extern bool_t disable_tsc_sync;
+
 static inline cycles_t get_cycles(void)
 {
     cycles_t c;
--- a/xen/include/xen/cpuidle.h
+++ b/xen/include/xen/cpuidle.h
@@ -85,7 +85,10 @@ struct cpuidle_governor
     void (*reflect)         (struct acpi_processor_power *dev);
 };
 
+extern s8 xen_cpuidle;
 extern struct cpuidle_governor *cpuidle_current_governor;
+
+bool_t cpuidle_using_deep_cstate(void);
 void cpuidle_disable_deep_cstate(void);
 
 extern void cpuidle_wakeup_mwait(cpumask_t *mask);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  7:18 [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits Jan Beulich
@ 2011-04-14  7:25 ` Keir Fraser
  2011-04-14  7:42   ` Jan Beulich
  2011-04-14  7:28 ` Jan Beulich
  2011-04-14 16:05 ` Keir Fraser
  2 siblings, 1 reply; 21+ messages in thread
From: Keir Fraser @ 2011-04-14  7:25 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Dan Magenheimer, winston.l.wang

On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:

> This means suppressing the uses in time_calibration_tsc_rendezvous(),
> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
> hang of Linux Dom0 when loading processor.ko on such systems that
> have support for C states above C1.

Should your new test be gated on !X86_FEATURE_TSC_RELIABLE? We already
*never* write the TSC when boot_cpu_has(TSC_RELIABLE) -- Dan Magenheimer
made that change on the assumption that TSCs were globally synced by
firmware in this case, and us writing one or more TSCs could only ever make
things worse.

 -- Keir

> Signed-off-by: Jan Beulich <jbeulich@novell.com>
> 
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
>      hpet_disable_legacy_broadcast();
>  }
>  
> +bool_t cpuidle_using_deep_cstate(void)
> +{
> +    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
> +}
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -41,6 +41,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/msr.h>
>  #include <asm/mtrr.h>
> +#include <asm/time.h>
>  #include <mach_apic.h>
>  #include <mach_wakecpu.h>
>  #include <smpboot_hooks.h>
> @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
>      ;
>  }
>  
> +/*
> + * TSC's upper 32 bits can't be written in earlier CPUs (before
> + * Prescott), there is no way to resync one AP against BP.
> + */
> +bool_t disable_tsc_sync;
> +
>  static atomic_t tsc_count;
>  static uint64_t tsc_value;
>  static cpumask_t tsc_sync_cpu_mask;
> @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -21,6 +21,7 @@
>  #include <xen/smp.h>
>  #include <xen/irq.h>
>  #include <xen/softirq.h>
> +#include <xen/cpuidle.h>
>  #include <xen/symbols.h>
>  #include <xen/keyhandler.h>
>  #include <xen/guest_access.h>
> @@ -1385,6 +1386,9 @@ void init_percpu_time(void)
>  /* Late init function (after all CPUs are booted). */
>  int __init init_xen_time(void)
>  {
> +    u64 tsc, tmp;
> +    const char *what = NULL;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
>      {
>          /*
> @@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
>              setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
>      }
>  
> +    /*
> +     * On certain older Intel CPUs writing the TSC MSR clears the upper
> +     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
> +     *
> +     * Additionally, AMD specifies that being able to write the TSC MSR
> +     * is not an architectural feature (but, other than their manual says,
> +     * also cannot be determined from CPUID bits).
> +     */
> +    rdtscll(tsc);
> +    if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
> +    {
> +        u64 tmp2;
> +
> +        rdtscll(tmp2);
> +        write_tsc(tsc | (1ULL << 32));
> +        rdtscll(tmp);
> +        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
> +            what = "only partially";
> +    }
> +    else
> +        what = "not";
> +    if ( what )
> +    {
> +        printk(XENLOG_WARNING "TSC %s writable\n", what);
> +
> +        /* time_calibration_tsc_rendezvous() must not be used */
> +        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> +            setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> +
> +        /* cstate_restore_tsc() must not be used (or do nothing) */
> +        if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
> +            cpuidle_disable_deep_cstate();
> +
> +        /* synchronize_tsc_slave() must do nothing */
> +        disable_tsc_sync = 1;
> +    }
> +    else
> +        write_tsc(tsc + 4 * (s32)(tmp - tsc));
> +
>      /* If we have constant-rate TSCs then scale factor can be shared. */
>      if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
>      {
> @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
>       * XXX dom0 may rely on RTC interrupt delivery, so only enable
>       * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
>       */
> -    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
> +    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
>      {
>          hpet_broadcast_setup();
>          if ( !hpet_broadcast_is_available() )
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -4,7 +4,6 @@
>  #include <xen/multiboot.h>
>  
>  extern bool_t early_boot;
> -extern s8 xen_cpuidle;
>  extern unsigned long xenheap_initial_phys_start;
>  
>  void init_done(void);
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -24,6 +24,8 @@
>  
>  typedef u64 cycles_t;
>  
> +extern bool_t disable_tsc_sync;
> +
>  static inline cycles_t get_cycles(void)
>  {
>      cycles_t c;
> --- a/xen/include/xen/cpuidle.h
> +++ b/xen/include/xen/cpuidle.h
> @@ -85,7 +85,10 @@ struct cpuidle_governor
>      void (*reflect)         (struct acpi_processor_power *dev);
>  };
>  
> +extern s8 xen_cpuidle;
>  extern struct cpuidle_governor *cpuidle_current_governor;
> +
> +bool_t cpuidle_using_deep_cstate(void);
>  void cpuidle_disable_deep_cstate(void);
>  
>  extern void cpuidle_wakeup_mwait(cpumask_t *mask);
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  7:18 [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits Jan Beulich
  2011-04-14  7:25 ` Keir Fraser
@ 2011-04-14  7:28 ` Jan Beulich
  2011-04-14 16:05 ` Keir Fraser
  2 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2011-04-14  7:28 UTC (permalink / raw)
  To: xen-devel; +Cc: winston.l.wang

>>> On 14.04.11 at 09:18, "Jan Beulich" <JBeulich@novell.com> wrote:
> This means suppressing the uses in time_calibration_tsc_rendezvous(),
> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
> hang of Linux Dom0 when loading processor.ko on such systems that
> have support for C states above C1.
> 
> Signed-off-by: Jan Beulich <jbeulich@novell.com>

A note on backporting to 4.1 and 4.0 (the problem was really found on
4.0.x): An additional adjustment to hpet_disable_legacy_broadcast()
is necessary here, as other than in -unstable it isn't prepared to be
called before hpet_broadcast_init(), e.g.:

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -632,6 +632,9 @@ void hpet_disable_legacy_broadcast(void)
     u32 cfg;
     unsigned long flags;
 
+    if ( !legacy_hpet_event.shift )
+        return;
+
     spin_lock_irqsave(&legacy_hpet_event.lock, flags);
 
     legacy_hpet_event.flags |= HPET_EVT_DISABLE;


Additionally, the disabling of TSC sync isn't necessary on 4.0 - zero gets
written into the TSC MSR there.

Jan

> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
>      hpet_disable_legacy_broadcast();
>  }
>  
> +bool_t cpuidle_using_deep_cstate(void)
> +{
> +    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
> +}
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -41,6 +41,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/msr.h>
>  #include <asm/mtrr.h>
> +#include <asm/time.h>
>  #include <mach_apic.h>
>  #include <mach_wakecpu.h>
>  #include <smpboot_hooks.h>
> @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
>      ;
>  }
>  
> +/*
> + * TSC's upper 32 bits can't be written in earlier CPUs (before
> + * Prescott), there is no way to resync one AP against BP.
> + */
> +bool_t disable_tsc_sync;
> +
>  static atomic_t tsc_count;
>  static uint64_t tsc_value;
>  static cpumask_t tsc_sync_cpu_mask;
> @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -21,6 +21,7 @@
>  #include <xen/smp.h>
>  #include <xen/irq.h>
>  #include <xen/softirq.h>
> +#include <xen/cpuidle.h>
>  #include <xen/symbols.h>
>  #include <xen/keyhandler.h>
>  #include <xen/guest_access.h>
> @@ -1385,6 +1386,9 @@ void init_percpu_time(void)
>  /* Late init function (after all CPUs are booted). */
>  int __init init_xen_time(void)
>  {
> +    u64 tsc, tmp;
> +    const char *what = NULL;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
>      {
>          /*
> @@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
>              setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
>      }
>  
> +    /*
> +     * On certain older Intel CPUs writing the TSC MSR clears the upper
> +     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
> +     *
> +     * Additionally, AMD specifies that being able to write the TSC MSR
> +     * is not an architectural feature (but, other than their manual says,
> +     * also cannot be determined from CPUID bits).
> +     */
> +    rdtscll(tsc);
> +    if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
> +    {
> +        u64 tmp2;
> +
> +        rdtscll(tmp2);
> +        write_tsc(tsc | (1ULL << 32));
> +        rdtscll(tmp);
> +        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
> +            what = "only partially";
> +    }
> +    else
> +        what = "not";
> +    if ( what )
> +    {
> +        printk(XENLOG_WARNING "TSC %s writable\n", what);
> +
> +        /* time_calibration_tsc_rendezvous() must not be used */
> +        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> +            setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> +
> +        /* cstate_restore_tsc() must not be used (or do nothing) */
> +        if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
> +            cpuidle_disable_deep_cstate();
> +
> +        /* synchronize_tsc_slave() must do nothing */
> +        disable_tsc_sync = 1;
> +    }
> +    else
> +        write_tsc(tsc + 4 * (s32)(tmp - tsc));
> +
>      /* If we have constant-rate TSCs then scale factor can be shared. */
>      if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
>      {
> @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
>       * XXX dom0 may rely on RTC interrupt delivery, so only enable
>       * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
>       */
> -    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
> +    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
>      {
>          hpet_broadcast_setup();
>          if ( !hpet_broadcast_is_available() )
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -4,7 +4,6 @@
>  #include <xen/multiboot.h>
>  
>  extern bool_t early_boot;
> -extern s8 xen_cpuidle;
>  extern unsigned long xenheap_initial_phys_start;
>  
>  void init_done(void);
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -24,6 +24,8 @@
>  
>  typedef u64 cycles_t;
>  
> +extern bool_t disable_tsc_sync;
> +
>  static inline cycles_t get_cycles(void)
>  {
>      cycles_t c;
> --- a/xen/include/xen/cpuidle.h
> +++ b/xen/include/xen/cpuidle.h
> @@ -85,7 +85,10 @@ struct cpuidle_governor
>      void (*reflect)         (struct acpi_processor_power *dev);
>  };
>  
> +extern s8 xen_cpuidle;
>  extern struct cpuidle_governor *cpuidle_current_governor;
> +
> +bool_t cpuidle_using_deep_cstate(void);
>  void cpuidle_disable_deep_cstate(void);
>  
>  extern void cpuidle_wakeup_mwait(cpumask_t *mask);
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com 
> http://lists.xensource.com/xen-devel 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  7:25 ` Keir Fraser
@ 2011-04-14  7:42   ` Jan Beulich
  2011-04-14  7:50     ` Keir Fraser
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2011-04-14  7:42 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Dan Magenheimer, xen-devel, winston.l.wang

>>> On 14.04.11 at 09:25, Keir Fraser <keir.xen@gmail.com> wrote:
> On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:
> 
>> This means suppressing the uses in time_calibration_tsc_rendezvous(),
>> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
>> hang of Linux Dom0 when loading processor.ko on such systems that
>> have support for C states above C1.
> 
> Should your new test be gated on !X86_FEATURE_TSC_RELIABLE? We already

Which test? The write-TSC-probe itself?

> *never* write the TSC when boot_cpu_has(TSC_RELIABLE) -- Dan Magenheimer
> made that change on the assumption that TSCs were globally synced by
> firmware in this case, and us writing one or more TSCs could only ever make
> things worse.

That's not true - we only avoid the writing for TSC sync during boot.
Post-boot bringup of CPUs will write the TSC no matter what, and
cstate_restore_tsc() also has no such gating afaics.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  7:42   ` Jan Beulich
@ 2011-04-14  7:50     ` Keir Fraser
  2011-04-14  8:06       ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Keir Fraser @ 2011-04-14  7:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Dan Magenheimer, xen-devel, winston.l.wang

On 14/04/2011 08:42, "Jan Beulich" <JBeulich@novell.com> wrote:

>>>> On 14.04.11 at 09:25, Keir Fraser <keir.xen@gmail.com> wrote:
>> On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:
>> 
>>> This means suppressing the uses in time_calibration_tsc_rendezvous(),
>>> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
>>> hang of Linux Dom0 when loading processor.ko on such systems that
>>> have support for C states above C1.
>> 
>> Should your new test be gated on !X86_FEATURE_TSC_RELIABLE? We already
> 
> Which test? The write-TSC-probe itself?
> 
>> *never* write the TSC when boot_cpu_has(TSC_RELIABLE) -- Dan Magenheimer
>> made that change on the assumption that TSCs were globally synced by
>> firmware in this case, and us writing one or more TSCs could only ever make
>> things worse.
> 
> That's not true - we only avoid the writing for TSC sync during boot.
> Post-boot bringup of CPUs will write the TSC no matter what, and

For physically-added CPUs only. Kind of unavoidable, that one: we can only
try to do our best in that case. And let's face it, that probably affects
exactly zero production users of Xen/x86 right now.

> cstate_restore_tsc() also has no such gating afaics.

It is gated on NONSTOP_TSC which is implied by TSC_RELIABLE.

 -- Keir

> Jan
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  7:50     ` Keir Fraser
@ 2011-04-14  8:06       ` Jan Beulich
  2011-04-14  9:18         ` Keir Fraser
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2011-04-14  8:06 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Dan Magenheimer, xen-devel, winston.l.wang

>>> On 14.04.11 at 09:50, Keir Fraser <keir.xen@gmail.com> wrote:
> On 14/04/2011 08:42, "Jan Beulich" <JBeulich@novell.com> wrote:
> 
>>>>> On 14.04.11 at 09:25, Keir Fraser <keir.xen@gmail.com> wrote:
>>> On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:
>>> 
>>>> This means suppressing the uses in time_calibration_tsc_rendezvous(),
>>>> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
>>>> hang of Linux Dom0 when loading processor.ko on such systems that
>>>> have support for C states above C1.
>>> 
>>> Should your new test be gated on !X86_FEATURE_TSC_RELIABLE? We already
>> 
>> Which test? The write-TSC-probe itself?
>> 
>>> *never* write the TSC when boot_cpu_has(TSC_RELIABLE) -- Dan Magenheimer
>>> made that change on the assumption that TSCs were globally synced by
>>> firmware in this case, and us writing one or more TSCs could only ever make
>>> things worse.
>> 
>> That's not true - we only avoid the writing for TSC sync during boot.
>> Post-boot bringup of CPUs will write the TSC no matter what, and
> 
> For physically-added CPUs only. Kind of unavoidable, that one: we can only
> try to do our best in that case. And let's face it, that probably affects
> exactly zero production users of Xen/x86 right now.

That latter part I agree to.

But what are you afraid of? Probing the TSC write shouldn't do any
harm. Additionally, did you read the comment immediately preceding
the probing code? AMD doesn't guarantee the TSC to be writable at
all.

>> cstate_restore_tsc() also has no such gating afaics.
> 
> It is gated on NONSTOP_TSC which is implied by TSC_RELIABLE.

Ah, yes. But (I think) not architecturally, only by virtue of how
code is currently structured. If that changes, we'd be back at a
latent (and quite non-obvious) bug.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  8:06       ` Jan Beulich
@ 2011-04-14  9:18         ` Keir Fraser
  2011-04-14 22:41           ` Dan Magenheimer
  0 siblings, 1 reply; 21+ messages in thread
From: Keir Fraser @ 2011-04-14  9:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Dan Magenheimer, xen-devel, winston.l.wang

On 14/04/2011 09:06, "Jan Beulich" <JBeulich@novell.com> wrote:

>> For physically-added CPUs only. Kind of unavoidable, that one: we can only
>> try to do our best in that case. And let's face it, that probably affects
>> exactly zero production users of Xen/x86 right now.
> 
> That latter part I agree to.
> 
> But what are you afraid of? Probing the TSC write shouldn't do any
> harm.

You will end up with a BSP TSC value different than what it would have been
if you had not probed. Since you write back a (slightly) stale TSC value.
Which would increase cross-CPU TSC skew if the platform has done a good sync
job at power-on.

Now, do I personally think this much matters? Not really, since I believe
that direct guest TSC access is a huge non-issue. But it is something that
Dan disagreed on, he did a bunch of work on time management, and the code
as-is does try quite hard to avoid writing TSC if at all possible. I don't
think it's a good idea to change this without feedback from Dan, at least.

> Additionally, did you read the comment immediately preceding
> the probing code? AMD doesn't guarantee the TSC to be writable at
> all.
> 
>>> cstate_restore_tsc() also has no such gating afaics.
>> 
>> It is gated on NONSTOP_TSC which is implied by TSC_RELIABLE.
> 
> Ah, yes. But (I think) not architecturally, only by virtue of how
> code is currently structured. If that changes, we'd be back at a
> latent (and quite non-obvious) bug.

Yeah, if we want to continue to try avoiding write_tsc() on TSC_RELIABLE
then we should assert !TSC_RELIABLE on the write_tsc() path in
cstate_tsc_restore().

 -- Keir

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  7:18 [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits Jan Beulich
  2011-04-14  7:25 ` Keir Fraser
  2011-04-14  7:28 ` Jan Beulich
@ 2011-04-14 16:05 ` Keir Fraser
  2011-04-14 16:28   ` Jan Beulich
  2 siblings, 1 reply; 21+ messages in thread
From: Keir Fraser @ 2011-04-14 16:05 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: winston.l.wang

[-- Attachment #1: Type: text/plain, Size: 6251 bytes --]

On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:

> This means suppressing the uses in time_calibration_tsc_rendezvous(),
> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
> hang of Linux Dom0 when loading processor.ko on such systems that
> have support for C states above C1.

I've attached a version which would avoid doing the writability test on
TSC_RELIABLE systems. See what you think.

I also simplified the actual writability check itself. I couldn't figure out
what the benefit of your more complex approach would be. In fact it looked
like it wouldn't work if bit 32 was set already in the TSC counter, as then
you would write back an unmodified TSC (and in fact you would detect the
wrong way round, as you'd see a big delta if the write silently cleared bit
32 (and bits 33-63)). And the final write of tsc+4*delta, wasn't sure what
that was about either! But if you can explain why your test is better I'd be
happy to use it as you originally wrote it.

 -- Keir
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
> 
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
>      hpet_disable_legacy_broadcast();
>  }
>  
> +bool_t cpuidle_using_deep_cstate(void)
> +{
> +    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
> +}
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -41,6 +41,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/msr.h>
>  #include <asm/mtrr.h>
> +#include <asm/time.h>
>  #include <mach_apic.h>
>  #include <mach_wakecpu.h>
>  #include <smpboot_hooks.h>
> @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
>      ;
>  }
>  
> +/*
> + * TSC's upper 32 bits can't be written in earlier CPUs (before
> + * Prescott), there is no way to resync one AP against BP.
> + */
> +bool_t disable_tsc_sync;
> +
>  static atomic_t tsc_count;
>  static uint64_t tsc_value;
>  static cpumask_t tsc_sync_cpu_mask;
> @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -21,6 +21,7 @@
>  #include <xen/smp.h>
>  #include <xen/irq.h>
>  #include <xen/softirq.h>
> +#include <xen/cpuidle.h>
>  #include <xen/symbols.h>
>  #include <xen/keyhandler.h>
>  #include <xen/guest_access.h>
> @@ -1385,6 +1386,9 @@ void init_percpu_time(void)
>  /* Late init function (after all CPUs are booted). */
>  int __init init_xen_time(void)
>  {
> +    u64 tsc, tmp;
> +    const char *what = NULL;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
>      {
>          /*
> @@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
>              setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
>      }
>  
> +    /*
> +     * On certain older Intel CPUs writing the TSC MSR clears the upper
> +     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
> +     *
> +     * Additionally, AMD specifies that being able to write the TSC MSR
> +     * is not an architectural feature (but, other than their manual says,
> +     * also cannot be determined from CPUID bits).
> +     */
> +    rdtscll(tsc);
> +    if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
> +    {
> +        u64 tmp2;
> +
> +        rdtscll(tmp2);
> +        write_tsc(tsc | (1ULL << 32));
> +        rdtscll(tmp);
> +        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
> +            what = "only partially";
> +    }
> +    else
> +        what = "not";
> +    if ( what )
> +    {
> +        printk(XENLOG_WARNING "TSC %s writable\n", what);
> +
> +        /* time_calibration_tsc_rendezvous() must not be used */
> +        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> +            setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> +
> +        /* cstate_restore_tsc() must not be used (or do nothing) */
> +        if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
> +            cpuidle_disable_deep_cstate();
> +
> +        /* synchronize_tsc_slave() must do nothing */
> +        disable_tsc_sync = 1;
> +    }
> +    else
> +        write_tsc(tsc + 4 * (s32)(tmp - tsc));
> +
>      /* If we have constant-rate TSCs then scale factor can be shared. */
>      if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
>      {
> @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
>       * XXX dom0 may rely on RTC interrupt delivery, so only enable
>       * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
>       */
> -    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
> +    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
>      {
>          hpet_broadcast_setup();
>          if ( !hpet_broadcast_is_available() )
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -4,7 +4,6 @@
>  #include <xen/multiboot.h>
>  
>  extern bool_t early_boot;
> -extern s8 xen_cpuidle;
>  extern unsigned long xenheap_initial_phys_start;
>  
>  void init_done(void);
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -24,6 +24,8 @@
>  
>  typedef u64 cycles_t;
>  
> +extern bool_t disable_tsc_sync;
> +
>  static inline cycles_t get_cycles(void)
>  {
>      cycles_t c;
> --- a/xen/include/xen/cpuidle.h
> +++ b/xen/include/xen/cpuidle.h
> @@ -85,7 +85,10 @@ struct cpuidle_governor
>      void (*reflect)         (struct acpi_processor_power *dev);
>  };
>  
> +extern s8 xen_cpuidle;
>  extern struct cpuidle_governor *cpuidle_current_governor;
> +
> +bool_t cpuidle_using_deep_cstate(void);
>  void cpuidle_disable_deep_cstate(void);
>  
>  extern void cpuidle_wakeup_mwait(cpumask_t *mask);
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel


[-- Attachment #2: 00-tsc-check --]
[-- Type: application/octet-stream, Size: 5451 bytes --]

diff -r b5165fb66b56 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c	Thu Apr 14 16:58:41 2011 +0100
@@ -1098,3 +1098,7 @@
     hpet_disable_legacy_broadcast();
 }
 
+bool_t cpuidle_using_deep_cstate(void)
+{
+    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
+}
diff -r b5165fb66b56 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/arch/x86/smpboot.c	Thu Apr 14 16:58:41 2011 +0100
@@ -41,6 +41,7 @@
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
 #include <asm/mtrr.h>
+#include <asm/time.h>
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
@@ -124,6 +125,12 @@
     ;
 }
 
+/*
+ * TSC's upper 32 bits can't be written in earlier CPUs (before
+ * Prescott), there is no way to resync one AP against BP.
+ */
+bool_t disable_tsc_sync;
+
 static atomic_t tsc_count;
 static uint64_t tsc_value;
 static cpumask_t tsc_sync_cpu_mask;
@@ -132,6 +139,9 @@
 {
     unsigned int i;
 
+    if ( disable_tsc_sync )
+        return;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
          !cpu_isset(slave, tsc_sync_cpu_mask) )
         return;
@@ -153,6 +163,9 @@
 {
     unsigned int i;
 
+    if ( disable_tsc_sync )
+        return;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
          !cpu_isset(slave, tsc_sync_cpu_mask) )
         return;
diff -r b5165fb66b56 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/arch/x86/time.c	Thu Apr 14 16:58:41 2011 +0100
@@ -21,6 +21,7 @@
 #include <xen/smp.h>
 #include <xen/irq.h>
 #include <xen/softirq.h>
+#include <xen/cpuidle.h>
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
 #include <xen/guest_access.h>
@@ -680,6 +681,8 @@
     if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
+    ASSERT(boot_cpu_has(X86_FEATURE_TSC_RELIABLE));
+
     write_tsc(stime2tsc(read_platform_stime()));
 }
 
@@ -1382,6 +1385,56 @@
     }
 }
 
+/*
+ * On certain older Intel CPUs writing the TSC MSR clears the upper 32 bits. 
+ * Obviously we must not use write_tsc() on such CPUs.
+ *
+ * Additionally, AMD specifies that being able to write the TSC MSR is not an 
+ * architectural feature (but, other than their manual says, also cannot be 
+ * determined from CPUID bits).
+ */
+static void __init tsc_check_writability(void)
+{
+    const char *what = NULL;
+
+    /*
+     * If all CPUs are reported as synchronised and in sync, we never write
+     * the TSCs (except unavoidably, when a CPU is physically hot-plugged).
+     * Hence testing for writability is pointless and even harmful.
+     */
+    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+        return;
+
+    if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 )
+    {
+        uint64_t tmp, tmp2;
+
+        rdtscll(tmp2);
+        write_tsc(1ULL << 32);
+        rdtscll(tmp);
+        if ( ABS((s64)tmp - (s64)tmp2) < (1ULL << 31) )
+            what = "only partially";
+    }
+    else
+        what = "not";
+
+    /* Nothing to do if the TSC is fully writable. */
+    if ( !what )
+        return;
+
+    printk(XENLOG_WARNING "TSC %s writable\n", what);
+
+    /* time_calibration_tsc_rendezvous() must not be used */
+    setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
+
+    /* cstate_restore_tsc() must not be used (or do nothing) */
+    if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
+        cpuidle_disable_deep_cstate();
+
+    /* synchronize_tsc_slave() must do nothing */
+    disable_tsc_sync = 1;
+}
+
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
@@ -1398,6 +1451,8 @@
             setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
     }
 
+    tsc_check_writability();
+
     /* If we have constant-rate TSCs then scale factor can be shared. */
     if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
     {
@@ -1451,7 +1506,7 @@
      * XXX dom0 may rely on RTC interrupt delivery, so only enable
      * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
      */
-    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
+    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
     {
         hpet_broadcast_setup();
         if ( !hpet_broadcast_is_available() )
diff -r b5165fb66b56 xen/include/asm-x86/setup.h
--- a/xen/include/asm-x86/setup.h	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/include/asm-x86/setup.h	Thu Apr 14 16:58:41 2011 +0100
@@ -4,7 +4,6 @@
 #include <xen/multiboot.h>
 
 extern bool_t early_boot;
-extern s8 xen_cpuidle;
 extern unsigned long xenheap_initial_phys_start;
 
 void init_done(void);
diff -r b5165fb66b56 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/include/asm-x86/time.h	Thu Apr 14 16:58:41 2011 +0100
@@ -24,6 +24,8 @@
 
 typedef u64 cycles_t;
 
+extern bool_t disable_tsc_sync;
+
 static inline cycles_t get_cycles(void)
 {
     cycles_t c;
diff -r b5165fb66b56 xen/include/xen/cpuidle.h
--- a/xen/include/xen/cpuidle.h	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/include/xen/cpuidle.h	Thu Apr 14 16:58:41 2011 +0100
@@ -85,7 +85,10 @@
     void (*reflect)         (struct acpi_processor_power *dev);
 };
 
+extern s8 xen_cpuidle;
 extern struct cpuidle_governor *cpuidle_current_governor;
+
+bool_t cpuidle_using_deep_cstate(void);
 void cpuidle_disable_deep_cstate(void);
 
 extern void cpuidle_wakeup_mwait(cpumask_t *mask);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 16:05 ` Keir Fraser
@ 2011-04-14 16:28   ` Jan Beulich
  2011-04-14 16:48     ` Keir Fraser
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2011-04-14 16:28 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, winston.l.wang

>>> On 14.04.11 at 18:05, Keir Fraser <keir@xen.org> wrote:
> On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:
> 
>> This means suppressing the uses in time_calibration_tsc_rendezvous(),
>> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
>> hang of Linux Dom0 when loading processor.ko on such systems that
>> have support for C states above C1.
> 
> I've attached a version which would avoid doing the writability test on
> TSC_RELIABLE systems. See what you think.

Looks reasonable.

> I also simplified the actual writability check itself. I couldn't figure out
> what the benefit of your more complex approach would be. In fact it looked
> like it wouldn't work if bit 32 was set already in the TSC counter, as then
> you would write back an unmodified TSC (and in fact you would detect the
> wrong way round, as you'd see a big delta if the write silently cleared bit
> 32 (and bits 33-63)). And the final write of tsc+4*delta, wasn't sure what
> that was about either! But if you can explain why your test is better I'd be
> happy to use it as you originally wrote it.

So you were concerned about getting the TSC slightly off, and now
you flush it to zero, without any attempt to restore the original
value?

As to my original test being broken - I don't think that was the case:
The first write used (u32)tsc as the input, so the two writes, if
happening completely, would be certain to be apart by
approximately 1<<32 (or more, depending on what was in the
upper half originally). The only case not handled was if the TSC
overflowed 64 bits during the process - I considered this case
hypothetical only.

The final write of tsc+4*delta was basically an attempt to restore
the value that would have been there if we didn't fiddle with it.
The factor 4 was sort of invented, on the basis that the delta was
between one write and an immediate read back, with there being
a total of four such windows (read->write or write->read). As
one wouldn't get it precise anyway, the number seemed fine to
me, though just writing back the original values probably wouldn't
have been much worse.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 16:28   ` Jan Beulich
@ 2011-04-14 16:48     ` Keir Fraser
  2011-04-14 18:33       ` Wang, Winston L
  2011-04-15  7:08       ` Jan Beulich
  0 siblings, 2 replies; 21+ messages in thread
From: Keir Fraser @ 2011-04-14 16:48 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, winston.l.wang

On 14/04/2011 17:28, "Jan Beulich" <JBeulich@novell.com> wrote:

> 
>> I also simplified the actual writability check itself. I couldn't figure out
>> what the benefit of your more complex approach would be. In fact it looked
>> like it wouldn't work if bit 32 was set already in the TSC counter, as then
>> you would write back an unmodified TSC (and in fact you would detect the
>> wrong way round, as you'd see a big delta if the write silently cleared bit
>> 32 (and bits 33-63)). And the final write of tsc+4*delta, wasn't sure what
>> that was about either! But if you can explain why your test is better I'd be
>> happy to use it as you originally wrote it.
> 
> So you were concerned about getting the TSC slightly off, and now
> you flush it to zero, without any attempt to restore the original
> value?

Haha, well it doesn't matter too much if we sync TSCs as we bring them
online anyway. But I agree it makes sense to try if we are only able to
write the lower 32 bits -- we can at least hope the write test happens while
TSC counter is a 32-bit value anyway, and at least we've had a best-effort
attempt to keep TSCs in sync.

> As to my original test being broken - I don't think that was the case:
> The first write used (u32)tsc as the input, so the two writes, if
> happening completely, would be certain to be apart by
> approximately 1<<32 (or more, depending on what was in the
> upper half originally).

Ah yes, I missed the importance of the (u32)tsc write. Fair enough, your way
is better. :-)

> The only case not handled was if the TSC
> overflowed 64 bits during the process - I considered this case
> hypothetical only.
> 
> The final write of tsc+4*delta was basically an attempt to restore
> the value that would have been there if we didn't fiddle with it.

But the write is actually tsc + 4*(s32)(tmp-tsc), and tmp has 1U<<32 ORed
into it (because it was read after your second write to the TSC. Perhaps we
should just write back the full original tsc and call that good enough?

 -- Keir


> The factor 4 was sort of invented, on the basis that the delta was
> between one write and an immediate read back, with there being
> a total of four such windows (read->write or write->read). As
> one wouldn't get it precise anyway, the number seemed fine to
> me, though just writing back the original values probably wouldn't
> have been much worse.

> Jan
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 16:48     ` Keir Fraser
@ 2011-04-14 18:33       ` Wang, Winston L
  2011-04-14 21:06         ` Keir Fraser
  2011-04-15  7:08       ` Jan Beulich
  1 sibling, 1 reply; 21+ messages in thread
From: Wang, Winston L @ 2011-04-14 18:33 UTC (permalink / raw)
  To: Keir Fraser, Jan Beulich
  Cc: Liu, Jinsong, xen-devel, KeirFraser, Jiang, Yunhong, Dugger,
	Donald D, Li, Xin

Jan and Keir,

Great efforts for turning the test code re-validate the processor with (X86_FEATURE_TSC_RELIABLE) set. We know for sure that the broken old processors any write TSC will Zero the upper 32 bit. So can we move this test code as early as possible, say immediately after the early Processor init code where checking CPU id and set X86_FEATURE_TSC_RELIABLE? 

Thanks,

Winston,

> -----Original Message-----
> From: Keir Fraser [mailto:keir.xen@gmail.com]
> Sent: Thursday, April 14, 2011 9:48 AM
> To: Jan Beulich
> 
> On 14/04/2011 17:28, "Jan Beulich" <JBeulich@novell.com> wrote:
> 
> >
> >> I also simplified the actual writability check itself. I couldn't
> figure out
> >> what the benefit of your more complex approach would be. In fact it
> looked
> >> like it wouldn't work if bit 32 was set already in the TSC counter,
> as then
> >> you would write back an unmodified TSC (and in fact you would detect
> the
> >> wrong way round, as you'd see a big delta if the write silently
> cleared bit
> >> 32 (and bits 33-63)). And the final write of tsc+4*delta, wasn't
> sure what
> >> that was about either! But if you can explain why your test is
> better I'd be
> >> happy to use it as you originally wrote it.
> >
> > So you were concerned about getting the TSC slightly off, and now
> > you flush it to zero, without any attempt to restore the original
> > value?
> 
> Haha, well it doesn't matter too much if we sync TSCs as we bring them
> online anyway. But I agree it makes sense to try if we are only able to
> write the lower 32 bits -- we can at least hope the write test happens
> while
> TSC counter is a 32-bit value anyway, and at least we've had a best-
> effort
> attempt to keep TSCs in sync.
> 
> > As to my original test being broken - I don't think that was the case:
> > The first write used (u32)tsc as the input, so the two writes, if
> > happening completely, would be certain to be apart by
> > approximately 1<<32 (or more, depending on what was in the
> > upper half originally).
> 
> Ah yes, I missed the importance of the (u32)tsc write. Fair enough,
> your way
> is better. :-)
> 
> > The only case not handled was if the TSC
> > overflowed 64 bits during the process - I considered this case
> > hypothetical only.
> >
> > The final write of tsc+4*delta was basically an attempt to restore
> > the value that would have been there if we didn't fiddle with it.
> 
> But the write is actually tsc + 4*(s32)(tmp-tsc), and tmp has 1U<<32
> ORed
> into it (because it was read after your second write to the TSC.
> Perhaps we
> should just write back the full original tsc and call that good enough?
> 
>  -- Keir
> 
> 
> > The factor 4 was sort of invented, on the basis that the delta was
> > between one write and an immediate read back, with there being
> > a total of four such windows (read->write or write->read). As
> > one wouldn't get it precise anyway, the number seemed fine to
> > me, though just writing back the original values probably wouldn't
> > have been much worse.
> 
> > Jan
> >
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 18:33       ` Wang, Winston L
@ 2011-04-14 21:06         ` Keir Fraser
  2011-04-14 21:37           ` Wang, Winston L
  2011-04-15  7:06           ` Jan Beulich
  0 siblings, 2 replies; 21+ messages in thread
From: Keir Fraser @ 2011-04-14 21:06 UTC (permalink / raw)
  To: Wang, Winston L, Jan Beulich
  Cc: Liu, Jinsong, xen-devel, KeirFraser, Jiang, Yunhong, Dugger,
	Donald D, Li, Xin

On 14/04/2011 19:33, "Wang, Winston L" <winston.l.wang@intel.com> wrote:

> Jan and Keir,
> 
> Great efforts for turning the test code re-validate the processor with
> (X86_FEATURE_TSC_RELIABLE) set. We know for sure that the broken old
> processors any write TSC will Zero the upper 32 bit. So can we move this test
> code as early as possible, say immediately after the early Processor init code
> where checking CPU id and set X86_FEATURE_TSC_RELIABLE?

For our meaning of TSC_RELIABLE, there's no need for its setting to rely on
TSC 64-bit writability. I think leaving it where it is in init_xen_time() is
probably fine.

 -- Keir

> Thanks,
> 
> Winston,
> 
>> -----Original Message-----
>> From: Keir Fraser [mailto:keir.xen@gmail.com]
>> Sent: Thursday, April 14, 2011 9:48 AM
>> To: Jan Beulich
>> 
>> On 14/04/2011 17:28, "Jan Beulich" <JBeulich@novell.com> wrote:
>> 
>>> 
>>>> I also simplified the actual writability check itself. I couldn't
>> figure out
>>>> what the benefit of your more complex approach would be. In fact it
>> looked
>>>> like it wouldn't work if bit 32 was set already in the TSC counter,
>> as then
>>>> you would write back an unmodified TSC (and in fact you would detect
>> the
>>>> wrong way round, as you'd see a big delta if the write silently
>> cleared bit
>>>> 32 (and bits 33-63)). And the final write of tsc+4*delta, wasn't
>> sure what
>>>> that was about either! But if you can explain why your test is
>> better I'd be
>>>> happy to use it as you originally wrote it.
>>> 
>>> So you were concerned about getting the TSC slightly off, and now
>>> you flush it to zero, without any attempt to restore the original
>>> value?
>> 
>> Haha, well it doesn't matter too much if we sync TSCs as we bring them
>> online anyway. But I agree it makes sense to try if we are only able to
>> write the lower 32 bits -- we can at least hope the write test happens
>> while
>> TSC counter is a 32-bit value anyway, and at least we've had a best-
>> effort
>> attempt to keep TSCs in sync.
>> 
>>> As to my original test being broken - I don't think that was the case:
>>> The first write used (u32)tsc as the input, so the two writes, if
>>> happening completely, would be certain to be apart by
>>> approximately 1<<32 (or more, depending on what was in the
>>> upper half originally).
>> 
>> Ah yes, I missed the importance of the (u32)tsc write. Fair enough,
>> your way
>> is better. :-)
>> 
>>> The only case not handled was if the TSC
>>> overflowed 64 bits during the process - I considered this case
>>> hypothetical only.
>>> 
>>> The final write of tsc+4*delta was basically an attempt to restore
>>> the value that would have been there if we didn't fiddle with it.
>> 
>> But the write is actually tsc + 4*(s32)(tmp-tsc), and tmp has 1U<<32
>> ORed
>> into it (because it was read after your second write to the TSC.
>> Perhaps we
>> should just write back the full original tsc and call that good enough?
>> 
>>  -- Keir
>> 
>> 
>>> The factor 4 was sort of invented, on the basis that the delta was
>>> between one write and an immediate read back, with there being
>>> a total of four such windows (read->write or write->read). As
>>> one wouldn't get it precise anyway, the number seemed fine to
>>> me, though just writing back the original values probably wouldn't
>>> have been much worse.
>> 
>>> Jan
>>> 
>> 
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 21:06         ` Keir Fraser
@ 2011-04-14 21:37           ` Wang, Winston L
  2011-04-15  7:06           ` Jan Beulich
  1 sibling, 0 replies; 21+ messages in thread
From: Wang, Winston L @ 2011-04-14 21:37 UTC (permalink / raw)
  To: Keir Fraser, Jan Beulich
  Cc: Liu, Jinsong, xen-devel, KeirFraser, Jiang, Yunhong, Dugger,
	Donald D, Li, Xin

> -----Original Message-----
> From: Keir Fraser [mailto:keir.xen@gmail.com]
> Sent: Thursday, April 14, 2011 2:06 PM
> On 14/04/2011 19:33, "Wang, Winston L" <winston.l.wang@intel.com> wrote:
> 
> > Jan and Keir,
> >
> > Great efforts for turning the test code re-validate the processor
> with
> > (X86_FEATURE_TSC_RELIABLE) set. We know for sure that the broken old
> > processors any write TSC will Zero the upper 32 bit. So can we move
> this test
> > code as early as possible, say immediately after the early Processor
> init code
> > where checking CPU id and set X86_FEATURE_TSC_RELIABLE?
> 
> For our meaning of TSC_RELIABLE, there's no need for its setting to
> rely on
> TSC 64-bit writability. I think leaving it where it is in
> init_xen_time() is probably fine.

It's fine with me.

Winston,

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14  9:18         ` Keir Fraser
@ 2011-04-14 22:41           ` Dan Magenheimer
  2011-04-15  6:40             ` Keir Fraser
  0 siblings, 1 reply; 21+ messages in thread
From: Dan Magenheimer @ 2011-04-14 22:41 UTC (permalink / raw)
  To: Keir Fraser, Jan Beulich; +Cc: xen-devel, winston.l.wang



> -----Original Message-----
> From: Keir Fraser [mailto:keir.xen@gmail.com]
> Sent: Thursday, April 14, 2011 3:18 AM
> To: Jan Beulich
> Cc: winston.l.wang; xen-devel@lists.xensource.com; Dan Magenheimer
> Subject: Re: [Xen-devel] [PATCH] x86: don't write_tsc() non-zero values
> on CPUs updating only the lower 32 bits
> 
> On 14/04/2011 09:06, "Jan Beulich" <JBeulich@novell.com> wrote:
> 
> >> For physically-added CPUs only. Kind of unavoidable, that one: we
> can only
> >> try to do our best in that case. And let's face it, that probably
> affects
> >> exactly zero production users of Xen/x86 right now.
> >
> > That latter part I agree to.
> >
> > But what are you afraid of? Probing the TSC write shouldn't do any
> > harm.
> 
> You will end up with a BSP TSC value different than what it would have
> been
> if you had not probed. Since you write back a (slightly) stale TSC
> value.
> Which would increase cross-CPU TSC skew if the platform has done a good
> sync
> job at power-on.
> 
> Now, do I personally think this much matters? Not really, since I
> believe
> that direct guest TSC access is a huge non-issue. But it is something
> that
> Dan disagreed on, he did a bunch of work on time management, and the
> code
> as-is does try quite hard to avoid writing TSC if at all possible. I
> don't
> think it's a good idea to change this without feedback from Dan, at
> least.

My feedback is don't break what is fixed ;-)

The CPU vendors are trying REALLY hard to make TSC a very fast
synchronized-across-all-CPUs clocksource and Linux now does use
it that way if TSC_RELIABLE is set.

While it's not perfect, it's close enough as long as your recent
vintage machine doesn't have hot-plug CPUs or buggy firmware.
 
> > Additionally, did you read the comment immediately preceding
> > the probing code? AMD doesn't guarantee the TSC to be writable at
> > all.
> >
> >>> cstate_restore_tsc() also has no such gating afaics.
> >>
> >> It is gated on NONSTOP_TSC which is implied by TSC_RELIABLE.
> >
> > Ah, yes. But (I think) not architecturally, only by virtue of how
> > code is currently structured. If that changes, we'd be back at a
> > latent (and quite non-obvious) bug.
> 
> Yeah, if we want to continue to try avoiding write_tsc() on
> TSC_RELIABLE
> then we should assert !TSC_RELIABLE on the write_tsc() path in
> cstate_tsc_restore().

Agreed.  In fact, maybe it should be asserted in write_tsc?

Dan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 22:41           ` Dan Magenheimer
@ 2011-04-15  6:40             ` Keir Fraser
  2011-04-15 14:34               ` Dan Magenheimer
  0 siblings, 1 reply; 21+ messages in thread
From: Keir Fraser @ 2011-04-15  6:40 UTC (permalink / raw)
  To: Dan Magenheimer, Jan Beulich; +Cc: xen-devel, winston.l.wang

On 14/04/2011 23:41, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

>> Yeah, if we want to continue to try avoiding write_tsc() on
>> TSC_RELIABLE
>> then we should assert !TSC_RELIABLE on the write_tsc() path in
>> cstate_tsc_restore().
> 
> Agreed.  In fact, maybe it should be asserted in write_tsc?

We still write_tsc on CPU physical hot-add.

 -- Keir

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 21:06         ` Keir Fraser
  2011-04-14 21:37           ` Wang, Winston L
@ 2011-04-15  7:06           ` Jan Beulich
  1 sibling, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2011-04-15  7:06 UTC (permalink / raw)
  To: Keir Fraser, Winston L Wang
  Cc: Jinsong Liu, xen-devel, KeirFraser, Yunhong Jiang,
	Donald D Dugger, Xin Li

>>> On 14.04.11 at 23:06, Keir Fraser <keir.xen@gmail.com> wrote:
> On 14/04/2011 19:33, "Wang, Winston L" <winston.l.wang@intel.com> wrote:
>> Great efforts for turning the test code re-validate the processor with
>> (X86_FEATURE_TSC_RELIABLE) set. We know for sure that the broken old
>> processors any write TSC will Zero the upper 32 bit. So can we move this 
> test
>> code as early as possible, say immediately after the early Processor init 
> code
>> where checking CPU id and set X86_FEATURE_TSC_RELIABLE?
> 
> For our meaning of TSC_RELIABLE, there's no need for its setting to rely on
> TSC 64-bit writability. I think leaving it where it is in init_xen_time() is
> probably fine.

So do I think - I already tried to carefully pick the place where this
is best done.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-14 16:48     ` Keir Fraser
  2011-04-14 18:33       ` Wang, Winston L
@ 2011-04-15  7:08       ` Jan Beulich
  2011-04-15  7:37         ` Keir Fraser
  1 sibling, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2011-04-15  7:08 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, winston.l.wang

>>> On 14.04.11 at 18:48, Keir Fraser <keir.xen@gmail.com> wrote:
> On 14/04/2011 17:28, "Jan Beulich" <JBeulich@novell.com> wrote:
>> The only case not handled was if the TSC
>> overflowed 64 bits during the process - I considered this case
>> hypothetical only.
>> 
>> The final write of tsc+4*delta was basically an attempt to restore
>> the value that would have been there if we didn't fiddle with it.
> 
> But the write is actually tsc + 4*(s32)(tmp-tsc), and tmp has 1U<<32 ORed
> into it (because it was read after your second write to the TSC. Perhaps we
> should just write back the full original tsc and call that good enough?

Again, note the (s32) cast.

Jan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-15  7:08       ` Jan Beulich
@ 2011-04-15  7:37         ` Keir Fraser
  2011-04-15 14:49           ` Wang, Winston L
  0 siblings, 1 reply; 21+ messages in thread
From: Keir Fraser @ 2011-04-15  7:37 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, winston.l.wang

On 15/04/2011 08:08, "Jan Beulich" <JBeulich@novell.com> wrote:

>> But the write is actually tsc + 4*(s32)(tmp-tsc), and tmp has 1U<<32 ORed
>> into it (because it was read after your second write to the TSC. Perhaps we
>> should just write back the full original tsc and call that good enough?
> 
> Again, note the (s32) cast.

Oh yes. Still the 4x is weird, and on this path (!TSC_RELIABLE, TSC is fully
writable) we will sync all AP TSCs as they come up anyway. So writing back
the original TSC value is good enough, as far as this matters at all (which
it probably doesn't).

 -- Keir

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-15  6:40             ` Keir Fraser
@ 2011-04-15 14:34               ` Dan Magenheimer
  2011-04-15 17:28                 ` Keir Fraser
  0 siblings, 1 reply; 21+ messages in thread
From: Dan Magenheimer @ 2011-04-15 14:34 UTC (permalink / raw)
  To: Keir Fraser, Jan Beulich; +Cc: xen-devel, winston.l.wang

> From: Keir Fraser [mailto:keir.xen@gmail.com]
> Sent: Friday, April 15, 2011 12:40 AM
> To: Dan Magenheimer; Jan Beulich
> Cc: winston.l.wang; xen-devel@lists.xensource.com
> Subject: Re: [Xen-devel] [PATCH] x86: don't write_tsc() non-zero values
> on CPUs updating only the lower 32 bits
> 
> On 14/04/2011 23:41, "Dan Magenheimer" <dan.magenheimer@oracle.com>
> wrote:
> 
> >> Yeah, if we want to continue to try avoiding write_tsc() on
> >> TSC_RELIABLE
> >> then we should assert !TSC_RELIABLE on the write_tsc() path in
> >> cstate_tsc_restore().
> >
> > Agreed.  In fact, maybe it should be asserted in write_tsc?
> 
> We still write_tsc on CPU physical hot-add.

Hmmm... IIRC the testing that Intel was doing for hot-add was
not for processors that were actually electrically hot-plugged
but only for processors that were powered-on at the same
time as all other processors but left offline until needed
(e.g. for capacity-on-demand).  For this situation, writing
to tsc is still the wrong approach.  I don't think we finished
the discussion about electrically hot-plugged processors
because they didn't exist... don't know if they do yet either.
IIRC I had proposed an unnamed boot parameter that said
"this machine may add unsynchronized processors post-boot"
and disallow hot-add processors if not specified (or if
not specified AND a run-time check of a hot-add processor
shows non-synchronization).

Dan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-15  7:37         ` Keir Fraser
@ 2011-04-15 14:49           ` Wang, Winston L
  0 siblings, 0 replies; 21+ messages in thread
From: Wang, Winston L @ 2011-04-15 14:49 UTC (permalink / raw)
  To: Keir Fraser, Jan Beulich
  Cc: Liu, Jinsong, Dan Magenheimer, xen-devel, KeirFraser, Jiang,
	Yunhong, Dugger, Donald D, Li, Xin

> -----Original Message-----
> From: Keir Fraser [mailto:keir.xen@gmail.com]
> Sent: Friday, April 15, 2011 12:37 AM
> On 15/04/2011 08:08, "Jan Beulich" <JBeulich@novell.com> wrote:
> 
> >> But the write is actually tsc + 4*(s32)(tmp-tsc), and tmp has 1U<<32
> ORed
> >> into it (because it was read after your second write to the TSC.
> Perhaps we
> >> should just write back the full original tsc and call that good
> enough?
> >
> > Again, note the (s32) cast.
> 
> Oh yes. Still the 4x is weird, and on this path (!TSC_RELIABLE, TSC is
> fully
> writable) we will sync all AP TSCs as they come up anyway. So writing
> back
> the original TSC value is good enough, as far as this matters at all
> (which
> it probably doesn't).
Agree, and new processor use for hot add should be upper 32 bit TSC is writeable, I don't think anyone want use those old ones (old model CPU ID before family [0FH]) which do not support up32 bit TSC write for hot add. 

Winston,

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
  2011-04-15 14:34               ` Dan Magenheimer
@ 2011-04-15 17:28                 ` Keir Fraser
  0 siblings, 0 replies; 21+ messages in thread
From: Keir Fraser @ 2011-04-15 17:28 UTC (permalink / raw)
  To: Dan Magenheimer, Jan Beulich; +Cc: xen-devel, winston.l.wang

On 15/04/2011 15:34, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

>>> Agreed.  In fact, maybe it should be asserted in write_tsc?
>> 
>> We still write_tsc on CPU physical hot-add.
> 
> Hmmm... IIRC the testing that Intel was doing for hot-add was
> not for processors that were actually electrically hot-plugged
> but only for processors that were powered-on at the same
> time as all other processors but left offline until needed
> (e.g. for capacity-on-demand).  For this situation, writing
> to tsc is still the wrong approach.  I don't think we finished
> the discussion about electrically hot-plugged processors
> because they didn't exist... don't know if they do yet either.
> IIRC I had proposed an unnamed boot parameter that said
> "this machine may add unsynchronized processors post-boot"
> and disallow hot-add processors if not specified (or if
> not specified AND a run-time check of a hot-add processor
> shows non-synchronization).

Well, I think the case I'm thinking of is electrical hot-plug. Not sure.
Either way I doubt anyone is actually using the feature.

 -- Keir

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2011-04-15 17:28 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-14  7:18 [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits Jan Beulich
2011-04-14  7:25 ` Keir Fraser
2011-04-14  7:42   ` Jan Beulich
2011-04-14  7:50     ` Keir Fraser
2011-04-14  8:06       ` Jan Beulich
2011-04-14  9:18         ` Keir Fraser
2011-04-14 22:41           ` Dan Magenheimer
2011-04-15  6:40             ` Keir Fraser
2011-04-15 14:34               ` Dan Magenheimer
2011-04-15 17:28                 ` Keir Fraser
2011-04-14  7:28 ` Jan Beulich
2011-04-14 16:05 ` Keir Fraser
2011-04-14 16:28   ` Jan Beulich
2011-04-14 16:48     ` Keir Fraser
2011-04-14 18:33       ` Wang, Winston L
2011-04-14 21:06         ` Keir Fraser
2011-04-14 21:37           ` Wang, Winston L
2011-04-15  7:06           ` Jan Beulich
2011-04-15  7:08       ` Jan Beulich
2011-04-15  7:37         ` Keir Fraser
2011-04-15 14:49           ` Wang, Winston L

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.