All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keir Fraser <keir@xen.org>
To: Jan Beulich <JBeulich@novell.com>,
	"xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Cc: "winston.l.wang" <winston.l.wang@intel.com>
Subject: Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
Date: Thu, 14 Apr 2011 17:05:11 +0100	[thread overview]
Message-ID: <C9CCD9C9.2CAC6%keir@xen.org> (raw)
In-Reply-To: <4DA6BBD1020000780003B865@vpn.id2.novell.com>

[-- Attachment #1: Type: text/plain, Size: 6251 bytes --]

On 14/04/2011 08:18, "Jan Beulich" <JBeulich@novell.com> wrote:

> This means suppressing the uses in time_calibration_tsc_rendezvous(),
> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
> hang of Linux Dom0 when loading processor.ko on such systems that
> have support for C states above C1.

I've attached a version which would avoid doing the writability test on
TSC_RELIABLE systems. See what you think.

I also simplified the actual writability check itself. I couldn't figure out
what the benefit of your more complex approach would be. In fact it looked
like it wouldn't work if bit 32 was set already in the TSC counter, as then
you would write back an unmodified TSC (and in fact you would detect the
wrong way round, as you'd see a big delta if the write silently cleared bit
32 (and bits 33-63)). As for the final write of tsc+4*delta, I wasn't sure
what that was about either! But if you can explain why your test is better,
I'd be happy to use it as you originally wrote it.

 -- Keir
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
> 
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
>      hpet_disable_legacy_broadcast();
>  }
>  
> +bool_t cpuidle_using_deep_cstate(void)
> +{
> +    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
> +}
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -41,6 +41,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/msr.h>
>  #include <asm/mtrr.h>
> +#include <asm/time.h>
>  #include <mach_apic.h>
>  #include <mach_wakecpu.h>
>  #include <smpboot_hooks.h>
> @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
>      ;
>  }
>  
> +/*
> + * TSC's upper 32 bits can't be written in earlier CPUs (before
> + * Prescott), there is no way to resync one AP against BP.
> + */
> +bool_t disable_tsc_sync;
> +
>  static atomic_t tsc_count;
>  static uint64_t tsc_value;
>  static cpumask_t tsc_sync_cpu_mask;
> @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -21,6 +21,7 @@
>  #include <xen/smp.h>
>  #include <xen/irq.h>
>  #include <xen/softirq.h>
> +#include <xen/cpuidle.h>
>  #include <xen/symbols.h>
>  #include <xen/keyhandler.h>
>  #include <xen/guest_access.h>
> @@ -1385,6 +1386,9 @@ void init_percpu_time(void)
>  /* Late init function (after all CPUs are booted). */
>  int __init init_xen_time(void)
>  {
> +    u64 tsc, tmp;
> +    const char *what = NULL;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
>      {
>          /*
> @@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
>              setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
>      }
>  
> +    /*
> +     * On certain older Intel CPUs writing the TSC MSR clears the upper
> +     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
> +     *
> +     * Additionally, AMD specifies that being able to write the TSC MSR
> +     * is not an architectural feature (but, other than their manual says,
> +     * also cannot be determined from CPUID bits).
> +     */
> +    rdtscll(tsc);
> +    if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
> +    {
> +        u64 tmp2;
> +
> +        rdtscll(tmp2);
> +        write_tsc(tsc | (1ULL << 32));
> +        rdtscll(tmp);
> +        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
> +            what = "only partially";
> +    }
> +    else
> +        what = "not";
> +    if ( what )
> +    {
> +        printk(XENLOG_WARNING "TSC %s writable\n", what);
> +
> +        /* time_calibration_tsc_rendezvous() must not be used */
> +        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> +            setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> +
> +        /* cstate_restore_tsc() must not be used (or do nothing) */
> +        if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
> +            cpuidle_disable_deep_cstate();
> +
> +        /* synchronize_tsc_slave() must do nothing */
> +        disable_tsc_sync = 1;
> +    }
> +    else
> +        write_tsc(tsc + 4 * (s32)(tmp - tsc));
> +
>      /* If we have constant-rate TSCs then scale factor can be shared. */
>      if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
>      {
> @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
>       * XXX dom0 may rely on RTC interrupt delivery, so only enable
>       * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
>       */
> -    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
> +    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
>      {
>          hpet_broadcast_setup();
>          if ( !hpet_broadcast_is_available() )
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -4,7 +4,6 @@
>  #include <xen/multiboot.h>
>  
>  extern bool_t early_boot;
> -extern s8 xen_cpuidle;
>  extern unsigned long xenheap_initial_phys_start;
>  
>  void init_done(void);
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -24,6 +24,8 @@
>  
>  typedef u64 cycles_t;
>  
> +extern bool_t disable_tsc_sync;
> +
>  static inline cycles_t get_cycles(void)
>  {
>      cycles_t c;
> --- a/xen/include/xen/cpuidle.h
> +++ b/xen/include/xen/cpuidle.h
> @@ -85,7 +85,10 @@ struct cpuidle_governor
>      void (*reflect)         (struct acpi_processor_power *dev);
>  };
>  
> +extern s8 xen_cpuidle;
>  extern struct cpuidle_governor *cpuidle_current_governor;
> +
> +bool_t cpuidle_using_deep_cstate(void);
>  void cpuidle_disable_deep_cstate(void);
>  
>  extern void cpuidle_wakeup_mwait(cpumask_t *mask);
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel


[-- Attachment #2: 00-tsc-check --]
[-- Type: application/octet-stream, Size: 5451 bytes --]

diff -r b5165fb66b56 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c	Thu Apr 14 16:58:41 2011 +0100
@@ -1098,3 +1098,7 @@
     hpet_disable_legacy_broadcast();
 }
 
+bool_t cpuidle_using_deep_cstate(void)
+{
+    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
+}
diff -r b5165fb66b56 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/arch/x86/smpboot.c	Thu Apr 14 16:58:41 2011 +0100
@@ -41,6 +41,7 @@
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
 #include <asm/mtrr.h>
+#include <asm/time.h>
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
@@ -124,6 +125,12 @@
     ;
 }
 
+/*
+ * The TSC's upper 32 bits can't be written on earlier CPUs (before
+ * Prescott), so there is no way to resync an AP against the BP.
+ */
+bool_t disable_tsc_sync;
+
 static atomic_t tsc_count;
 static uint64_t tsc_value;
 static cpumask_t tsc_sync_cpu_mask;
@@ -132,6 +139,9 @@
 {
     unsigned int i;
 
+    if ( disable_tsc_sync )
+        return;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
          !cpu_isset(slave, tsc_sync_cpu_mask) )
         return;
@@ -153,6 +163,9 @@
 {
     unsigned int i;
 
+    if ( disable_tsc_sync )
+        return;
+
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
          !cpu_isset(slave, tsc_sync_cpu_mask) )
         return;
diff -r b5165fb66b56 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/arch/x86/time.c	Thu Apr 14 16:58:41 2011 +0100
@@ -21,6 +21,7 @@
 #include <xen/smp.h>
 #include <xen/irq.h>
 #include <xen/softirq.h>
+#include <xen/cpuidle.h>
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
 #include <xen/guest_access.h>
@@ -680,6 +681,8 @@
     if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
+    ASSERT(boot_cpu_has(X86_FEATURE_TSC_RELIABLE));
+
     write_tsc(stime2tsc(read_platform_stime()));
 }
 
@@ -1382,6 +1385,56 @@
     }
 }
 
+/*
+ * On certain older Intel CPUs writing the TSC MSR clears the upper 32 bits.
+ * Obviously we must not use write_tsc() on such CPUs.
+ *
+ * Additionally, AMD specifies that being able to write the TSC MSR is not an
+ * architectural feature (and, contrary to what their manual says, it also
+ * cannot be determined from CPUID bits).
+ */
+static void __init tsc_check_writability(void)
+{
+    const char *what = NULL;
+
+    /*
+     * If all CPUs are reported as synchronised and in sync, we never write
+     * the TSCs (except unavoidably, when a CPU is physically hot-plugged).
+     * Hence testing for writability is pointless and even harmful.
+     */
+    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+        return;
+
+    if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 )
+    {
+        uint64_t tmp, tmp2;
+
+        rdtscll(tmp2);
+        write_tsc(1ULL << 32);
+        rdtscll(tmp);
+        if ( ABS((s64)tmp - (s64)tmp2) < (1ULL << 31) )
+            what = "only partially";
+    }
+    else
+        what = "not";
+
+    /* Nothing to do if the TSC is fully writable. */
+    if ( !what )
+        return;
+
+    printk(XENLOG_WARNING "TSC %s writable\n", what);
+
+    /* time_calibration_tsc_rendezvous() must not be used */
+    setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
+
+    /* cstate_restore_tsc() must not be used (or do nothing) */
+    if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
+        cpuidle_disable_deep_cstate();
+
+    /* synchronize_tsc_slave() must do nothing */
+    disable_tsc_sync = 1;
+}
+
 /* Late init function (after all CPUs are booted). */
 int __init init_xen_time(void)
 {
@@ -1398,6 +1451,8 @@
             setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
     }
 
+    tsc_check_writability();
+
     /* If we have constant-rate TSCs then scale factor can be shared. */
     if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
     {
@@ -1451,7 +1506,7 @@
      * XXX dom0 may rely on RTC interrupt delivery, so only enable
      * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
      */
-    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
+    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
     {
         hpet_broadcast_setup();
         if ( !hpet_broadcast_is_available() )
diff -r b5165fb66b56 xen/include/asm-x86/setup.h
--- a/xen/include/asm-x86/setup.h	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/include/asm-x86/setup.h	Thu Apr 14 16:58:41 2011 +0100
@@ -4,7 +4,6 @@
 #include <xen/multiboot.h>
 
 extern bool_t early_boot;
-extern s8 xen_cpuidle;
 extern unsigned long xenheap_initial_phys_start;
 
 void init_done(void);
diff -r b5165fb66b56 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/include/asm-x86/time.h	Thu Apr 14 16:58:41 2011 +0100
@@ -24,6 +24,8 @@
 
 typedef u64 cycles_t;
 
+extern bool_t disable_tsc_sync;
+
 static inline cycles_t get_cycles(void)
 {
     cycles_t c;
diff -r b5165fb66b56 xen/include/xen/cpuidle.h
--- a/xen/include/xen/cpuidle.h	Thu Apr 14 14:57:24 2011 +0100
+++ b/xen/include/xen/cpuidle.h	Thu Apr 14 16:58:41 2011 +0100
@@ -85,7 +85,10 @@
     void (*reflect)         (struct acpi_processor_power *dev);
 };
 
+extern s8 xen_cpuidle;
 extern struct cpuidle_governor *cpuidle_current_governor;
+
+bool_t cpuidle_using_deep_cstate(void);
 void cpuidle_disable_deep_cstate(void);
 
 extern void cpuidle_wakeup_mwait(cpumask_t *mask);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

  parent reply	other threads:[~2011-04-14 16:05 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-14  7:18 [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits Jan Beulich
2011-04-14  7:25 ` Keir Fraser
2011-04-14  7:42   ` Jan Beulich
2011-04-14  7:50     ` Keir Fraser
2011-04-14  8:06       ` Jan Beulich
2011-04-14  9:18         ` Keir Fraser
2011-04-14 22:41           ` Dan Magenheimer
2011-04-15  6:40             ` Keir Fraser
2011-04-15 14:34               ` Dan Magenheimer
2011-04-15 17:28                 ` Keir Fraser
2011-04-14  7:28 ` Jan Beulich
2011-04-14 16:05 ` Keir Fraser [this message]
2011-04-14 16:28   ` Jan Beulich
2011-04-14 16:48     ` Keir Fraser
2011-04-14 18:33       ` Wang, Winston L
2011-04-14 21:06         ` Keir Fraser
2011-04-14 21:37           ` Wang, Winston L
2011-04-15  7:06           ` Jan Beulich
2011-04-15  7:08       ` Jan Beulich
2011-04-15  7:37         ` Keir Fraser
2011-04-15 14:49           ` Wang, Winston L

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=C9CCD9C9.2CAC6%keir@xen.org \
    --to=keir@xen.org \
    --cc=JBeulich@novell.com \
    --cc=winston.l.wang@intel.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.