All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/nmi: Make external NMI injection reliably crash the host
@ 2014-08-26 10:10 Ross Lagerwall
  2014-08-26 10:17 ` Andrew Cooper
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Ross Lagerwall @ 2014-08-26 10:10 UTC (permalink / raw)
  To: Xen-devel; +Cc: Ross Lagerwall, Keir Fraser, Jan Beulich

Change the watchdog handler to only "tick" if the corresponding perf
counter has overflowed; otherwise, return false from the NMI handler to
indicate that the NMI is not a watchdog tick and let the other handlers
handle it.  This allows externally injected NMIs to reliably crash the
host rather than be swallowed by the watchdog handler.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
CC: Keir Fraser <keir@xen.org>
CC: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/nmi.c         | 76 ++++++++++++++++++++++++++++++++--------------
 xen/arch/x86/traps.c       |  6 ++--
 xen/include/asm-x86/apic.h |  2 +-
 3 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index c4427a6..5bf0e2c 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -15,6 +15,7 @@
 
 #include <xen/config.h>
 #include <xen/init.h>
+#include <xen/stdbool.h>
 #include <xen/lib.h>
 #include <xen/mm.h>
 #include <xen/irq.h>
@@ -82,6 +83,7 @@ int nmi_active;
 #define K7_EVNTSEL_USR		(1 << 16)
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
 #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+#define K7_EVENT_WIDTH		32
 
 #define P6_EVNTSEL0_ENABLE	(1 << 22)
 #define P6_EVNTSEL_INT		(1 << 20)
@@ -89,10 +91,12 @@ int nmi_active;
 #define P6_EVNTSEL_USR		(1 << 16)
 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED	 0x79
 #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
+#define P6_EVENT_WIDTH		32
 
 #define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
 #define P4_CCCR_OVF_PMI0	(1<<26)
 #define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_OVF		(1<<31)
 #define P4_CCCR_THRESHOLD(N)	((N)<<20)
 #define P4_CCCR_COMPLEMENT	(1<<19)
 #define P4_CCCR_COMPARE		(1<<18)
@@ -432,35 +436,23 @@ int __init watchdog_setup(void)
     return 0;
 }
 
-void nmi_watchdog_tick(const struct cpu_user_regs *regs)
+/* Returns true if this was a watchdog NMI, false otherwise */
+bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
 {
+    bool_t watchdog_tick = 0;
     unsigned int sum = this_cpu(nmi_timer_ticks);
 
-    if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
-    {
-        /*
-         * Ayiee, looks like this CPU is stuck ... wait for the timeout
-         * before doing the oops ...
-         */
-        this_cpu(alert_counter)++;
-        if ( this_cpu(alert_counter) == opt_watchdog_timeout*nmi_hz )
-        {
-            console_force_unlock();
-            printk("Watchdog timer detects that CPU%d is stuck!\n",
-                   smp_processor_id());
-            fatal_trap(TRAP_nmi, regs);
-        }
-    } 
-    else 
-    {
-        this_cpu(last_irq_sums) = sum;
-        this_cpu(alert_counter) = 0;
-    }
-
     if ( nmi_perfctr_msr )
     {
+        uint64_t msr_content;
+
+        /* Work out if this is a watchdog tick by checking for overflow. */
         if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
         {
+            rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
+            if ( msr_content & P4_CCCR_OVF )
+                watchdog_tick = 1;
+
             /*
              * P4 quirks:
              * - An overflown perfctr will assert its interrupt
@@ -473,14 +465,52 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
         }
         else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
         {
+            rdmsrl(MSR_P6_PERFCTR0, msr_content);
+            watchdog_tick = !(msr_content & (1ULL << P6_EVENT_WIDTH));
+
             /*
              * Only P6 based Pentium M need to re-unmask the apic vector but
              * it doesn't hurt other P6 variants.
              */
             apic_write(APIC_LVTPC, APIC_DM_NMI);
         }
-        write_watchdog_counter(NULL);
+        else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
+        {
+            rdmsrl(MSR_K7_PERFCTR0, msr_content);
+            watchdog_tick = !(msr_content & (1ULL << K7_EVENT_WIDTH));
+        }
+
+        if ( watchdog_tick )
+        {
+            unsigned int *this_alert_counter = &this_cpu(alert_counter);
+            unsigned int *this_last_irq_sums = &this_cpu(last_irq_sums);
+
+            write_watchdog_counter(NULL);
+
+            if ( (*this_last_irq_sums == sum) && watchdog_enabled() )
+            {
+                /*
+                 * Ayiee, looks like this CPU is stuck ... wait for the timeout
+                 * before doing the oops ...
+                 */
+                ++*this_alert_counter;
+                if ( *this_alert_counter == opt_watchdog_timeout * nmi_hz )
+                {
+                    console_force_unlock();
+                    printk("Watchdog timer detects that CPU%d is stuck!\n",
+                           smp_processor_id());
+                    fatal_trap(TRAP_nmi, regs);
+                }
+            }
+            else
+            {
+                *this_last_irq_sums = sum;
+                *this_alert_counter = 0;
+            }
+        }
     }
+
+    return watchdog_tick;
 }
 
 /*
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 71be2ae..38f42fa 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3312,8 +3312,8 @@ void do_nmi(const struct cpu_user_regs *regs)
     if ( nmi_callback(regs, cpu) )
         return;
 
-    if ( nmi_watchdog )
-        nmi_watchdog_tick(regs);
+    if ( nmi_watchdog && nmi_watchdog_tick(regs) )
+        return;
 
     /* Only the BSP gets external NMIs from the system. */
     if ( cpu == 0 )
@@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
             pci_serr_error(regs);
         if ( reason & 0x40 )
             io_check_error(regs);
-        if ( !(reason & 0xc0) && !nmi_watchdog )
+        if ( !(reason & 0xc0) )
             unknown_nmi_error(regs, reason);
     }
 }
diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
index 5d7623f..6697245 100644
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -206,7 +206,7 @@ extern void release_lapic_nmi(void);
 extern void self_nmi(void);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (const struct cpu_user_regs *regs);
+extern bool_t nmi_watchdog_tick (const struct cpu_user_regs *regs);
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 10:10 [PATCH] x86/nmi: Make external NMI injection reliably crash the host Ross Lagerwall
@ 2014-08-26 10:17 ` Andrew Cooper
  2014-08-26 12:59 ` Jan Beulich
  2014-08-26 16:06 ` Don Slutz
  2 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2014-08-26 10:17 UTC (permalink / raw)
  To: xen-devel

On 26/08/14 11:10, Ross Lagerwall wrote:
> Change the watchdog handler to only "tick" if the corresponding perf
> counter has overflowed; otherwise, return false from the NMI handler to
> indicate that the NMI is not a watchdog tick and let the other handlers
> handle it.  This allows externally injected NMIs to reliably crash the
> host rather than be swallowed by the watchdog handler.

More specifically, external NMIs which don't set one of the bits in the
System Control Port B.

Most vendors which support an "Inject NMI" option from their ipmi/web
interface do not set any bits in control port B, at which point the
attempt to crash the server will be eaten by the watchdog logic.

>
> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
> CC: Keir Fraser <keir@xen.org>
> CC: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
>  xen/arch/x86/nmi.c         | 76 ++++++++++++++++++++++++++++++++--------------
>  xen/arch/x86/traps.c       |  6 ++--
>  xen/include/asm-x86/apic.h |  2 +-
>  3 files changed, 57 insertions(+), 27 deletions(-)
>
> diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
> index c4427a6..5bf0e2c 100644
> --- a/xen/arch/x86/nmi.c
> +++ b/xen/arch/x86/nmi.c
> @@ -15,6 +15,7 @@
>  
>  #include <xen/config.h>
>  #include <xen/init.h>
> +#include <xen/stdbool.h>
>  #include <xen/lib.h>
>  #include <xen/mm.h>
>  #include <xen/irq.h>
> @@ -82,6 +83,7 @@ int nmi_active;
>  #define K7_EVNTSEL_USR		(1 << 16)
>  #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
>  #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
> +#define K7_EVENT_WIDTH		32
>  
>  #define P6_EVNTSEL0_ENABLE	(1 << 22)
>  #define P6_EVNTSEL_INT		(1 << 20)
> @@ -89,10 +91,12 @@ int nmi_active;
>  #define P6_EVNTSEL_USR		(1 << 16)
>  #define P6_EVENT_CPU_CLOCKS_NOT_HALTED	 0x79
>  #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
> +#define P6_EVENT_WIDTH		32
>  
>  #define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
>  #define P4_CCCR_OVF_PMI0	(1<<26)
>  #define P4_CCCR_OVF_PMI1	(1<<27)
> +#define P4_CCCR_OVF		(1<<31)
>  #define P4_CCCR_THRESHOLD(N)	((N)<<20)
>  #define P4_CCCR_COMPLEMENT	(1<<19)
>  #define P4_CCCR_COMPARE		(1<<18)
> @@ -432,35 +436,23 @@ int __init watchdog_setup(void)
>      return 0;
>  }
>  
> -void nmi_watchdog_tick(const struct cpu_user_regs *regs)
> +/* Returns true if this was a watchdog NMI, false otherwise */
> +bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
>  {
> +    bool_t watchdog_tick = 0;
>      unsigned int sum = this_cpu(nmi_timer_ticks);
>  
> -    if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
> -    {
> -        /*
> -         * Ayiee, looks like this CPU is stuck ... wait for the timeout
> -         * before doing the oops ...
> -         */
> -        this_cpu(alert_counter)++;
> -        if ( this_cpu(alert_counter) == opt_watchdog_timeout*nmi_hz )
> -        {
> -            console_force_unlock();
> -            printk("Watchdog timer detects that CPU%d is stuck!\n",
> -                   smp_processor_id());
> -            fatal_trap(TRAP_nmi, regs);
> -        }
> -    } 
> -    else 
> -    {
> -        this_cpu(last_irq_sums) = sum;
> -        this_cpu(alert_counter) = 0;
> -    }
> -
>      if ( nmi_perfctr_msr )
>      {
> +        uint64_t msr_content;
> +
> +        /* Work out if this is a watchdog tick by checking for overflow. */
>          if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
>          {
> +            rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
> +            if ( msr_content & P4_CCCR_OVF )
> +                watchdog_tick = 1;
> +
>              /*
>               * P4 quirks:
>               * - An overflown perfctr will assert its interrupt
> @@ -473,14 +465,52 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
>          }
>          else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
>          {
> +            rdmsrl(MSR_P6_PERFCTR0, msr_content);
> +            watchdog_tick = !(msr_content & (1ULL << P6_EVENT_WIDTH));
> +
>              /*
>               * Only P6 based Pentium M need to re-unmask the apic vector but
>               * it doesn't hurt other P6 variants.
>               */
>              apic_write(APIC_LVTPC, APIC_DM_NMI);
>          }
> -        write_watchdog_counter(NULL);
> +        else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
> +        {
> +            rdmsrl(MSR_K7_PERFCTR0, msr_content);
> +            watchdog_tick = !(msr_content & (1ULL << K7_EVENT_WIDTH));
> +        }
> +
> +        if ( watchdog_tick )
> +        {
> +            unsigned int *this_alert_counter = &this_cpu(alert_counter);
> +            unsigned int *this_last_irq_sums = &this_cpu(last_irq_sums);
> +
> +            write_watchdog_counter(NULL);
> +
> +            if ( (*this_last_irq_sums == sum) && watchdog_enabled() )
> +            {
> +                /*
> +                 * Ayiee, looks like this CPU is stuck ... wait for the timeout
> +                 * before doing the oops ...
> +                 */
> +                ++*this_alert_counter;
> +                if ( *this_alert_counter == opt_watchdog_timeout * nmi_hz )
> +                {
> +                    console_force_unlock();
> +                    printk("Watchdog timer detects that CPU%d is stuck!\n",
> +                           smp_processor_id());
> +                    fatal_trap(TRAP_nmi, regs);
> +                }
> +            }
> +            else
> +            {
> +                *this_last_irq_sums = sum;
> +                this_alert_counter = 0;
> +            }
> +        }
>      }
> +
> +    return watchdog_tick;
>  }
>  
>  /*
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 71be2ae..38f42fa 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -3312,8 +3312,8 @@ void do_nmi(const struct cpu_user_regs *regs)
>      if ( nmi_callback(regs, cpu) )
>          return;
>  
> -    if ( nmi_watchdog )
> -        nmi_watchdog_tick(regs);
> +    if ( nmi_watchdog && nmi_watchdog_tick(regs) )
> +        return;
>  
>      /* Only the BSP gets external NMIs from the system. */
>      if ( cpu == 0 )
> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>              pci_serr_error(regs);
>          if ( reason & 0x40 )
>              io_check_error(regs);
> -        if ( !(reason & 0xc0) && !nmi_watchdog )
> +        if ( !(reason & 0xc0) )
>              unknown_nmi_error(regs, reason);
>      }
>  }
> diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
> index 5d7623f..6697245 100644
> --- a/xen/include/asm-x86/apic.h
> +++ b/xen/include/asm-x86/apic.h
> @@ -206,7 +206,7 @@ extern void release_lapic_nmi(void);
>  extern void self_nmi(void);
>  extern void disable_timer_nmi_watchdog(void);
>  extern void enable_timer_nmi_watchdog(void);
> -extern void nmi_watchdog_tick (const struct cpu_user_regs *regs);
> +extern bool_t nmi_watchdog_tick (const struct cpu_user_regs *regs);
>  extern int APIC_init_uniprocessor (void);
>  extern void disable_APIC_timer(void);
>  extern void enable_APIC_timer(void);

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 10:10 [PATCH] x86/nmi: Make external NMI injection reliably crash the host Ross Lagerwall
  2014-08-26 10:17 ` Andrew Cooper
@ 2014-08-26 12:59 ` Jan Beulich
  2014-08-26 15:26   ` Ross Lagerwall
  2014-08-26 16:06 ` Don Slutz
  2 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-08-26 12:59 UTC (permalink / raw)
  To: Ross Lagerwall; +Cc: Keir Fraser, Xen-devel

>>> On 26.08.14 at 12:10, <ross.lagerwall@citrix.com> wrote:
> --- a/xen/arch/x86/nmi.c
> +++ b/xen/arch/x86/nmi.c
> @@ -15,6 +15,7 @@
>  
>  #include <xen/config.h>
>  #include <xen/init.h>
> +#include <xen/stdbool.h>

???

> @@ -432,35 +436,23 @@ int __init watchdog_setup(void)
>      return 0;
>  }
>  
> -void nmi_watchdog_tick(const struct cpu_user_regs *regs)
> +/* Returns true if this was a watchdog NMI, false otherwise */
> +bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
>  {
> +    bool_t watchdog_tick = 0;
>      unsigned int sum = this_cpu(nmi_timer_ticks);
>  
> -    if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
> -    {
> -        /*
> -         * Ayiee, looks like this CPU is stuck ... wait for the timeout
> -         * before doing the oops ...
> -         */
> -        this_cpu(alert_counter)++;
> -        if ( this_cpu(alert_counter) == opt_watchdog_timeout*nmi_hz )
> -        {
> -            console_force_unlock();
> -            printk("Watchdog timer detects that CPU%d is stuck!\n",
> -                   smp_processor_id());
> -            fatal_trap(TRAP_nmi, regs);
> -        }
> -    } 
> -    else 
> -    {
> -        this_cpu(last_irq_sums) = sum;
> -        this_cpu(alert_counter) = 0;
> -    }
> -
>      if ( nmi_perfctr_msr )
>      {

So if we don't get into this block ...

> +        uint64_t msr_content;
> +
> +        /* Work out if this is a watchdog tick by checking for overflow. */
>          if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
>          {
> +            rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
> +            if ( msr_content & P4_CCCR_OVF )
> +                watchdog_tick = 1;
> +
>              /*
>               * P4 quirks:
>               * - An overflown perfctr will assert its interrupt
> @@ -473,14 +465,52 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
>          }
>          else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
>          {
> +            rdmsrl(MSR_P6_PERFCTR0, msr_content);
> +            watchdog_tick = !(msr_content & (1ULL << P6_EVENT_WIDTH));
> +
>              /*
>               * Only P6 based Pentium M need to re-unmask the apic vector but
>               * it doesn't hurt other P6 variants.
>               */
>              apic_write(APIC_LVTPC, APIC_DM_NMI);
>          }
> -        write_watchdog_counter(NULL);
> +        else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
> +        {
> +            rdmsrl(MSR_K7_PERFCTR0, msr_content);
> +            watchdog_tick = !(msr_content & (1ULL << K7_EVENT_WIDTH));
> +        }
> +
> +        if ( watchdog_tick )
> +        {

... we'll never get here. That's clearly a change in behavior for
systems with no suitable perfctr MSR. I'm of the opinion that for
such systems behavior should not change from what it is right
now, i.e. if you can't exclude the NMI to be watchdog induced,
assume it is (rather than assuming that to be a fatal condition).

Or wait - is this only a theoretical consideration? If so, the patch
description should be adjusted to make clear we can't get there.
And perhaps the surrounding if(nmi_perfctr_msr) should then
become ASSERT(nmi_perfctr_msr).

> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -3312,8 +3312,8 @@ void do_nmi(const struct cpu_user_regs *regs)
>      if ( nmi_callback(regs, cpu) )
>          return;
>  
> -    if ( nmi_watchdog )
> -        nmi_watchdog_tick(regs);
> +    if ( nmi_watchdog && nmi_watchdog_tick(regs) )
> +        return;
>  
>      /* Only the BSP gets external NMIs from the system. */
>      if ( cpu == 0 )
> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>              pci_serr_error(regs);
>          if ( reason & 0x40 )
>              io_check_error(regs);
> -        if ( !(reason & 0xc0) && !nmi_watchdog )
> +        if ( !(reason & 0xc0) )
>              unknown_nmi_error(regs, reason);

As much as I like the original idea, I'm afraid this won't fly: I do
know of systems where bad motherboard design leads to neither
of these two bits ever getting set. I.e. at the very minimum we'd
need a command line option to restore old behavior. Personally I
think it should in fact remain default behavior, and new behavior
should only be enabled via command line option.

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 12:59 ` Jan Beulich
@ 2014-08-26 15:26   ` Ross Lagerwall
  2014-08-26 15:38     ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: Ross Lagerwall @ 2014-08-26 15:26 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Keir Fraser, Xen-devel

On 08/26/2014 01:59 PM, Jan Beulich wrote:
>>>> On 26.08.14 at 12:10, <ross.lagerwall@citrix.com> wrote:
>> --- a/xen/arch/x86/nmi.c
>> +++ b/xen/arch/x86/nmi.c
>> @@ -15,6 +15,7 @@
>>
>>   #include <xen/config.h>
>>   #include <xen/init.h>
>> +#include <xen/stdbool.h>
>
> ???

I thought this was needed for bool_t but obviously it's not.

>
>> @@ -432,35 +436,23 @@ int __init watchdog_setup(void)
>>       return 0;
>>   }
>>
>> -void nmi_watchdog_tick(const struct cpu_user_regs *regs)
>> +/* Returns true if this was a watchdog NMI, false otherwise */
>> +bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
>>   {
>> +    bool_t watchdog_tick = 0;
>>       unsigned int sum = this_cpu(nmi_timer_ticks);
>>
>> -    if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
>> -    {
>> -        /*
>> -         * Ayiee, looks like this CPU is stuck ... wait for the timeout
>> -         * before doing the oops ...
>> -         */
>> -        this_cpu(alert_counter)++;
>> -        if ( this_cpu(alert_counter) == opt_watchdog_timeout*nmi_hz )
>> -        {
>> -            console_force_unlock();
>> -            printk("Watchdog timer detects that CPU%d is stuck!\n",
>> -                   smp_processor_id());
>> -            fatal_trap(TRAP_nmi, regs);
>> -        }
>> -    }
>> -    else
>> -    {
>> -        this_cpu(last_irq_sums) = sum;
>> -        this_cpu(alert_counter) = 0;
>> -    }
>> -
>>       if ( nmi_perfctr_msr )
>>       {
>
> So if we don't get into this block ...
>
>> +        uint64_t msr_content;
>> +
>> +        /* Work out if this is a watchdog tick by checking for overflow. */
>>           if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
>>           {
>> +            rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
>> +            if ( msr_content & P4_CCCR_OVF )
>> +                watchdog_tick = 1;
>> +
>>               /*
>>                * P4 quirks:
>>                * - An overflown perfctr will assert its interrupt
>> @@ -473,14 +465,52 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
>>           }
>>           else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
>>           {
>> +            rdmsrl(MSR_P6_PERFCTR0, msr_content);
>> +            watchdog_tick = !(msr_content & (1ULL << P6_EVENT_WIDTH));
>> +
>>               /*
>>                * Only P6 based Pentium M need to re-unmask the apic vector but
>>                * it doesn't hurt other P6 variants.
>>                */
>>               apic_write(APIC_LVTPC, APIC_DM_NMI);
>>           }
>> -        write_watchdog_counter(NULL);
>> +        else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
>> +        {
>> +            rdmsrl(MSR_K7_PERFCTR0, msr_content);
>> +            watchdog_tick = !(msr_content & (1ULL << K7_EVENT_WIDTH));
>> +        }
>> +
>> +        if ( watchdog_tick )
>> +        {
>
> ... we'll never get here. That's clearly a change in behavior for
> systems with no suitable perfctr MSR. I'm of the opinion that for
> such systems behavior should not change from what it is right
> now, i.e. if you can't exclude the NMI to be watchdog induced,
> assume it is (rather than assuming that to be a fatal condition).
>
> Or wait - is this only a theoretical consideration? If so, the patch
> description should be adjusted to make clear we can't get there.
> And perhaps the surrounding if(nmi_perfctr_msr) should then
> become ASSERT(nmi_perfctr_msr).

A comment in the source code says that the watchdog may also be driven 
from the I/O APIC timer so this is probably a valid consideration. I 
shall rework this a bit.

>
>> --- a/xen/arch/x86/traps.c
>> +++ b/xen/arch/x86/traps.c
>> @@ -3312,8 +3312,8 @@ void do_nmi(const struct cpu_user_regs *regs)
>>       if ( nmi_callback(regs, cpu) )
>>           return;
>>
>> -    if ( nmi_watchdog )
>> -        nmi_watchdog_tick(regs);
>> +    if ( nmi_watchdog && nmi_watchdog_tick(regs) )
>> +        return;
>>
>>       /* Only the BSP gets external NMIs from the system. */
>>       if ( cpu == 0 )
>> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>>               pci_serr_error(regs);
>>           if ( reason & 0x40 )
>>               io_check_error(regs);
>> -        if ( !(reason & 0xc0) && !nmi_watchdog )
>> +        if ( !(reason & 0xc0) )
>>               unknown_nmi_error(regs, reason);
>
> As much as I like the original idea, I'm afraid this won't fly: I do
> know of systems where bad motherboard design leads to neither
> of these two bits ever getting set. I.e. at the very minimum we'd
> need a command line option to restore old behavior. Personally I
> think it should in fact remain default behavior, and new behavior
> should only be enabled via command line option.
>

Well the old behavior was different depending on whether the watchdog 
was enabled or not. Since the watchdog was disabled by default, that's 
no different from the behavior here.

So are you thinking something like an ignore_unknown_nmi boolean 
parameter that defaults to true?

Regards
-- 
Ross Lagerwall

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 15:26   ` Ross Lagerwall
@ 2014-08-26 15:38     ` Jan Beulich
  2014-08-27 11:14       ` Ross Lagerwall
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-08-26 15:38 UTC (permalink / raw)
  To: Ross Lagerwall; +Cc: Keir Fraser, Xen-devel

>>> On 26.08.14 at 17:26, <ross.lagerwall@citrix.com> wrote:
> On 08/26/2014 01:59 PM, Jan Beulich wrote:
>>>>> On 26.08.14 at 12:10, <ross.lagerwall@citrix.com> wrote:
>>> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>>>               pci_serr_error(regs);
>>>           if ( reason & 0x40 )
>>>               io_check_error(regs);
>>> -        if ( !(reason & 0xc0) && !nmi_watchdog )
>>> +        if ( !(reason & 0xc0) )
>>>               unknown_nmi_error(regs, reason);
>>
>> As much as I like the original idea, I'm afraid this won't fly: I do
>> know of systems where bad motherboard design leads to neither
>> of these two bits ever getting set. I.e. at the very minimum we'd
>> need a command line option to restore old behavior. Personally I
>> think it should in fact remain default behavior, and new behavior
>> should only be enabled via command line option.
> 
> Well the old behavior was different depending on whether the watchdog 
> was enabled or not. Since the watchdog was disabled by default, that's 
> no different from the behavior here.
> 
> So are you thinking something like an ignore_unknown_nmi boolean 
> parameter that defaults to true?

More like a "watchdog=force" one, but right, since the watchdog
isn't being enabled by default, maybe making it an opt-out instead
of opt-in would indeed be acceptable.

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 10:10 [PATCH] x86/nmi: Make external NMI injection reliably crash the host Ross Lagerwall
  2014-08-26 10:17 ` Andrew Cooper
  2014-08-26 12:59 ` Jan Beulich
@ 2014-08-26 16:06 ` Don Slutz
  2014-08-26 16:51   ` Andrew Cooper
  2 siblings, 1 reply; 11+ messages in thread
From: Don Slutz @ 2014-08-26 16:06 UTC (permalink / raw)
  To: Ross Lagerwall, Xen-devel; +Cc: Keir Fraser, Jan Beulich


On 08/26/14 06:10, Ross Lagerwall wrote:
> Change the watchdog handler to only "tick" if the corresponding perf
> counter has overflowed; otherwise, return false from the NMI handler to
> indicate that the NMI is not a watchdog tick and let the other handlers
> handle it.  This allows externally injected NMIs to reliably crash the
> host rather than be swallowed by the watchdog handler.

If a crash kernel has been set up via kexec, does this change to
"crash host" end up jumping into the crash kernel?

     -Don Slutz

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 16:06 ` Don Slutz
@ 2014-08-26 16:51   ` Andrew Cooper
  2014-08-26 21:51     ` Don Slutz
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2014-08-26 16:51 UTC (permalink / raw)
  To: Don Slutz, Ross Lagerwall, Xen-devel; +Cc: Keir Fraser, Jan Beulich

On 26/08/14 17:06, Don Slutz wrote:
>
> On 08/26/14 06:10, Ross Lagerwall wrote:
>> Change the watchdog handler to only "tick" if the corresponding perf
>> counter has overflowed; otherwise, return false from the NMI handler to
>> indicate that the NMI is not a watchdog tick and let the other handlers
>> handle it.  This allows externally injected NMIs to reliably crash the
>> host rather than be swallowed by the watchdog handler.
>
> If a crash kernel has been setup via kexec, does this change to
> "crash host" ends up jumping into the crash kernel?
>
>     -Don Slutz

No - this has no change of behaviour as to how Xen proceeds after it has
decided to panic().

It does however change whether Xen decides to panic, depending on
whether the NMI was a result of the watchdog, or some otherwise
unidentified NMI.

Basically, without this change, the "inject fatal NMI" option in most
IPMI controllers doesn't work in combination with running the Xen
watchdog.  Only certain HP systems appear to set the IOCK bit in the
system control port B when injecting an NMI.  All other systems just
send an NMI with no change to the control ports, which gets eaten by the
watchdog logic.

This patch changes the watchdog logic to only consider an NMI as a
watchdog tick if the perf counter confirms that it injected the NMI.

~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 16:51   ` Andrew Cooper
@ 2014-08-26 21:51     ` Don Slutz
  2014-08-26 23:01       ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Don Slutz @ 2014-08-26 21:51 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Ross Lagerwall, Keir Fraser, Jan Beulich, Don Slutz, Xen-devel

On 08/26/14 12:51, Andrew Cooper wrote:
> On 26/08/14 17:06, Don Slutz wrote:
>> On 08/26/14 06:10, Ross Lagerwall wrote:
>>> Change the watchdog handler to only "tick" if the corresponding perf
>>> counter has overflowed; otherwise, return false from the NMI handler to
>>> indicate that the NMI is not a watchdog tick and let the other handlers
>>> handle it.  This allows externally injected NMIs to reliably crash the
>>> host rather than be swallowed by the watchdog handler.
>> If a crash kernel has been setup via kexec, does this change to
>> "crash host" ends up jumping into the crash kernel?
>>
>>      -Don Slutz
> No - this has no change of behaviour as to how Xen proceeds after it has
> decided to panic().
>
> It does however change whether Xen decided to panic, depending on
> whether the NMI was a result of the watchdog, or some otherwise
> unidentified NMI.
>
> Basically, without this change, the "inject fatal NMI" option in most
> IPMI controllers doesn't work in combination with running the Xen
> watchdog.  Only certain HP systems appear to set the IOCK bit in the
> system control port B when injecting an NMI.  All other systems just
> send an NMI with no change to the control ports, which get eaten by the
> watchdog logic.
>
> This patch changes the watchdog logic to only consider an NMI as a
> watchdog tick if the perf counter confirms that it injected the NMI.

Well, that is useful information.  Looks like I was not clear.  I am reading

> as to how Xen proceeds after it has
> decided to panic().

as a yes, but you start with a no.  And I am taking "crash host" to
mean "calls panic()".

    -Don Slutz

> ~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 21:51     ` Don Slutz
@ 2014-08-26 23:01       ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2014-08-26 23:01 UTC (permalink / raw)
  To: Don Slutz; +Cc: Ross Lagerwall, Keir Fraser, Jan Beulich, Xen-devel

On 26/08/2014 22:51, Don Slutz wrote:
> On 08/26/14 12:51, Andrew Cooper wrote:
>> On 26/08/14 17:06, Don Slutz wrote:
>>> On 08/26/14 06:10, Ross Lagerwall wrote:
>>>> Change the watchdog handler to only "tick" if the corresponding perf
>>>> counter has overflowed; otherwise, return false from the NMI
>>>> handler to
>>>> indicate that the NMI is not a watchdog tick and let the other
>>>> handlers
>>>> handle it.  This allows externally injected NMIs to reliably crash the
>>>> host rather than be swallowed by the watchdog handler.
>>> If a crash kernel has been setup via kexec, does this change to
>>> "crash host" ends up jumping into the crash kernel?
>>>
>>>      -Don Slutz
>> No - this has no change of behaviour as to how Xen proceeds after it has
>> decided to panic().
>>
>> It does however change whether Xen decided to panic, depending on
>> whether the NMI was a result of the watchdog, or some otherwise
>> unidentified NMI.
>>
>> Basically, without this change, the "inject fatal NMI" option in most
>> IPMI controllers doesn't work in combination with running the Xen
>> watchdog.  Only certain HP systems appear to set the IOCK bit in the
>> system control port B when injecting an NMI.  All other systems just
>> send an NMI with no change to the control ports, which get eaten by the
>> watchdog logic.
>>
>> This patch changes the watchdog logic to only consider an NMI as a
>> watchdog tick if the perf counter confirms that it injected the NMI.
>
> Well, that is useful information.  Looks like I was not clear.  I am
> reading
>
>> as to how Xen proceeds after it has
>
> > decided to panic().
>
>
> As a yes, but you start with a no.  And I am getting "crash host" to
> mean "calls panic()".
>
>    -Don Slutz
>
>> ~Andrew
>

Allow me to try again.

This patch will alter how NMIs are classified.  It does not alter the
actions of a particular classification of NMI.

Before this patch, any NMI which did not explicitly set the IOCK/SERR
bit in the system control port B would be considered a watchdog NMI, and
ignored if the watchdog was active.  The vast majority of "inject NMI"
options from IPMI controllers do not set the IOCK/SERR bit.

After this patch is applied, NMIs which are received but not generated
by the watchdog performance counters will be considered as external NMIs
*even if* the IOCK/SERR bits are not set.

The action taken upon discovery of these NMIs is still controlled by the
nmi=fatal/dom0/ignore command line option, and in the case of
nmi=fatal, panic() is still called as before.

Realistically, it means that, with the NMI watchdog enabled, using the
"inject NMI" button on your Dell/SuperMicro/IBM/Quanta/Intel IPMI
interface will be classified as an external NMI rather than a watchdog
NMI, and in the case of nmi=fatal, will call panic().  (Certain HP
servers are the only ones we have encountered which reliably set the
IOCK bit when injecting an NMI from the iLO interface)

~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-26 15:38     ` Jan Beulich
@ 2014-08-27 11:14       ` Ross Lagerwall
  2014-08-27 12:04         ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: Ross Lagerwall @ 2014-08-27 11:14 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Keir Fraser, Xen-devel

On 08/26/2014 04:38 PM, Jan Beulich wrote:
>>>> On 26.08.14 at 17:26, <ross.lagerwall@citrix.com> wrote:
>> On 08/26/2014 01:59 PM, Jan Beulich wrote:
>>>>>> On 26.08.14 at 12:10, <ross.lagerwall@citrix.com> wrote:
>>>> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>>>>                pci_serr_error(regs);
>>>>            if ( reason & 0x40 )
>>>>                io_check_error(regs);
>>>> -        if ( !(reason & 0xc0) && !nmi_watchdog )
>>>> +        if ( !(reason & 0xc0) )
>>>>                unknown_nmi_error(regs, reason);
>>>
>>> As much as I like the original idea, I'm afraid this won't fly: I do
>>> know of systems where bad motherboard design leads to neither
>>> of these two bits ever getting set. I.e. at the very minimum we'd
>>> need a command line option to restore old behavior. Personally I
>>> think it should in fact remain default behavior, and new behavior
>>> should only be enabled via command line option.
>>
>> Well the old behavior was different depending on whether the watchdog
>> was enabled or not. Since the watchdog was disabled by default, that's
>> no different from the behavior here.
>>
>> So are you thinking something like an ignore_unknown_nmi boolean
>> parameter that defaults to true?
>
> More like a "watchdog=force" one, but right, since the watchdog
> isn't being enabled by default, maybe making it an opt-out instead
> of opt-in would indeed be acceptable.
>

If bad motherboard design leads to neither of these bits being set (thus 
always giving an unknown nmi error), can't the user set nmi=ignore on 
the xen command-line to get the previous behavior?

We already have a tristate nmi parameter, a boolean watchdog parameter,
and a watchdog timeout parameter. I'm loath to introduce even more
possible states.

Regards
-- 
Ross Lagerwall

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
  2014-08-27 11:14       ` Ross Lagerwall
@ 2014-08-27 12:04         ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2014-08-27 12:04 UTC (permalink / raw)
  To: Ross Lagerwall; +Cc: Keir Fraser, Xen-devel

>>> On 27.08.14 at 13:14, <ross.lagerwall@citrix.com> wrote:
> On 08/26/2014 04:38 PM, Jan Beulich wrote:
>>>>> On 26.08.14 at 17:26, <ross.lagerwall@citrix.com> wrote:
>>> On 08/26/2014 01:59 PM, Jan Beulich wrote:
>>>>>>> On 26.08.14 at 12:10, <ross.lagerwall@citrix.com> wrote:
>>>>> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>>>>>                pci_serr_error(regs);
>>>>>            if ( reason & 0x40 )
>>>>>                io_check_error(regs);
>>>>> -        if ( !(reason & 0xc0) && !nmi_watchdog )
>>>>> +        if ( !(reason & 0xc0) )
>>>>>                unknown_nmi_error(regs, reason);
>>>>
>>>> As much as I like the original idea, I'm afraid this won't fly: I do
>>>> know of systems where bad motherboard design leads to neither
>>>> of these two bits ever getting set. I.e. at the very minimum we'd
>>>> need a command line option to restore old behavior. Personally I
>>>> think it should in fact remain default behavior, and new behavior
>>>> should only be enabled via command line option.
>>>
>>> Well the old behavior was different depending on whether the watchdog
>>> was enabled or not. Since the watchdog was disabled by default, that's
>>> no different from the behavior here.
>>>
>>> So are you thinking something like an ignore_unknown_nmi boolean
>>> parameter that defaults to true?
>>
>> More like a "watchdog=force" one, but right, since the watchdog
>> isn't being enabled by default, maybe making it an opt-out instead
>> of opt-in would indeed be acceptable.
>>
> 
> If bad motherboard design leads to neither of these bits being set (thus 
> always giving an unknown nmi error), can't the user set nmi=ignore on 
> the xen command-line to get the previous behavior?

As opt-out that could be acceptable, but I'm still not certain we
wouldn't better go with an opt-in, in which case things ought to
continue to work the way they are currently, namely without
any "nmi=" option specified (and you clearly don't want Dom0 to
see NMIs it previously didn't get to see).

Actually part of my apparent confusion about the intended new
behavior stems from me not having spotted that you bail from
do_nmi() on a positively identified watchdog NMI. That, however,
is wrong in any event: You absolutely have to look at the two
reason bits, as multiple sources may have triggered at (almost)
the same time. Linux over the last few years went through quite
some hoops to deal with such situations, making me assume they
aren't purely theoretical. So I think setting a flag instead, and
changing to

        if ( !(reason & 0xc0) && !watchdog )
            unknown_nmi_error(regs, reason);

would already eliminate some of the concerns. (Then, quite
obviously, an NMI ought to be unknown too if seen on a CPU
other than the BSP, yet identified as not being a watchdog
one. Implying that you may want nmi_watchdog_tick() to
return a tristate allowing to distinguish positive/negative/
don't-know.)

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-08-27 12:04 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-26 10:10 [PATCH] x86/nmi: Make external NMI injection reliably crash the host Ross Lagerwall
2014-08-26 10:17 ` Andrew Cooper
2014-08-26 12:59 ` Jan Beulich
2014-08-26 15:26   ` Ross Lagerwall
2014-08-26 15:38     ` Jan Beulich
2014-08-27 11:14       ` Ross Lagerwall
2014-08-27 12:04         ` Jan Beulich
2014-08-26 16:06 ` Don Slutz
2014-08-26 16:51   ` Andrew Cooper
2014-08-26 21:51     ` Don Slutz
2014-08-26 23:01       ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.