linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86, UV: Fix NMI handler for UV platforms
@ 2011-03-21 16:01 Jack Steiner
  2011-03-21 16:14 ` Ingo Molnar
  0 siblings, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 16:01 UTC (permalink / raw)
  To: mingo, tglx, hpa, x86; +Cc: linux-kernel


This fixes a problem seen on UV systems handling NMIs from the node controller.
The original code used the DIE notifier as the hook to get to the UV NMI
handler. This does not work if performance counters are active - the hw_perf
code consumes the NMI and the UV handler is not called.

Signed-off-by: Jack Steiner <steiner@sgi.com>

---
(This patch was needed to debug system hangs that occur only when running
performance tools (perf or oprofile) on large systems. Without the
patch the system hard hangs. Attempts to NMI the system or get into
a debugger fail. This patch allowed the problem to be debugged. The
hang will be fixed later)

I tried reordering notifier priorities so that the UV code was called first.
This can be made to work BUT requires knowledge in the UV nmi handler whether
any other NMI source is active. The UV NMI handler cannot return NOTIFY_STOP
if other NMI sources are active - if NOTIFY_STOP is returned, the other handlers
will not be called. I tried this reordering & hw_perf collection would occasionally
hang due to a missed NMI. If the UV handler returns NOTIFY_OK or NOTIFY_DONE
and hw_perf is NOT active, we get the "dazed & confused" messages.

I considered adding a NMI handling callout to x86_platform_ops. This
might be a cleaner approach. This would replace the UV-specific change
in traps.c. Thoughts???




 arch/x86/include/asm/uv/uv.h       |    2 
 arch/x86/include/asm/uv/uv_mmrs.h  |   16 ++++++
 arch/x86/kernel/apic/x2apic_uv_x.c |   88 +++++++++++++++++++++++++++----------
 arch/x86/kernel/traps.c            |    6 ++
 4 files changed, 87 insertions(+), 25 deletions(-)

Index: linux/arch/x86/include/asm/uv/uv.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv.h	2011-03-21 09:05:43.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv.h	2011-03-21 09:06:20.313497084 -0500
@@ -13,6 +13,7 @@ extern int is_uv_system(void);
 extern void uv_cpu_init(void);
 extern void uv_nmi_init(void);
 extern void uv_system_init(void);
+extern int uv_handle_nmi(struct pt_regs *regs, unsigned char reason);
 extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 						 struct mm_struct *mm,
 						 unsigned long va,
@@ -24,6 +25,7 @@ static inline enum uv_system_type get_uv
 static inline int is_uv_system(void)	{ return 0; }
 static inline void uv_cpu_init(void)	{ }
 static inline void uv_system_init(void)	{ }
+static inline int uv_handle_nmi(struct pt_regs *regs, unsigned char reason)	{ return 0; }
 static inline const struct cpumask *
 uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
 		    unsigned long va, unsigned int cpu)
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h	2011-03-21 09:05:56.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h	2011-03-21 09:09:01.101557321 -0500
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-21 09:05:56.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-21 09:18:10.581558983 -0500
@@ -34,6 +34,12 @@
 #include <asm/ipi.h>
 #include <asm/smp.h>
 #include <asm/x86_init.h>
+#include <asm/perf_event.h>
+
+/* BMC sets this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -48,6 +54,12 @@ unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);
 
+/* Should be part of uv_hub_info but that breas the KABI */
+static struct uv_nmi_info {
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
+} *uv_nmi_info;
+
 static unsigned long __init uv_early_read_mmr(unsigned long addr)
 {
 	unsigned long val, *mmr;
@@ -635,36 +647,60 @@ void __cpuinit uv_cpu_init(void)
 }
 
 /*
- * When NMI is received, print a stack trace.
+ * When an NMI from the BMC is received:
+ * 	- print a stack trace
  */
-int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
+static unsigned long last_nmi_jiffies;
+
+int uv_handle_nmi(struct pt_regs *regs, unsigned char reason)
 {
-	if (reason != DIE_NMIUNKNOWN)
-		return NOTIFY_OK;
+	unsigned long real_uv_nmi;
+	int blade;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
-		return NOTIFY_OK;
+		return 0;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	blade = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_nmi_info[blade].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_nmi_info[blade].nmi_count++;
+			mb();
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_nmi_info[blade].nmi_lock);
+	}
+
+	/*
+	 * Return "NMI handled" if an NMI has been seen within the preceeding
+	 * few seconds. This eliminates the "dazed.." message that can occur
+	 * if a hw_perf and BMC NMI are received at about the same time
+	 * and both events are processed with the first NMI.
+	 */
+	if (__get_cpu_var(cpu_last_nmi_count) == uv_nmi_info[blade].nmi_count)
+		return jiffies - last_nmi_jiffies < 10 * HZ;
+	__get_cpu_var(cpu_last_nmi_count) = uv_nmi_info[blade].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
-
-	return NOTIFY_STOP;
-}
-
-static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
-};
-
-void uv_register_nmi_notifier(void)
-{
-	if (register_die_notifier(&uv_dump_stack_nmi_nb))
-		printk(KERN_WARNING "UV NMI handler failed to register\n");
+	last_nmi_jiffies = jiffies;
+	return 1;
 }
 
 void uv_nmi_init(void)
@@ -717,10 +753,17 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
-	for (blade = 0; blade < uv_num_possible_blades(); blade++)
+
+	bytes = sizeof(uv_nmi_info[0]) * num_possible_cpus();
+	uv_nmi_info = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_nmi_info);
+
+	for (blade = 0; blade < uv_num_possible_blades(); blade++) {
 		uv_blade_info[blade].memory_nid = -1;
+		spin_lock_init(&uv_nmi_info[blade].nmi_lock);
+	}
 
 	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
@@ -805,7 +848,6 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
-	uv_register_nmi_notifier();
 	proc_mkdir("sgi_uv", NULL);
 
 	/* register Legacy VGA I/O redirection handler */
Index: linux/arch/x86/kernel/traps.c
===================================================================
--- linux.orig/arch/x86/kernel/traps.c	2011-03-21 09:05:43.000000000 -0500
+++ linux/arch/x86/kernel/traps.c	2011-03-21 09:13:01.306555675 -0500
@@ -57,6 +57,7 @@
 #include <asm/mce.h>
 
 #include <asm/mach_traps.h>
+#include <asm/uv/uv.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -397,13 +398,16 @@ unknown_nmi_error(unsigned char reason,
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
+	int handled;
 
 	/*
 	 * CPU-specific NMI must be processed before non-CPU-specific
 	 * NMI, otherwise we may lose it, because the CPU-specific
 	 * NMI can not be detected/processed on other CPUs.
 	 */
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+	handled = uv_handle_nmi(regs, reason);
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
+	    		handled)
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:01 [PATCH] x86, UV: Fix NMI handler for UV platforms Jack Steiner
@ 2011-03-21 16:14 ` Ingo Molnar
  2011-03-21 16:26   ` Cyrill Gorcunov
  2011-03-21 16:56   ` Jack Steiner
  0 siblings, 2 replies; 38+ messages in thread
From: Ingo Molnar @ 2011-03-21 16:14 UTC (permalink / raw)
  To: Jack Steiner
  Cc: tglx, hpa, x86, linux-kernel, Peter Zijlstra, Cyrill Gorcunov


* Jack Steiner <steiner@sgi.com> wrote:

> This fixes a problem seen on UV systems handling NMIs from the node controller.
> The original code used the DIE notifier as the hook to get to the UV NMI
> handler. This does not work if performance counters are active - the hw_perf
> code consumes the NMI and the UV handler is not called.

Sigh:

> --- linux.orig/arch/x86/kernel/traps.c	2011-03-21 09:05:43.000000000 -0500
> +++ linux/arch/x86/kernel/traps.c	2011-03-21 09:13:01.306555675 -0500
> @@ -57,6 +57,7 @@
>  #include <asm/mce.h>
>  
>  #include <asm/mach_traps.h>
> +#include <asm/uv/uv.h>
>  
>  #ifdef CONFIG_X86_64
>  #include <asm/x86_init.h>
> @@ -397,13 +398,16 @@ unknown_nmi_error(unsigned char reason,
>  static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
>  {
>  	unsigned char reason = 0;
> +	int handled;
>  
>  	/*
>  	 * CPU-specific NMI must be processed before non-CPU-specific
>  	 * NMI, otherwise we may lose it, because the CPU-specific
>  	 * NMI can not be detected/processed on other CPUs.
>  	 */
> -	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> +	handled = uv_handle_nmi(regs, reason);
> +	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
> +	    		handled)
>  		return;

Such code is extremely ugly. Please *reduce* the number of is_uv_system() type 
of hacks in core x86 code, not increase it!

Any reason why a higher priority for the UV NMI handler cannot solve the 'perf 
eats the NMI' problem?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:14 ` Ingo Molnar
@ 2011-03-21 16:26   ` Cyrill Gorcunov
  2011-03-21 16:43     ` Cyrill Gorcunov
  2011-03-21 17:51     ` Don Zickus
  2011-03-21 16:56   ` Jack Steiner
  1 sibling, 2 replies; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 16:26 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jack Steiner, tglx, hpa, x86, linux-kernel, Peter Zijlstra, Don Zickus

On 03/21/2011 07:14 PM, Ingo Molnar wrote:
> 
> * Jack Steiner <steiner@sgi.com> wrote:
> 
>> This fixes a problem seen on UV systems handling NMIs from the node controller.
>> The original code used the DIE notifier as the hook to get to the UV NMI
>> handler. This does not work if performance counters are active - the hw_perf
>> code consumes the NMI and the UV handler is not called.
> 
> Sigh:
> 
>> --- linux.orig/arch/x86/kernel/traps.c	2011-03-21 09:05:43.000000000 -0500
>> +++ linux/arch/x86/kernel/traps.c	2011-03-21 09:13:01.306555675 -0500
>> @@ -57,6 +57,7 @@
>>  #include <asm/mce.h>
>>  
>>  #include <asm/mach_traps.h>
>> +#include <asm/uv/uv.h>
>>  
>>  #ifdef CONFIG_X86_64
>>  #include <asm/x86_init.h>
>> @@ -397,13 +398,16 @@ unknown_nmi_error(unsigned char reason,
>>  static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
>>  {
>>  	unsigned char reason = 0;
>> +	int handled;
>>  
>>  	/*
>>  	 * CPU-specific NMI must be processed before non-CPU-specific
>>  	 * NMI, otherwise we may lose it, because the CPU-specific
>>  	 * NMI can not be detected/processed on other CPUs.
>>  	 */
>> -	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
>> +	handled = uv_handle_nmi(regs, reason);
>> +	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
>> +	    		handled)
>>  		return;
> 
> Such code is extremely ugly. Please *reduce* the number of is_uv_system() type 
> of hacks in core x86 code, not increase it!
> 
> Any reason why a higher priority for the UV NMI handler cannot solve the 'perf 
> eats the NMI' problem?
> 
> Thanks,
> 
> 	Ingo

Yeah, Don has made priority system for NMI notifiers. Need to look in.

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:26   ` Cyrill Gorcunov
@ 2011-03-21 16:43     ` Cyrill Gorcunov
  2011-03-21 17:00       ` Cyrill Gorcunov
  2011-03-21 17:53       ` Don Zickus
  2011-03-21 17:51     ` Don Zickus
  1 sibling, 2 replies; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 16:43 UTC (permalink / raw)
  To: Ingo Molnar, Don Zickus
  Cc: Jack Steiner, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On 03/21/2011 07:26 PM, Cyrill Gorcunov wrote:
...
>>
>> Such code is extremely ugly. Please *reduce* the number of is_uv_system() type 
>> of hacks in core x86 code, not increase it!
>>
>> Any reason why a higher priority for the UV NMI handler cannot solve the 'perf 
>> eats the NMI' problem?
>>
>> Thanks,
>>
>> 	Ingo
> 
> Yeah, Don has made priority system for NMI notifiers. Need to look in.
> 

I think Jack might need to setup priority for his notifier, like

static struct notifier_block uv_dump_stack_nmi_nb = {
	.notifier_call	= uv_handle_nmi,
	.priority	= NMI_LOCAL_HIGH_PRIOR+1,
};

so it would be called before perf nmi. Don, am I right?

Since for perf nmis we do have

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
	.next			= NULL,
	.priority		= NMI_LOCAL_LOW_PRIOR,
};

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:14 ` Ingo Molnar
  2011-03-21 16:26   ` Cyrill Gorcunov
@ 2011-03-21 16:56   ` Jack Steiner
  2011-03-21 18:05     ` Ingo Molnar
  1 sibling, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 16:56 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: tglx, hpa, x86, linux-kernel, Peter Zijlstra, Cyrill Gorcunov

On Mon, Mar 21, 2011 at 05:14:25PM +0100, Ingo Molnar wrote:
> 
> * Jack Steiner <steiner@sgi.com> wrote:
> 
> > This fixes a problem seen on UV systems handling NMIs from the node controller.
> > The original code used the DIE notifier as the hook to get to the UV NMI
> > handler. This does not work if performance counters are active - the hw_perf
> > code consumes the NMI and the UV handler is not called.
> 
> Sigh:

Agree. X86 architecture does not make it easy to use NMIs from multiple sources.


> 
> > --- linux.orig/arch/x86/kernel/traps.c	2011-03-21 09:05:43.000000000 -0500
> > +++ linux/arch/x86/kernel/traps.c	2011-03-21 09:13:01.306555675 -0500
> > @@ -57,6 +57,7 @@
> >  #include <asm/mce.h>
> >  
> >  #include <asm/mach_traps.h>
> > +#include <asm/uv/uv.h>
> >  
> >  #ifdef CONFIG_X86_64
> >  #include <asm/x86_init.h>
> > @@ -397,13 +398,16 @@ unknown_nmi_error(unsigned char reason,
> >  static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
> >  {
> >  	unsigned char reason = 0;
> > +	int handled;
> >  
> >  	/*
> >  	 * CPU-specific NMI must be processed before non-CPU-specific
> >  	 * NMI, otherwise we may lose it, because the CPU-specific
> >  	 * NMI can not be detected/processed on other CPUs.
> >  	 */
> > -	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> > +	handled = uv_handle_nmi(regs, reason);
> > +	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
> > +	    		handled)
> >  		return;
> 
> Such code is extremely ugly. Please *reduce* the number of is_uv_system() type 
> of hacks in core x86 code, not increase it!
> 
> Any reason why a higher priority for the UV NMI handler cannot solve the 'perf 
> eats the NMI' problem?

Yes. I tried that.

If the UV handler needs to know if hwperf is active in order to know whether or not
to return NOTIFY_STOP:

	- if the UV NMI handler returns NOTIFY_STOP and hw_perf is active, hw_perf will miss
	  and NMI & counter sometimes stop working.

	- if the UV NMI handler does not return NOTIFY_STOP and hw_perf is not active,
	  we get the "dazed" messages.

A cleaner solution would be to hide the platform specific NMI action in a x86_platform_ops 
such as (untested):


Index: linux/arch/x86/include/asm/x86_init.h
===================================================================
--- linux.orig/arch/x86/include/asm/x86_init.h	2011-03-18 11:29:08.000000000 -0500
+++ linux/arch/x86/include/asm/x86_init.h	2011-03-21 11:52:36.413496546 -0500
@@ -153,6 +153,7 @@ struct x86_platform_ops {
 	void (*iommu_shutdown)(void);
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
+	int (*nmi_handler)(void *regs);
 	int (*i8042_detect)(void);
 };
 
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-21 11:40:36.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-21 11:45:14.134555108 -0500
@@ -115,6 +115,7 @@ static int __init uv_acpi_madt_oem_check
 		early_get_apic_pnode_shift();
 		x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
 		x86_platform.nmi_init = uv_nmi_init;
+		x86_platform.nmi_handler = uv_nmi_handler;
 		if (!strcmp(oem_table_id, "UVL"))
 			uv_system_type = UV_LEGACY_APIC;
 		else if (!strcmp(oem_table_id, "UVX"))
Index: linux/arch/x86/kernel/traps.c
===================================================================
--- linux.orig/arch/x86/kernel/traps.c	2011-03-21 11:40:36.000000000 -0500
+++ linux/arch/x86/kernel/traps.c	2011-03-21 11:52:21.057498053 -0500
@@ -55,6 +55,8 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/mce.h>
+#include <asm/x86_init.h>
+
 
 #include <asm/mach_traps.h>
 
@@ -397,13 +399,16 @@ unknown_nmi_error(unsigned char reason,
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
+	int handled;
 
 	/*
 	 * CPU-specific NMI must be processed before non-CPU-specific
 	 * NMI, otherwise we may lose it, because the CPU-specific
 	 * NMI can not be detected/processed on other CPUs.
 	 */
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+	handled = x86_platform.nmi_handler(regs);
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
+	    			handled)
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
Index: linux/arch/x86/kernel/x86_init.c
===================================================================
--- linux.orig/arch/x86/kernel/x86_init.c	2011-03-18 11:29:08.000000000 -0500
+++ linux/arch/x86/kernel/x86_init.c	2011-03-21 11:53:26.849496085 -0500
@@ -89,6 +89,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpu
 };
 
 static void default_nmi_init(void) { };
+static int default_nmi_handler(void *regs) { return 1; };
 static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
@@ -98,6 +99,7 @@ struct x86_platform_ops x86_platform = {
 	.iommu_shutdown			= iommu_shutdown_noop,
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
+	.nmi_handler			= default_nmi_handler,
 	.i8042_detect			= default_i8042_detect
 };
 



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:43     ` Cyrill Gorcunov
@ 2011-03-21 17:00       ` Cyrill Gorcunov
  2011-03-21 17:08         ` Jack Steiner
  2011-03-21 17:53       ` Don Zickus
  1 sibling, 1 reply; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 17:00 UTC (permalink / raw)
  To: Ingo Molnar, Don Zickus, Jack Steiner
  Cc: tglx, hpa, x86, linux-kernel, Peter Zijlstra

On 03/21/2011 07:43 PM, Cyrill Gorcunov wrote:
...
> 
> I think Jack might need to setup priority for his notifier, like
> 
> static struct notifier_block uv_dump_stack_nmi_nb = {
> 	.notifier_call	= uv_handle_nmi,
> 	.priority	= NMI_LOCAL_HIGH_PRIOR+1,
> };
> 
> so it would be called before perf nmi. Don, am I right?
> 
> Since for perf nmis we do have
> 
> static __read_mostly struct notifier_block perf_event_nmi_notifier = {
> 	.notifier_call		= perf_event_nmi_handler,
> 	.next			= NULL,
> 	.priority		= NMI_LOCAL_LOW_PRIOR,
> };
> 

  I must admit I've missed the fact that Jack has tried NMIs priorities, right?
x86_platform_ops seems to be a cleaner indeed (btw I think p4 pmu kgdb issue
is exactly the same problem) but same time this might end up in over-swelled
ideas behind this small code snippet. Dunno. Probably we need some per-cpu
system status for nmi reasons other than unknown nmis...

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:00       ` Cyrill Gorcunov
@ 2011-03-21 17:08         ` Jack Steiner
  2011-03-21 17:19           ` Cyrill Gorcunov
  2011-03-21 18:15           ` Cyrill Gorcunov
  0 siblings, 2 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 17:08 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On Mon, Mar 21, 2011 at 08:00:53PM +0300, Cyrill Gorcunov wrote:
> On 03/21/2011 07:43 PM, Cyrill Gorcunov wrote:
> ...
> > 
> > I think Jack might need to setup priority for his notifier, like
> > 
> > static struct notifier_block uv_dump_stack_nmi_nb = {
> > 	.notifier_call	= uv_handle_nmi,
> > 	.priority	= NMI_LOCAL_HIGH_PRIOR+1,
> > };
> > 
> > so it would be called before perf nmi. Don, am I right?
> > 
> > Since for perf nmis we do have
> > 
> > static __read_mostly struct notifier_block perf_event_nmi_notifier = {
> > 	.notifier_call		= perf_event_nmi_handler,
> > 	.next			= NULL,
> > 	.priority		= NMI_LOCAL_LOW_PRIOR,
> > };
> > 
> 
>   I must admit I've missed the fact that Jack has tried NMIs priorities, right?
> x86_platform_ops seems to be a cleaner indeed (btw I think p4 pmu kgdb issue
> is exactly the same problem) but same time this might end up in over-swelled
> ideas behind this small code snippet. Dunno. Probably we need some per-cpu
> system status for nmi reasons other than unknown nmis...

We use KDB internally, and yes, it has the same issue. The version of the
patch that uses KDB OR's the "handled" status for both KDB & the UV NMI handler.
If either KDB or the UV NMI handler returns "handled", the code in traps.c exits
after the call to the first die notifier.

Not particularily pretty but I could not find a better way to do it.

--- jack

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:08         ` Jack Steiner
@ 2011-03-21 17:19           ` Cyrill Gorcunov
  2011-03-21 17:34             ` Jack Steiner
  2011-03-21 18:15           ` Cyrill Gorcunov
  1 sibling, 1 reply; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 17:19 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On 03/21/2011 08:08 PM, Jack Steiner wrote:
> On Mon, Mar 21, 2011 at 08:00:53PM +0300, Cyrill Gorcunov wrote:
>> On 03/21/2011 07:43 PM, Cyrill Gorcunov wrote:
>> ...
>>>
>>> I think Jack might need to setup priority for his notifier, like
>>>
>>> static struct notifier_block uv_dump_stack_nmi_nb = {
>>> 	.notifier_call	= uv_handle_nmi,
>>> 	.priority	= NMI_LOCAL_HIGH_PRIOR+1,
>>> };
>>>
>>> so it would be called before perf nmi. Don, am I right?
>>>
>>> Since for perf nmis we do have
>>>
>>> static __read_mostly struct notifier_block perf_event_nmi_notifier = {
>>> 	.notifier_call		= perf_event_nmi_handler,
>>> 	.next			= NULL,
>>> 	.priority		= NMI_LOCAL_LOW_PRIOR,
>>> };
>>>
>>
>>   I must admit I've missed the fact that Jack has tried NMIs priorities, right?
>> x86_platform_ops seems to be a cleaner indeed (btw I think p4 pmu kgdb issue
>> is exactly the same problem) but same time this might end up in over-swelled
>> ideas behind this small code snippet. Dunno. Probably we need some per-cpu
>> system status for nmi reasons other than unknown nmis...
> 
> We use KDB internally, and yes, it has the same issue. The version of the
> patch that uses KDB OR's the "handled" status for both KDB & the UV NMI handler.
> If either KDB or the UV NMI handler returns "handled", the code in traps.c exits
> after the call to the first die notifier.
> 
> Not particularily pretty but I could not find a better way to do it.
> 
> --- jack

  Another option might be to add pre-nmi notifier chain, which of course
not much differ from platform ops but I guess platform ops stands mostly
for one-shot events while chain might be more flexible. Ie I mean something
like

	if (notify_pre_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
		return;

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:19           ` Cyrill Gorcunov
@ 2011-03-21 17:34             ` Jack Steiner
  2011-03-21 17:48               ` Cyrill Gorcunov
  0 siblings, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 17:34 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On Mon, Mar 21, 2011 at 08:19:09PM +0300, Cyrill Gorcunov wrote:
> On 03/21/2011 08:08 PM, Jack Steiner wrote:
> > On Mon, Mar 21, 2011 at 08:00:53PM +0300, Cyrill Gorcunov wrote:
> >> On 03/21/2011 07:43 PM, Cyrill Gorcunov wrote:
> >> ...
> >>>
> >>> I think Jack might need to setup priority for his notifier, like
> >>>
> >>> static struct notifier_block uv_dump_stack_nmi_nb = {
> >>> 	.notifier_call	= uv_handle_nmi,
> >>> 	.priority	= NMI_LOCAL_HIGH_PRIOR+1,
> >>> };
> >>>
> >>> so it would be called before perf nmi. Don, am I right?
> >>>
> >>> Since for perf nmis we do have
> >>>
> >>> static __read_mostly struct notifier_block perf_event_nmi_notifier = {
> >>> 	.notifier_call		= perf_event_nmi_handler,
> >>> 	.next			= NULL,
> >>> 	.priority		= NMI_LOCAL_LOW_PRIOR,
> >>> };
> >>>
> >>
> >>   I must admit I've missed the fact that Jack has tried NMIs priorities, right?
> >> x86_platform_ops seems to be a cleaner indeed (btw I think p4 pmu kgdb issue
> >> is exactly the same problem) but same time this might end up in over-swelled
> >> ideas behind this small code snippet. Dunno. Probably we need some per-cpu
> >> system status for nmi reasons other than unknown nmis...
> > 
> > We use KDB internally, and yes, it has the same issue. The version of the
> > patch that uses KDB OR's the "handled" status for both KDB & the UV NMI handler.
> > If either KDB or the UV NMI handler returns "handled", the code in traps.c exits
> > after the call to the first die notifier.
> > 
> > Not particularily pretty but I could not find a better way to do it.
> > 
> > --- jack
> 
>   Another option might be to add pre-nmi notifier chain, which of course
> not much differ from platform ops but I guess platform ops stands mostly
> for one-shot events while chain might be more flexible. Ie I mean something
> like
> 
> 	if (notify_pre_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> 		return;

You still need to process both chains in order to handle the case where both
hw_perf & the SGI BMC raise NMIs at about the same time.

--- jack

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:34             ` Jack Steiner
@ 2011-03-21 17:48               ` Cyrill Gorcunov
  2011-03-21 17:55                 ` Cyrill Gorcunov
  0 siblings, 1 reply; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 17:48 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On 03/21/2011 08:34 PM, Jack Steiner wrote:
....
>>>>
>>>>   I must admit I've missed the fact that Jack has tried NMIs priorities, right?
>>>> x86_platform_ops seems to be a cleaner indeed (btw I think p4 pmu kgdb issue
>>>> is exactly the same problem) but same time this might end up in over-swelled
>>>> ideas behind this small code snippet. Dunno. Probably we need some per-cpu
>>>> system status for nmi reasons other than unknown nmis...
>>>
>>> We use KDB internally, and yes, it has the same issue. The version of the
>>> patch that uses KDB OR's the "handled" status for both KDB & the UV NMI handler.
>>> If either KDB or the UV NMI handler returns "handled", the code in traps.c exits
>>> after the call to the first die notifier.
>>>
>>> Not particularily pretty but I could not find a better way to do it.
>>>
>>> --- jack
>>
>>   Another option might be to add pre-nmi notifier chain, which of course
>> not much differ from platform ops but I guess platform ops stands mostly
>> for one-shot events while chain might be more flexible. Ie I mean something
>> like
>>
>> 	if (notify_pre_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
>> 		return;
> 
> You still need to process both chains in order to handle the case where both
> hw_perf & the SGI BMC raise NMIs at about the same time.
> 
> --- jack

yes, but I meant to simply call this chain before the regular notify_die. Anyway
it would look ugly as hell too.

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:26   ` Cyrill Gorcunov
  2011-03-21 16:43     ` Cyrill Gorcunov
@ 2011-03-21 17:51     ` Don Zickus
  2011-03-21 18:00       ` Cyrill Gorcunov
  2011-03-21 18:22       ` Jack Steiner
  1 sibling, 2 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-21 17:51 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Ingo Molnar, Jack Steiner, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On Mon, Mar 21, 2011 at 07:26:51PM +0300, Cyrill Gorcunov wrote:
> On 03/21/2011 07:14 PM, Ingo Molnar wrote:
> > 
> > * Jack Steiner <steiner@sgi.com> wrote:
> > 
> >> This fixes a problem seen on UV systems handling NMIs from the node controller.
> >> The original code used the DIE notifier as the hook to get to the UV NMI
> >> handler. This does not work if performance counters are active - the hw_perf
> >> code consumes the NMI and the UV handler is not called.

Well that is a bug in the perf code.  We have been dealing with 'perf'
swallowing NMIs for a couple of releases now.  I think we got rid of most
of the cases (p4 and acme's core2 quad are the only cases I know that are
still an issue).

I would much prefer to investigate the reason why this is happening
because the perf nmi handler is supposed to check the global interrupt bit
to determine if the perf counters caused the nmi or not otherwise fall
through to other handler like SGI's nmi button in this case.

My first impression is the skip nmi logic in the perf handler is probably
accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
supposed to skip and thus swallows it.  At least that is the impression I
get from the RedHat bugzilla which says SGI is running 'perf top', getting
a hang, then pressing their nmi button to see the stack traces.

Jack,

I worked through a number of these issues upstream and I already talked to
George and Russ over here at RedHat about working through the issue over
here with them.  They can help me get access to your box to help debug.

Cheers,
Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:43     ` Cyrill Gorcunov
  2011-03-21 17:00       ` Cyrill Gorcunov
@ 2011-03-21 17:53       ` Don Zickus
  1 sibling, 0 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-21 17:53 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Ingo Molnar, Jack Steiner, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On Mon, Mar 21, 2011 at 07:43:46PM +0300, Cyrill Gorcunov wrote:
> I think Jack might need to setup priority for his notifier, like
> 
> static struct notifier_block uv_dump_stack_nmi_nb = {
> 	.notifier_call	= uv_handle_nmi,
> 	.priority	= NMI_LOCAL_HIGH_PRIOR+1,
> };
> 
> so it would be called before perf nmi. Don, am I right?

Unless they added register to detect the external nmi button has been
pressed this shouldn't work and in fact if you run 'perf' you will
probably trigger stack traces for all the cpus on your first NMI.  That is
what has been explained to me privately.

Cheers,
Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:48               ` Cyrill Gorcunov
@ 2011-03-21 17:55                 ` Cyrill Gorcunov
  0 siblings, 0 replies; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 17:55 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra, Jason Wessel

On 03/21/2011 08:48 PM, Cyrill Gorcunov wrote:
...
>>>   Another option might be to add pre-nmi notifier chain, which of course
>>> not much differ from platform ops but I guess platform ops stands mostly
>>> for one-shot events while chain might be more flexible. Ie I mean something
>>> like
>>>
>>> 	if (notify_pre_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
>>> 		return;
>>
>> You still need to process both chains in order to handle the case where both
>> hw_perf & the SGI BMC raise NMIs at about the same time.
>>
>> --- jack
> 
> yes, but I meant to simply call this chain before the regular notify_die. Anyway
> it would look ugly as hell too.
> 

And if I'm not missing something kgdb still might call IPI inside NMI handler
which looks somewhat strange to me...

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:51     ` Don Zickus
@ 2011-03-21 18:00       ` Cyrill Gorcunov
  2011-03-21 18:22       ` Jack Steiner
  1 sibling, 0 replies; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 18:00 UTC (permalink / raw)
  To: Don Zickus
  Cc: Ingo Molnar, Jack Steiner, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra, Jason Wessel

On 03/21/2011 08:51 PM, Don Zickus wrote:
> On Mon, Mar 21, 2011 at 07:26:51PM +0300, Cyrill Gorcunov wrote:
>> On 03/21/2011 07:14 PM, Ingo Molnar wrote:
>>>
>>> * Jack Steiner <steiner@sgi.com> wrote:
>>>
>>>> This fixes a problem seen on UV systems handling NMIs from the node controller.
>>>> The original code used the DIE notifier as the hook to get to the UV NMI
>>>> handler. This does not work if performance counters are active - the hw_perf
>>>> code consumes the NMI and the UV handler is not called.
> 
> Well that is a bug in the perf code.  We have been dealing with 'perf'
> swallowing NMIs for a couple of releases now.  I think we got rid of most
> of the cases (p4 and acme's core2 quad are the only cases I know that are
> still an issue).

p4 has the issue if only smp-kgdb case happens as far as i know, which in turn
'cause of IPI called inside nmi handler and other cpus are waiting for such nmi
arrival and if perf is enabled same time we might end up that ipi nmi sent by kgdb
will be eaten by perf subsystem (if my analysis is correct, Jason?). So for this
case we might need pre-regular nmi notifier call chain I guess or platform ops
as Jack proposed but still all become incredibly messy for me :(

> 
> I would much prefer to investigate the reason why this is happening
> because the perf nmi handler is supposed to check the global interrupt bit
> to determine if the perf counters caused the nmi or not otherwise fall
> through to other handler like SGI's nmi button in this case.
> 
> My first impression is the skip nmi logic in the perf handler is probably
> accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> supposed to skip and thus swallows it.  At least that is the impression I
> get from the RedHat bugzilla which says SGI is running 'perf top', getting
> a hang, then pressing their nmi button to see the stack traces.
> 
> Jack,
> 
> I worked through a number of these issues upstream and I already talked to
> George and Russ over here at RedHat about working through the issue over
> here with them.  They can help me get access to your box to help debug.
> 
> Cheers,
> Don


-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 16:56   ` Jack Steiner
@ 2011-03-21 18:05     ` Ingo Molnar
  2011-03-21 19:23       ` [PATCH V2] " Jack Steiner
  0 siblings, 1 reply; 38+ messages in thread
From: Ingo Molnar @ 2011-03-21 18:05 UTC (permalink / raw)
  To: Jack Steiner
  Cc: tglx, hpa, x86, linux-kernel, Peter Zijlstra, Cyrill Gorcunov


* Jack Steiner <steiner@sgi.com> wrote:

>  static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
>  {
>  	unsigned char reason = 0;
> +	int handled;
>  
>  	/*
>  	 * CPU-specific NMI must be processed before non-CPU-specific
>  	 * NMI, otherwise we may lose it, because the CPU-specific
>  	 * NMI can not be detected/processed on other CPUs.
>  	 */
> -	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> +	handled = x86_platform.nmi_handler(regs);
> +	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
> +	    			handled)
>  		return;

This would indeed be cleaner and would work better - given how unreliable it is 
to demultiplex NMI reasons.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:08         ` Jack Steiner
  2011-03-21 17:19           ` Cyrill Gorcunov
@ 2011-03-21 18:15           ` Cyrill Gorcunov
  2011-03-21 18:24             ` Jack Steiner
  1 sibling, 1 reply; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-21 18:15 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On 03/21/2011 08:08 PM, Jack Steiner wrote:
...
> 
> We use KDB internally, and yes, it has the same issue. The version of the
> patch that uses KDB OR's the "handled" status for both KDB & the UV NMI handler.
> If either KDB or the UV NMI handler returns "handled", the code in traps.c exits
> after the call to the first die notifier.
> 
> Not particularily pretty but I could not find a better way to do it.
> 
> --- jack

  Btw Jack, I somehow missed (sorry) this patch only handles UV NMI handler so
for KGDB case you need some other patch on top?

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 17:51     ` Don Zickus
  2011-03-21 18:00       ` Cyrill Gorcunov
@ 2011-03-21 18:22       ` Jack Steiner
  2011-03-21 19:37         ` Don Zickus
  1 sibling, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 18:22 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Mon, Mar 21, 2011 at 01:51:10PM -0400, Don Zickus wrote:
> On Mon, Mar 21, 2011 at 07:26:51PM +0300, Cyrill Gorcunov wrote:
> > On 03/21/2011 07:14 PM, Ingo Molnar wrote:
> > > 
> > > * Jack Steiner <steiner@sgi.com> wrote:
> > > 
> > >> This fixes a problem seen on UV systems handling NMIs from the node controller.
> > >> The original code used the DIE notifier as the hook to get to the UV NMI
> > >> handler. This does not work if performance counters are active - the hw_perf
> > >> code consumes the NMI and the UV handler is not called.
> 
> Well that is a bug in the perf code.  We have been dealing with 'perf'
> swallowing NMIs for a couple of releases now.  I think we got rid of most
> of the cases (p4 and acme's core2 quad are the only cases I know that are
> still an issue).
> 
> I would much prefer to investigate the reason why this is happening
> because the perf nmi handler is supposed to check the global interrupt bit
> to determine if the perf counters caused the nmi or not otherwise fall
> through to other handler like SGI's nmi button in this case.

The patch that I posted is based on a RHEL6.1 patch that I'm running internally.
Unless something has very recently changed in the RH sources, the perf
NMI handler unconditionally returns NOTIFY_STOP if it handles an NMI.
If no NMI was handled, it returns NOTIFY_DONE. This sometimes works
and allows the platform generated NMI to be processed but if both NMI
sources trigger at about the same time, the lower priority event
will be lost.

The root cause of the problem is that architecturally, x86 does not
have a way to identify the source(s) that cause an NMI. If multiple
events occur at about the same time, there is no way that I can see that the
OS can detect it.

> 
> My first impression is the skip nmi logic in the perf handler is probably
> accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> supposed to skip and thus swallows it.  At least that is the impression I

Agree


> get from the RedHat bugzilla which says SGI is running 'perf top', getting
> a hang, then pressing their nmi button to see the stack traces.
> 
> Jack,
> 
> I worked through a number of these issues upstream and I already talked to
> George and Russ over here at RedHat about working through the issue over
> here with them.  They can help me get access to your box to help debug.

Russ is right down the hall.


--- jack

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 18:15           ` Cyrill Gorcunov
@ 2011-03-21 18:24             ` Jack Steiner
  0 siblings, 0 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 18:24 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Ingo Molnar, Don Zickus, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On Mon, Mar 21, 2011 at 09:15:04PM +0300, Cyrill Gorcunov wrote:
> On 03/21/2011 08:08 PM, Jack Steiner wrote:
> ...
> > 
> > We use KDB internally, and yes, it has the same issue. The version of the
> > patch that uses KDB OR's the "handled" status for both KDB & the UV NMI handler.
> > If either KDB or the UV NMI handler returns "handled", the code in traps.c exits
> > after the call to the first die notifier.
> > 
> > Not particularily pretty but I could not find a better way to do it.
> > 
> > --- jack
> 
>   Btw Jack, I somehow missed (sorry) this patch only handles UV NMI handler so
> for KGDB case you need some other patch on top?

Yes. Internally we usually apply the older KDB patches to our tree.


--- jack

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 18:05     ` Ingo Molnar
@ 2011-03-21 19:23       ` Jack Steiner
  0 siblings, 0 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 19:23 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: tglx, hpa, x86, linux-kernel, Peter Zijlstra, Cyrill Gorcunov

This fixes a problem seen on UV systems handling NMIs from the node controller.
The original code used the DIE notifier as the hook to get to the UV NMI
handler. This does not work if performance counters are active - the hw_perf
code consumes the NMI and the UV handler is not called.

Signed-off-by: Jack Steiner <steiner@sgi.com>

---
V2 - Use x86_platform_ops.

(This patch was needed to debug system hangs that occur only when running
performance tools (perf or oprofile) on large systems. Without the
patch the system hard hangs. Attempts to NMI the system or get into
a debugger fail. This patch allowed the problem to be debugged. The
hang will be fixed later)

I tried reordering notifier priorities so that the UV code was called first.
This can be made to work BUT requires knowledge in the UV nmi handler whether
any other NMI source is active. The UV NMI handler cannot return NOTIFY_STOP
if other NMI sources are active - if NOTIFY_STOP is returned, the other handlers
will not be called. I tried this reordering & hw_perf collection would occasionally
hang due to a missed NMI. If the UV handler returns NOTIFY_OK or NOTIFY_DONE
and hw_perf is NOT active, we get the "dazed & confused" messages.


 arch/x86/include/asm/uv/uv_mmrs.h  |   16 ++++++
 arch/x86/include/asm/x86_init.h    |    2 
 arch/x86/kernel/apic/x2apic_uv_x.c |   90 +++++++++++++++++++++++++++----------
 arch/x86/kernel/traps.c            |    6 ++
 arch/x86/kernel/x86_init.c         |    2 
 5 files changed, 91 insertions(+), 25 deletions(-)

Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h	2011-03-21 14:04:49.629495972 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h	2011-03-21 14:04:52.485509905 -0500
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
Index: linux/arch/x86/include/asm/x86_init.h
===================================================================
--- linux.orig/arch/x86/include/asm/x86_init.h	2011-03-21 14:04:49.629495972 -0500
+++ linux/arch/x86/include/asm/x86_init.h	2011-03-21 14:04:52.489996907 -0500
@@ -7,6 +7,7 @@
 struct mpc_bus;
 struct mpc_cpu;
 struct mpc_table;
+struct pt_regs;
 
 /**
  * struct x86_init_mpparse - platform specific mpparse ops
@@ -153,6 +154,7 @@ struct x86_platform_ops {
 	void (*iommu_shutdown)(void);
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
+	int (*nmi_handler)(struct pt_regs *regs);
 	int (*i8042_detect)(void);
 };
 
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-21 14:04:49.629495972 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-21 14:04:52.533571712 -0500
@@ -34,6 +34,12 @@
 #include <asm/ipi.h>
 #include <asm/smp.h>
 #include <asm/x86_init.h>
+#include <asm/perf_event.h>
+
+/* BMC sets this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -47,6 +53,13 @@ EXPORT_SYMBOL_GPL(uv_min_hub_revision_id
 unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);
+static int uv_handle_nmi(struct pt_regs *regs);
+
+/* Should be part of uv_hub_info but that breas the KABI */
+static struct uv_nmi_info {
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
+} *uv_nmi_info;
 
 static unsigned long __init uv_early_read_mmr(unsigned long addr)
 {
@@ -115,6 +128,7 @@ static int __init uv_acpi_madt_oem_check
 		early_get_apic_pnode_shift();
 		x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
 		x86_platform.nmi_init = uv_nmi_init;
+		x86_platform.nmi_handler = uv_handle_nmi;
 		if (!strcmp(oem_table_id, "UVL"))
 			uv_system_type = UV_LEGACY_APIC;
 		else if (!strcmp(oem_table_id, "UVX"))
@@ -635,36 +649,60 @@ void __cpuinit uv_cpu_init(void)
 }
 
 /*
- * When NMI is received, print a stack trace.
+ * When an NMI from the BMC is received:
+ * 	- print a stack trace
  */
-int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
+static unsigned long last_nmi_jiffies;
+
+static int uv_handle_nmi(struct pt_regs *regs)
 {
-	if (reason != DIE_NMIUNKNOWN)
-		return NOTIFY_OK;
+	unsigned long real_uv_nmi;
+	int blade;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
-		return NOTIFY_OK;
+		return 0;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	blade = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_nmi_info[blade].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_nmi_info[blade].nmi_count++;
+			mb();
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_nmi_info[blade].nmi_lock);
+	}
+
+	/*
+	 * Return "NMI handled" if an NMI has been seen within the preceeding
+	 * few seconds. This eliminates the "dazed.." message that can occur
+	 * if a hw_perf and BMC NMI are received at about the same time
+	 * and both events are processed with the first NMI.
+	 */
+	if (__get_cpu_var(cpu_last_nmi_count) == uv_nmi_info[blade].nmi_count)
+		return jiffies - last_nmi_jiffies < 10 * HZ;
+	__get_cpu_var(cpu_last_nmi_count) = uv_nmi_info[blade].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
-
-	return NOTIFY_STOP;
-}
-
-static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
-};
-
-void uv_register_nmi_notifier(void)
-{
-	if (register_die_notifier(&uv_dump_stack_nmi_nb))
-		printk(KERN_WARNING "UV NMI handler failed to register\n");
+	last_nmi_jiffies = jiffies;
+	return 1;
 }
 
 void uv_nmi_init(void)
@@ -717,10 +755,17 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
-	for (blade = 0; blade < uv_num_possible_blades(); blade++)
+
+	bytes = sizeof(uv_nmi_info[0]) * num_possible_cpus();
+	uv_nmi_info = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_nmi_info);
+
+	for (blade = 0; blade < uv_num_possible_blades(); blade++) {
 		uv_blade_info[blade].memory_nid = -1;
+		spin_lock_init(&uv_nmi_info[blade].nmi_lock);
+	}
 
 	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
@@ -805,7 +850,6 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
-	uv_register_nmi_notifier();
 	proc_mkdir("sgi_uv", NULL);
 
 	/* register Legacy VGA I/O redirection handler */
Index: linux/arch/x86/kernel/traps.c
===================================================================
--- linux.orig/arch/x86/kernel/traps.c	2011-03-21 14:04:49.629495972 -0500
+++ linux/arch/x86/kernel/traps.c	2011-03-21 14:08:44.609496310 -0500
@@ -55,6 +55,7 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/mce.h>
+#include <asm/x86_init.h>
 
 #include <asm/mach_traps.h>
 
@@ -397,13 +398,16 @@ unknown_nmi_error(unsigned char reason,
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
+	int handled;
 
 	/*
 	 * CPU-specific NMI must be processed before non-CPU-specific
 	 * NMI, otherwise we may lose it, because the CPU-specific
 	 * NMI can not be detected/processed on other CPUs.
 	 */
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+	handled = x86_platform.nmi_handler(regs);
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP ||
+	    			handled)
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
Index: linux/arch/x86/kernel/x86_init.c
===================================================================
--- linux.orig/arch/x86/kernel/x86_init.c	2011-03-21 14:04:49.629495972 -0500
+++ linux/arch/x86/kernel/x86_init.c	2011-03-21 14:06:52.129814554 -0500
@@ -89,6 +89,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpu
 };
 
 static void default_nmi_init(void) { };
+static int default_nmi_handler(struct pt_regs *regs) { return 0; };
 static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
@@ -98,6 +99,7 @@ struct x86_platform_ops x86_platform = {
 	.iommu_shutdown			= iommu_shutdown_noop,
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
+	.nmi_handler			= default_nmi_handler,
 	.i8042_detect			= default_i8042_detect
 };
 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 18:22       ` Jack Steiner
@ 2011-03-21 19:37         ` Don Zickus
  2011-03-21 20:37           ` Jack Steiner
  2011-03-22 17:11           ` Jack Steiner
  0 siblings, 2 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-21 19:37 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Mon, Mar 21, 2011 at 01:22:35PM -0500, Jack Steiner wrote:
> On Mon, Mar 21, 2011 at 01:51:10PM -0400, Don Zickus wrote:
> > On Mon, Mar 21, 2011 at 07:26:51PM +0300, Cyrill Gorcunov wrote:
> > > On 03/21/2011 07:14 PM, Ingo Molnar wrote:
> > > > 
> > > > * Jack Steiner <steiner@sgi.com> wrote:
> > > > 
> > > >> This fixes a problem seen on UV systems handling NMIs from the node controller.
> > > >> The original code used the DIE notifier as the hook to get to the UV NMI
> > > >> handler. This does not work if performance counters are active - the hw_perf
> > > >> code consumes the NMI and the UV handler is not called.
> > 
> > Well that is a bug in the perf code.  We have been dealing with 'perf'
> > swallowing NMIs for a couple of releases now.  I think we got rid of most
> > of the cases (p4 and acme's core2 quad are the only cases I know that are
> > still an issue).
> > 
> > I would much prefer to investigate the reason why this is happening
> > because the perf nmi handler is supposed to check the global interrupt bit
> > to determine if the perf counters caused the nmi or not otherwise fall
> > through to other handler like SGI's nmi button in this case.
> 
> The patch that I posted is based on a RHEL6.1 patch that I'm running internally.
> Unless something has very recently changed in the RH sources, the perf
> NMI handler unconditionally returns NOTIFY_STOP if it handles an NMI.
> If no NMI was handled, it returns NOTIFY_DONE. This sometimes works
> and allows the platform generated NMI to be processed but if both NMI
> sources trigger at about he same time, the lower priority event
> will be lost.

Not necessarily, if both are triggered, you should still get _two_ NMIs.
It may get processed in the wrong order but it should still get correctly
processed.

> 
> The root cause of the problem is that architecturally, x86 does not
> have a way to identifies the source(s) that cause an NMI. If multiple
> events occur at about the same time, there is no way that I can see that the
> OS can detect it.

There are registers we can check to see who triggered the NMI (at least
for the perf code, the SGI code maybe not, which is why I set it to a
lower priority to be a catch-all).

I'm not aware of the x86 architecture dropping NMIs, so they should all
get processed.  It is just a matter of which subsystems get to determine if
they are the source of the NMI or not.

> 
> > 
> > My first impression is the skip nmi logic in the perf handler is probably
> > accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> > supposed to skip and thus swallows it.  At least that is the impression I
> 
> Agree
> 
> 
> > get from the RedHat bugzilla which says SGI is running 'perf top', getting
> > a hang, then pressing their nmi button to see the stack traces.
> > 
> > Jack,
> > 
> > I worked through a number of these issues upstream and I already talked to
> > George and Russ over here at RedHat about working through the issue over
> > here with them.  They can help me get access to your box to help debug.
> 
> Russ is right down the hall.

Great!

Cheers,
Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 19:37         ` Don Zickus
@ 2011-03-21 20:37           ` Jack Steiner
  2011-03-22 17:11           ` Jack Steiner
  1 sibling, 0 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-21 20:37 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Mon, Mar 21, 2011 at 03:37:40PM -0400, Don Zickus wrote:
> On Mon, Mar 21, 2011 at 01:22:35PM -0500, Jack Steiner wrote:
> > On Mon, Mar 21, 2011 at 01:51:10PM -0400, Don Zickus wrote:
> > > On Mon, Mar 21, 2011 at 07:26:51PM +0300, Cyrill Gorcunov wrote:
> > > > On 03/21/2011 07:14 PM, Ingo Molnar wrote:
> > > > > 
> > > > > * Jack Steiner <steiner@sgi.com> wrote:
> > > > > 
> > > > >> This fixes a problem seen on UV systems handling NMIs from the node controller.
> > > > >> The original code used the DIE notifier as the hook to get to the UV NMI
> > > > >> handler. This does not work if performance counters are active - the hw_perf
> > > > >> code consumes the NMI and the UV handler is not called.
> > > 
> > > Well that is a bug in the perf code.  We have been dealing with 'perf'
> > > swallowing NMIs for a couple of releases now.  I think we got rid of most
> > > of the cases (p4 and acme's core2 quad are the only cases I know that are
> > > still an issue).
> > > 
> > > I would much prefer to investigate the reason why this is happening
> > > because the perf nmi handler is supposed to check the global interrupt bit
> > > to determine if the perf counters caused the nmi or not otherwise fall
> > > through to other handler like SGI's nmi button in this case.
> > 
> > The patch that I posted is based on a RHEL6.1 patch that I'm running internally.
> > Unless something has very recently changed in the RH sources, the perf
> > NMI handler unconditionally returns NOTIFY_STOP if it handles an NMI.
> > If no NMI was handled, it returns NOTIFY_DONE. This sometimes works
> > and allows the platform generated NMI to be processed but if both NMI
> > sources trigger at about he same time, the lower priority event
> > will be lost.
> 
> Not necessarily, if both are triggered, you should still get _two_ NMIs.
> It may get processed in the wrong order but it should still get correctly
> processed.


Let me do some more testing with the UV NMI priority set higher than the hw_perf
priority. When I tried this earlier, I thought I saw problems but I'm
not certain that it was not caused by a different error.


> 
> > 
> > The root cause of the problem is that architecturally, x86 does not
> > have a way to identifies the source(s) that cause an NMI. If multiple
> > events occur at about the same time, there is no way that I can see that the
> > OS can detect it.
> 
> There are registers we can check to see who owns trigger the NMI (at least
> for the perf code, the SGI code maybe not, which is why I set it to a
> lower priority to be a catch-all).
> 
> I'm not aware of the x86 architecture dropping NMIs, so they should all
> get processed.  It is just a matter of which subsystems get determine if
> they are the source of the NMI or not.
> 
> > 
> > > 
> > > My first impression is the skip nmi logic in the perf handler is probably
> > > accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> > > supposed to skip and thus swallows it.  At least that is the impression I
> > 
> > Agree
> > 
> > 
> > > get from the RedHat bugzilla which says SGI is running 'perf top', getting
> > > a hang, then pressing their nmi button to see the stack traces.
> > > 
> > > Jack,
> > > 
> > > I worked through a number of these issues upstream and I already talked to
> > > George and Russ over here at RedHat about working through the issue over
> > > here with them.  They can help me get access to your box to help debug.
> > 
> > Russ is right down the hall.
> 
> Great!
> 
> Cheers,
> Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-21 19:37         ` Don Zickus
  2011-03-21 20:37           ` Jack Steiner
@ 2011-03-22 17:11           ` Jack Steiner
  2011-03-22 18:44             ` Don Zickus
  1 sibling, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-22 17:11 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Mon, Mar 21, 2011 at 03:37:40PM -0400, Don Zickus wrote:
> On Mon, Mar 21, 2011 at 01:22:35PM -0500, Jack Steiner wrote:
> > On Mon, Mar 21, 2011 at 01:51:10PM -0400, Don Zickus wrote:
> > > On Mon, Mar 21, 2011 at 07:26:51PM +0300, Cyrill Gorcunov wrote:
> > > > On 03/21/2011 07:14 PM, Ingo Molnar wrote:
> > > > > 
> > > > > * Jack Steiner <steiner@sgi.com> wrote:
> > > > > 
> > > > >> This fixes a problem seen on UV systems handling NMIs from the node controller.
> > > > >> The original code used the DIE notifier as the hook to get to the UV NMI
> > > > >> handler. This does not work if performance counters are active - the hw_perf
> > > > >> code consumes the NMI and the UV handler is not called.
> > > 
> > > Well that is a bug in the perf code.  We have been dealing with 'perf'
> > > swallowing NMIs for a couple of releases now.  I think we got rid of most
> > > of the cases (p4 and acme's core2 quad are the only cases I know that are
> > > still an issue).
> > > 
> > > I would much prefer to investigate the reason why this is happening
> > > because the perf nmi handler is supposed to check the global interrupt bit
> > > to determine if the perf counters caused the nmi or not otherwise fall
> > > through to other handler like SGI's nmi button in this case.
> > 
> > The patch that I posted is based on a RHEL6.1 patch that I'm running internally.
> > Unless something has very recently changed in the RH sources, the perf
> > NMI handler unconditionally returns NOTIFY_STOP if it handles an NMI.
> > If no NMI was handled, it returns NOTIFY_DONE. This sometimes works
> > and allows the platform generated NMI to be processed but if both NMI
> > sources trigger at about the same time, the lower priority event
> > will be lost.
> 
> Not necessarily, if both are triggered, you should still get _two_ NMIs.
> It may get processed in the wrong order but it should still get correctly
> processed.

How certain are you that multiple NMIs triggered at about the same time will
deliver discrete NMI events? I updated the patch so that I'm running with:

	- no special code in traps.c (I removed the traps.c code that was
	  in the patch I posted)
	- used die_notifier for calling the UV nmi handler
	- UV priority is higher than the hw_perf priority

Both hw_perf (perf top) & UV NMIs work correctly under light loads. However, if I
run for 10 - 15 minutes injecting UV NMIs at a rate of about 30/min, "perf top"
stops generating output. Strace shows that it continues to poll() but no data
is received.

While "perf top" is hung, if I inject an NMI into the system in a way that will NOT
be consumed by the UV nmi handler, "perf top" resumes output but will stop again after
a few minutes.


AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
rule out that I'm missing something but I don't see it.


Do you have any ideas or clues???


> 
> > 
> > The root cause of the problem is that architecturally, x86 does not
> > have a way to identifies the source(s) that cause an NMI. If multiple
> > events occur at about the same time, there is no way that I can see that the
> > OS can detect it.
> 
> There are registers we can check to see who owns trigger the NMI (at least
> for the perf code, the SGI code maybe not, which is why I set it to a
> lower priority to be a catch-all).
> 
> I'm not aware of the x86 architecture dropping NMIs, so they should all
> get processed.  It is just a matter of which subsystems get determine if
> they are the source of the NMI or not.
> 
> > 
> > > 
> > > My first impression is the skip nmi logic in the perf handler is probably
> > > accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> > > supposed to skip and thus swallows it.  At least that is the impression I
> > 
> > Agree
> > 
> > 
> > > get from the RedHat bugzilla which says SGI is running 'perf top', getting
> > > a hang, then pressing their nmi button to see the stack traces.
> > > 
> > > Jack,
> > > 
> > > I worked through a number of these issues upstream and I already talked to
> > > George and Russ over here at RedHat about working through the issue over
> > > here with them.  They can help me get access to your box to help debug.
> > 
> > Russ is right down the hall.
> 
> Great!
> 
> Cheers,
> Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 17:11           ` Jack Steiner
@ 2011-03-22 18:44             ` Don Zickus
  2011-03-22 20:02               ` Jack Steiner
  2011-03-22 21:25               ` Jack Steiner
  0 siblings, 2 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-22 18:44 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Tue, Mar 22, 2011 at 12:11:18PM -0500, Jack Steiner wrote:
> How certain are you that multiple NMIs triggered at about the same time will
> deliver discrete NMI events? I updated the patch so that I'm running with:

I think as long as there isn't more than two (1 active, 1 latched), you
would be ok.  A third one looks like it would get dropped.

> 
> 	- no special code in traps.c (I removed the traps.c code that was
> 	  in the patch I posted)
> 	- used die_notifier for calling the UV nmi handler
> 	- UV priority is higher than the hw_perf priority
> 
> Both hw_perf (perf top) & UV NMIs work correctly under light loads. However, if I
> run for 10 - 15 minutes injecting UV NMIs at a rate of about 30/min, "perf top"
> stops generating output. Strace shows that it continues to poll() but no data
> is received.

That's a low frequency and it still gets stuck?

> 
> While "perf top" is hung, if I inject an NMI into the system in a way that will NOT
> be consumed by the UV nmi handler, "perf top" resumes output but will stop again after
> a few minutes.

So that means the PMU set its interrupt bit but the cpu failed to get the
NMI.

> 
> 
> AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
> rule out that I'm missing something but I don't see it.

What happens if you put the UV nmi handler below the hw_perf handler in
priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
swallow some of the UV NMIs, but more importantly does it still generate
the hang you see?

> 
> 
> Do you have any ideas or clues???

Part of the problem is most of the NMI testing is done with perf and maybe
kgdb.  So high frequency NMI sharing is probably exposing more bugs.

Also is it a problem to move your testing on to the latest upstream code
instead of RHEL-6?  Not all the latest NMI work is there.  I want to make
sure we are all starting at the same code. :-)

Cheers,
Don

> 
> 
> > 
> > > 
> > > The root cause of the problem is that architecturally, x86 does not
> > > have a way to identifies the source(s) that cause an NMI. If multiple
> > > events occur at about the same time, there is no way that I can see that the
> > > OS can detect it.
> > 
> > There are registers we can check to see who owns trigger the NMI (at least
> > for the perf code, the SGI code maybe not, which is why I set it to a
> > lower priority to be a catch-all).
> > 
> > I'm not aware of the x86 architecture dropping NMIs, so they should all
> > get processed.  It is just a matter of which subsystems get determine if
> > they are the source of the NMI or not.
> > 
> > > 
> > > > 
> > > > My first impression is the skip nmi logic in the perf handler is probably
> > > > accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> > > > supposed to skip and thus swallows it.  At least that is the impression I
> > > 
> > > Agree
> > > 
> > > 
> > > > get from the RedHat bugzilla which says SGI is running 'perf top', getting
> > > > a hang, then pressing their nmi button to see the stack traces.
> > > > 
> > > > Jack,
> > > > 
> > > > I worked through a number of these issues upstream and I already talked to
> > > > George and Russ over here at RedHat about working through the issue over
> > > > here with them.  They can help me get access to your box to help debug.
> > > 
> > > Russ is right down the hall.
> > 
> > Great!
> > 
> > Cheers,
> > Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 18:44             ` Don Zickus
@ 2011-03-22 20:02               ` Jack Steiner
  2011-03-22 21:25               ` Jack Steiner
  1 sibling, 0 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-22 20:02 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Tue, Mar 22, 2011 at 02:44:50PM -0400, Don Zickus wrote:
> On Tue, Mar 22, 2011 at 12:11:18PM -0500, Jack Steiner wrote:
> > How certain are you that multiple NMIs triggered at about the same time will
> > deliver discrete NMI events? I updated the patch so that I'm running with:
> 
> I think as long as there isn't more than two (1 active, 1 latched), you
> would be ok.  A third one looks like it would get dropped.

Hmmm. Although extremely unlikely, would that mean that a problem exists
if there are 3 NMI sources: ie., kdb/kgdb, hw_perf & UV.


> 
> > 
> > 	- no special code in traps.c (I removed the traps.c code that was
> > 	  in the patch I posted)
> > 	- used die_notifier for calling the UV nmi handler
> > 	- UV priority is higher than the hw_perf priority
> > 
> > Both hw_perf (perf top) & UV NMIs work correctly under light loads. However, if I
> > run for 10 - 15 minutes injecting UV NMIs at a rate of about 30/min, "perf top"
> > stops generating output. Strace shows that it continues to poll() but no data
> > is received.
> 
> That's a low frequency and it still gets stuck?

Yes. Usually take about a minute.

The current NMI mechanism from our node controller limits the NMI
rate to about 1 every 2 sec for the current config that I'm running on.


> 
> > 
> > While "perf top" is hung, if I inject an NMI into the system in a way that will NOT
> > be consumed by the UV nmi handler, "perf top" resumes output but will stop again after
> > a few minutes.
> 
> So that means the PMU set its interrupt bit but the cpu failed to get the
> NMI.

That is what it looks like.


> 
> > 
> > 
> > AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
> > rule out that I'm missing something but I don't see it.
> 
> What happens if you put the UV nmi handler below the hw_perf handler in
> priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
> swallow some of the UV NMIs, but more importantly does it still generate
> the hang you see?

I'll try that although it may be tomorrow AM before I get a chance.


> 
> > 
> > 
> > Do you have any ideas or clues???
> 
> Part of the problem is most of the NMI testing is done with perf and maybe
> kgdb.  So high frequency NMI sharing is probably exposing more bugs.
> 
> Also is it a problem to move your testing on to the latest upstream code
> instead of RHEL-6?  Not all the latest NMI work is there.  I want to make
> sure we are all starting at the same code. :-)

Sure.

--- jack

> 
> Cheers,
> Don
> 
> > 
> > 
> > > 
> > > > 
> > > > The root cause of the problem is that architecturally, x86 does not
> > > > have a way to identifies the source(s) that cause an NMI. If multiple
> > > > events occur at about the same time, there is no way that I can see that the
> > > > OS can detect it.
> > > 
> > > There are registers we can check to see who owns trigger the NMI (at least
> > > for the perf code, the SGI code maybe not, which is why I set it to a
> > > lower priority to be a catch-all).
> > > 
> > > I'm not aware of the x86 architecture dropping NMIs, so they should all
> > > get processed.  It is just a matter of which subsystems get determine if
> > > they are the source of the NMI or not.
> > > 
> > > > 
> > > > > 
> > > > > My first impression is the skip nmi logic in the perf handler is probably
> > > > > accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> > > > > supposed to skip and thus swallows it.  At least that is the impression I
> > > > 
> > > > Agree
> > > > 
> > > > 
> > > > > get from the RedHat bugzilla which says SGI is running 'perf top', getting
> > > > > a hang, then pressing their nmi button to see the stack traces.
> > > > > 
> > > > > Jack,
> > > > > 
> > > > > I worked through a number of these issues upstream and I already talked to
> > > > > George and Russ over here at RedHat about working through the issue over
> > > > > here with them.  They can help me get access to your box to help debug.
> > > > 
> > > > Russ is right down the hall.
> > > 
> > > Great!
> > > 
> > > Cheers,
> > > Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 18:44             ` Don Zickus
  2011-03-22 20:02               ` Jack Steiner
@ 2011-03-22 21:25               ` Jack Steiner
  2011-03-22 22:02                 ` Cyrill Gorcunov
  2011-03-22 22:05                 ` Don Zickus
  1 sibling, 2 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-22 21:25 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Tue, Mar 22, 2011 at 02:44:50PM -0400, Don Zickus wrote:
> On Tue, Mar 22, 2011 at 12:11:18PM -0500, Jack Steiner wrote:
> > How certain are you that multiple NMIs triggered at about the same time will
> > deliver discrete NMI events? I updated the patch so that I'm running with:
> 
> I think as long as there isn't more than two (1 active, 1 latched), you
> would be ok.  A third one looks like it would get dropped.
> 
> > 
> > 	- no special code in traps.c (I removed the traps.c code that was
> > 	  in the patch I posted)
> > 	- used die_notifier for calling the UV nmi handler
> > 	- UV priority is higher than the hw_perf priority
> > 
> > Both hw_perf (perf top) & UV NMIs work correctly under light loads. However, if I
> > run for 10 - 15 minutes injecting UV NMIs at a rate of about 30/min, "perf top"
> > stops generating output. Strace shows that it continues to poll() but no data
> > is received.
> 
> That's a low frequency and it still gets stuck?
> 
> > 
> > While "perf top" is hung, if I inject an NMI into the system in a way that will NOT
> > be consumed by the UV nmi handler, "perf top" resumes output but will stop again after
> > a few minutes.
> 
> So that means the PMU set its interrupt bit but the cpu failed to get the
> NMI.
> 
> > 
> > 
> > AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
> > rule out that I'm missing something but I don't see it.
> 
> What happens if you put the UV nmi handler below the hw_perf handler in
> priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
> swallow some of the UV NMIs, but more importantly does it still generate
> the hang you see?

I verified that the failures ("perf top" stops) are the same on both RHEL6.1 & the
latest x86 2.6.38+ tree.

I switched priorities & as expected, "perf top" no longer hangs. I see an occasional
missed UV NMI - about 1 every minute. I also see a few "dazed" messages as
well - 3 in a 5 minute period. This testing was done on a 2.6.38+ kernel.

I'm running on a 48p system.

Ideas?

> 
> > 
> > 
> > Do you have any ideas or clues???
> 
> Part of the problem is most of the NMI testing is done with perf and maybe
> kgdb.  So high frequency NMI sharing is probably exposing more bugs.
> 
> Also is it a problem to move your testing on to the latest upstream code
> instead of RHEL-6?  Not all the latest NMI work is there.  I want to make
> sure we are all starting at the same code. :-)
> 
> Cheers,
> Don
> 
> > 
> > 
> > > 
> > > > 
> > > > The root cause of the problem is that architecturally, x86 does not
> > > > have a way to identifies the source(s) that cause an NMI. If multiple
> > > > events occur at about the same time, there is no way that I can see that the
> > > > OS can detect it.
> > > 
> > > There are registers we can check to see who owns trigger the NMI (at least
> > > for the perf code, the SGI code maybe not, which is why I set it to a
> > > lower priority to be a catch-all).
> > > 
> > > I'm not aware of the x86 architecture dropping NMIs, so they should all
> > > get processed.  It is just a matter of which subsystems get determine if
> > > they are the source of the NMI or not.
> > > 
> > > > 
> > > > > 
> > > > > My first impression is the skip nmi logic in the perf handler is probably
> > > > > accidentally thinking the SGI external nmi is the perf's 'extra' nmi it is
> > > > > supposed to skip and thus swallows it.  At least that is the impression I
> > > > 
> > > > Agree
> > > > 
> > > > 
> > > > > get from the RedHat bugzilla which says SGI is running 'perf top', getting
> > > > > a hang, then pressing their nmi button to see the stack traces.
> > > > > 
> > > > > Jack,
> > > > > 
> > > > > I worked through a number of these issues upstream and I already talked to
> > > > > George and Russ over here at RedHat about working through the issue over
> > > > > here with them.  They can help me get access to your box to help debug.
> > > > 
> > > > Russ is right down the hall.
> > > 
> > > Great!
> > > 
> > > Cheers,
> > > Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 21:25               ` Jack Steiner
@ 2011-03-22 22:02                 ` Cyrill Gorcunov
  2011-03-23 13:36                   ` Jack Steiner
  2011-03-22 22:05                 ` Don Zickus
  1 sibling, 1 reply; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-22 22:02 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Don Zickus, Ingo Molnar, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On 03/23/2011 12:25 AM, Jack Steiner wrote:
> On Tue, Mar 22, 2011 at 02:44:50PM -0400, Don Zickus wrote:
>> On Tue, Mar 22, 2011 at 12:11:18PM -0500, Jack Steiner wrote:
>>> How certain are you that multiple NMIs triggered at about the same time will
>>> deliver discrete NMI events? I updated the patch so that I'm running with:
>>
>> I think as long as there isn't more than two (1 active, 1 latched), you
>> would be ok.  A third one looks like it would get dropped.
>>
>>>
>>> 	- no special code in traps.c (I removed the traps.c code that was
>>> 	  in the patch I posted)
>>> 	- used die_notifier for calling the UV nmi handler
>>> 	- UV priority is higher than the hw_perf priority
>>>
>>> Both hw_perf (perf top) & UV NMIs work correctly under light loads. However, if I
>>> run for 10 - 15 minutes injecting UV NMIs at a rate of about 30/min, "perf top"
>>> stops generating output. Strace shows that it continues to poll() but no data
>>> is received.
>>
>> That's a low frequency and it still gets stuck?
>>
>>>
>>> While "perf top" is hung, if I inject an NMI into the system in a way that will NOT
>>> be consumed by the UV nmi handler, "perf top" resumes output but will stop again after
>>> a few minutes.
>>
>> So that means the PMU set its interrupt bit but the cpu failed to get the
>> NMI.
>>
>>>
>>>
>>> AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
>>> rule out that I'm missing something but I don't see it.
>>
>> What happens if you put the UV nmi handler below the hw_perf handler in
>> priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
>> swallow some of the UV NMIs, but more importantly does it still generate
>> the hang you see?
> 
> I verified that the failures ("perf top" stops) are the same on both RHEL6.1 & the
> latest x86 2.6.38+ tree.
> 
> I switched priorities & as expected, "perf top" no longer hangs. I see an occassional
> missed UV NMI - about 1 every minute. I also see a few "dazed" messages as
> well - 3 in a 5 minute period. This testing was done on a 2.6.38+ kernel.
> 
> I'm running on a 48p system.
> 
> Ideas?
> 

  I fear there is always a probability for eaten nmi (due to inflight nmi logic
we have) or missed nmi (due to non-instant delivery of nmi).  Say the following
scenario may happen:

1) perf-nmi-0 (from counter 0) issued
2) uv-nmi issued
3) perf-nmi-0 latched
4) perf-nmi-1 (from counter 1) not yet issued but counter overflowed
5) nmi-handler
6) uv-nmi-latched
7) nmi-handler eats both nmis from perf-nmi-0 and uv-nmi because of in-flight
   nmi logic we have
8) finally perf-nmi-1 should appear on line but counter already pulled down so
   no nmi

and here you get missed nmi you expect from uv. I *guess*, not sure if it's possible.
If you disable nmi-watchdog on boot line, does it help?
-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 21:25               ` Jack Steiner
  2011-03-22 22:02                 ` Cyrill Gorcunov
@ 2011-03-22 22:05                 ` Don Zickus
  2011-03-23 16:32                   ` Jack Steiner
  1 sibling, 1 reply; 38+ messages in thread
From: Don Zickus @ 2011-03-22 22:05 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Tue, Mar 22, 2011 at 04:25:19PM -0500, Jack Steiner wrote:
> > > AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
> > > rule out that I'm missing something but I don't see it.
> > 
> > What happens if you put the UV nmi handler below the hw_perf handler in
> > priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
> > swallow some of the UV NMIs, but more importantly does it still generate
> > the hang you see?
> 
> I verified that the failures ("perf top" stops) are the same on both RHEL6.1 & the
> latest x86 2.6.38+ tree.

Thanks for testing that.

> 
> I switched priorities & as expected, "perf top" no longer hangs. I see an occassional
> missed UV NMI - about 1 every minute. I also see a few "dazed" messages as
> well - 3 in a 5 minute period. This testing was done on a 2.6.38+ kernel.
> 
> I'm running on a 48p system.
> 
> Ideas?

Wow, interesting.

The first thing is in 'uv_handle_nmi' can you change that from
DIE_NMIUNKNOWN back to DIE_NMI.  Originally I set it to DIE_NMIUNKNOWN
because I didn't think you guys had the ability to determine if your BMC
generated the NMI or not.  Recently George B. said you guys added a register
bit to determine this, so I am wondering if promoting this would fix
the missed UV NMI.  I am speculating this is being swallowed by the
hw_perf DIE_NMIUNKNOWN exception path.

Second the "dazed" messages are being seen on other machines (currently
core2quads) when using perf with lots of NMI events.  So you might be
seeing a second more common issue there.  I still need to find time to
debug that.

Finally, I am trying to scratch my head about the 'perf top' no longer
hangs part.  The only thing I can think of is under high perf load (with
out extra NMIs by your BMC), we have seen extra NMIs get generated while
processing the current NMI (mainly because Nehalems have I think 4 or 8
PMUs that can be activate at once, so multiple NMIs can trigger here).
But we can recover from this because we check _all_ the PMIs during the
NMI (which currently always comes from the PMU).

Now this extra NMI from the PMU can also happen on a singly activated
PMU because we reload the PMU, then check the events to see if we should
disable it.  By the time we finish checking (and determine we are not done
yet), the event could have rolled over and generated another NMI before we
have finished processing the current one.

So throw in an external NMI into the above situation (which gets dropped
as the third NMI I believe if I read the history of these NMI things
correctly), then it is possible that if uv_handle_nmi is called first it
could swallow the extra NMI as its own and leave the hw_perf hanging.
(that's a mouthful, huh?)

Then again with the priorities switched I guess the opposite is true too,
that your BMC is left missing an event.

This sort of supports the need for your patch earlier or something similar
which says ignore the handler's return code and process all the events on
the die_chain anyway.  And if no one has handled the NMI, then trigger an
unknown NMI.

Unless there is a way to determine if an NMI is latched or not before
issuing the iret and if so assumed we dropped an NMI and process everyone.

I'll need to think of a way to prove all this in the morning (or maybe
later).

I hope that makes some sense as it is late and my brain is shutting down.

Cheers,
Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 22:02                 ` Cyrill Gorcunov
@ 2011-03-23 13:36                   ` Jack Steiner
  0 siblings, 0 replies; 38+ messages in thread
From: Jack Steiner @ 2011-03-23 13:36 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Don Zickus, Ingo Molnar, tglx, hpa, x86, linux-kernel, Peter Zijlstra

On Wed, Mar 23, 2011 at 01:02:59AM +0300, Cyrill Gorcunov wrote:
> On 03/23/2011 12:25 AM, Jack Steiner wrote:
> > On Tue, Mar 22, 2011 at 02:44:50PM -0400, Don Zickus wrote:
> >> On Tue, Mar 22, 2011 at 12:11:18PM -0500, Jack Steiner wrote:
> >>> How certain are you that multiple NMIs triggered at about the same time will
> >>> deliver discrete NMI events? I updated the patch so that I'm running with:
> >>
> >> I think as long as there isn't more than two (1 active, 1 latched), you
> >> would be ok.  A third one looks like it would get dropped.
> >>
> >>>
> >>> 	- no special code in traps.c (I removed the traps.c code that was
> >>> 	  in the patch I posted)
> >>> 	- used die_notifier for calling the UV nmi handler
> >>> 	- UV priority is higher than the hw_perf priority
> >>>
> >>> Both hw_perf (perf top) & UV NMIs work correctly under light loads. However, if I
> >>> run for 10 - 15 minutes injecting UV NMIs at a rate of about 30/min, "perf top"
> >>> stops generating output. Strace shows that it continues to poll() but no data
> >>> is received.
> >>
> >> That's a low frequency and it still gets stuck?
> >>
> >>>
> >>> While "perf top" is hung, if I inject an NMI into the system in a way that will NOT
> >>> be consumed by the UV nmi handler, "perf top" resumes output but will stop again after
> >>> a few minutes.
> >>
> >> So that means the PMU set its interrupt bit but the cpu failed to get the
> >> NMI.
> >>
> >>>
> >>>
> >>> AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
> >>> rule out that I'm missing something but I don't see it.
> >>
> >> What happens if you put the UV nmi handler below the hw_perf handler in
> >> priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
> >> swallow some of the UV NMIs, but more importantly does it still generate
> >> the hang you see?
> > 
> > I verified that the failures ("perf top" stops) are the same on both RHEL6.1 & the
> > latest x86 2.6.38+ tree.
> > 
> > I switched priorities & as expected, "perf top" no longer hangs. I see an occassional
> > missed UV NMI - about 1 every minute. I also see a few "dazed" messages as
> > well - 3 in a 5 minute period. This testing was done on a 2.6.38+ kernel.
> > 
> > I'm running on a 48p system.
> > 
> > Ideas?
> > 
> 
>   I fear there is always a probability for eaten nmi (due to inflight nmi logic
> we have) or missed nmi (due to non-instant deliery of nmi).  Say the following
> scenario may happen:
> 
> 1) perf-nmi-0 (from counter 0) issued
> 2) uv-nmi issued
> 3) perf-nmi-0 latched
> 4) perf-nmi-1 (from counter 1) not yet issued but couter overflowed
> 5) nmi-handler
> 6) uv-nmi-latched
> 7) nmi-handler eats both nmis from perf-nmi-0 and uv-nmi because of in-flight
>    nmi logic we have
> 8) finally perf-nmi-1 should appear on line but counter already pulled down so
>    no nmi
> 
> and here you get missed nmi you expect from uv. I *guess*, not sure if it's possible.

Makes sense.


> If you disable nmi-watchdog on boot line, does it help?

Nmi_watchdog is disabled by default on our platforms.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-22 22:05                 ` Don Zickus
@ 2011-03-23 16:32                   ` Jack Steiner
  2011-03-23 17:53                     ` Don Zickus
  0 siblings, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-23 16:32 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Tue, Mar 22, 2011 at 06:05:05PM -0400, Don Zickus wrote:
> On Tue, Mar 22, 2011 at 04:25:19PM -0500, Jack Steiner wrote:
> > > > AFAICT, the UV nmi handler is not consuming extra NMI interrupts. I can't
> > > > rule out that I'm missing something but I don't see it.
> > > 
> > > What happens if you put the UV nmi handler below the hw_perf handler in
> > > priority?  I assume the DIE_NMIUNKNOWN snippet in the hw_perf handler will
> > > swallow some of the UV NMIs, but more importantly does it still generate
> > > the hang you see?
> > 
> > I verified that the failures ("perf top" stops) are the same on both RHEL6.1 & the
> > latest x86 2.6.38+ tree.
> 
> Thanks for testing that.
> 
> > 
> > I switched priorities & as expected, "perf top" no longer hangs. I see an occassional
> > missed UV NMI - about 1 every minute. I also see a few "dazed" messages as
> > well - 3 in a 5 minute period. This testing was done on a 2.6.38+ kernel.
> > 
> > I'm running on a 48p system.
> > 
> > Ideas?
> 
> Wow, interesting.
> 
> The first thing is in 'uv_handle_nmi' can you change that from
> DIE_NMIUNKNOWN back to DIE_NMI.  Originally I set it to DIE_NMIUNKNOWN
> because I didn't think you guys had the ability to determine if your BMC
> generated the NMI or not.  Recently George B. said you guys add a register
> bit to determine this, so I am wondering if by promoting this would fix
> the missed UV NMI.  I am speculating this is being swallowed by the
> hw_perf DIE_NMIUNKNOWN exception path.

Correct. I recently added a register that indicates the BMC sent an NMI.

Hmmm. Looks like I have been running with DIE_NMI. I think that came
from porting the patch from RHEL6 to upstream.

However, neither DIE_NMIUNKNOWN nor DIE_NMI gives the desired behavior (2.6.38+).

	- Using DIE_NMIUNKNOWN, I see many more "dazed" messages but no
	  perf top lockup. I see ~3 "dazed" messages per minute. UV NMIs are
	  being sent at a rate of 30/min, ie. ~10% failure rate.

	- Using DIE_NMI, no "dazed" messages but perf top hangs about once a
	  minute (rough estimate).


I wonder if we need a different approach to handling NMIs. Instead of using
the die_notifier list, introduce a new notifier list reserved exclusively
for NMIs. When an NMI occurs, all registered functions are unconditionally called.
If any function accepts the NMI, the remaining functions are still called but
the NMI is considered to have been valid (handled) & the "dazed" message
is suppressed.

This is more-or-less functionally equivalent to the last patch I posted but
may be cleaner. At a minimum, it is easier to understand the interactions
between the various handlers.



> 
> Second the "dazed" messages are being seen on other machines (currently
> core2quads) when using perf with lots of NMI events.  So you might be
> seeing a second more common issue there.  I still need to find time to
> debug that.
> 
> Finally, I am trying to scratch my head about the 'perf top' no longer
> hangs part.  The only thing I can think of is under high perf load (with
> out extra NMIs by your BMC), we have seen extra NMIs get generated while
> processing the current NMI (mainly because Nehalems have I think 4 or 8
> PMUs that can be activate at once, so multiple NMIs can trigger here).
> But we can recover from this because we check _all_ the PMIs during the
> NMI (which currently always comes from the PMU).
> 
> Now this extra NMI from the PMU can also happen on a singlely activated
> PMU because we reload the PMU, then check the events to see if we should
> disable it.  By the time we finish checking (and determine we are not done
> yet), the event could have rolled over and generated another NMI before we
> have finished processing the current one.
> 
> So throw in an external NMI into the above situation (which gets dropped
> as the third NMI I believe if I read the history of these NMI things
> correctly), then it is possible that if uv_handle_nmi is called first it
> could swallow the extra NMI as its own and leave the hw_perf hanging.
> (that's a mouthful, huh?)
> 
> Then again with the priorities switched I guess the opposite is true too,
> that your BMC is left missing an event.
> 
> This sort of supports the need for your patch earlier or something similar
> which says ignore the handler's return code and process all the events on
> the die_chain anyway.  And if noone has handled the NMI, then trigger an
> unknown NMI.
> 
> Unless there is a way to determine if an NMI is latched or not before
> issuing the iret and if so assumed we dropped an NMI and process everyone.
> 
> I'll need to think of a way to prove all this in the morning (or maybe
> later).
> 
> I hope that makes some sense as it is late and my brain is shutting down.
> 
> Cheers,
> Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 16:32                   ` Jack Steiner
@ 2011-03-23 17:53                     ` Don Zickus
  2011-03-23 20:00                       ` Don Zickus
  0 siblings, 1 reply; 38+ messages in thread
From: Don Zickus @ 2011-03-23 17:53 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Wed, Mar 23, 2011 at 11:32:55AM -0500, Jack Steiner wrote:
> > The first thing is in 'uv_handle_nmi' can you change that from
> > DIE_NMIUNKNOWN back to DIE_NMI.  Originally I set it to DIE_NMIUNKNOWN
> > because I didn't think you guys had the ability to determine if your BMC
> > generated the NMI or not.  Recently George B. said you guys add a register
> > bit to determine this, so I am wondering if by promoting this would fix
> > the missed UV NMI.  I am speculating this is being swallowed by the
> > hw_perf DIE_NMIUNKNOWN exception path.
> 
> Correct. I recently added a register that indicates the BMC sent an NMI.
> 
> Hmmm. Looks like I have been running with DIE_NMI. I think that came
> from porting the patch from RHEL6 to upstream.
> 
> However, neither DIE_NMIUNKNOWN  or DIE_NMI gives the desired behavior (2.6.38+).
> 
> 	- Using DIE_NMIUNKNOWN, I see many more "dazed" messages but no
> 	  perf top lockup. I see ~3 "dazed" messages per minute. UV NMIs are
> 	  being sent at a rate of 30/min, ie. ~10% failure rate.
> 
> 	- Using DIE_NMI, no "dazed" messages but perf top hangs about once a
> 	  minute (rough estimate).
> 
> 
> I wonder if we need a different approach to handling NMIs. Instead of using
> the die_notifier list, introduce a new notifier list reserved exclusively
> for NMIs. When an NMI occurs, all registered functions are unconditionally called.
> If any function accepts the NMI, the remaining functions are still called but
> the NMI is considered to have been valid (handled) & the "dazed" message
> is suppressed.
> 
> This is more-or-less functionally equivalent to the last patch I posted but
> may be cleaner. At a minimum, it is easier to understand the interactions
> between the various handlers.

This is the same approach I was realizing last night when I went to bed.
I think the more concurrent NMIs we have, the more tricky things get.  

I hacked up an ugly patch that might fix the 'dazed' message you are
seeing.  The original skip logic assumed the back-to-back nmis would stop
after 3 nmis.  Under load, those nmis could go on forever if the time it
takes to handle the nmi matches the period in which the nmi is being
generated (I assume all the stack dumping from the BMC nmi probably
lengthens the time it takes to handle the nmi?).

For example,  the first NMI might notice two perf counters triggered.  But
it doesn't know if it triggered under one or two NMIs, so it marks the
next NMI as a possible candidate to 'swallow' if no one claims it.

Once it is finished, it notices the next nmi came from perf too (reading
the status register).  Again we don't know if this is from the second NMI
that we have not 'swallowed' yet or from the third event (because the
second NMI was never generated).

Once that finishes, another nmi comes along.  The current code says that
one has to be the one we 'swallow' or if perf 'handles' it then assume
there are no 'extra' NMIs waiting to be swallowed.

This is where the problem is, as I have seen on my machine.  The
back-to-back nmis have gone up to 4 in-a-row before spitting out the extra
nmi the code was hoping to 'swallow'.

Let me know if the patch fixes that problem.  Then it will be one less
thing to worry about. :-)

Cheers,
Don


diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 19fbcad..f9dcd81 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1327,7 +1327,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 	if ((handled > 1) ||
 		/* the next nmi could be a back-to-back nmi */
 	    ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
-	     (__get_cpu_var(pmu_nmi).handled > 1))) {
+	     (__get_cpu_var(pmu_nmi).handled > 0) && handled && this_nmi)) {
 		/*
 		 * We could have two subsequent back-to-back nmis: The
 		 * first handles more than one counter, the 2nd
@@ -1338,6 +1338,8 @@ perf_event_nmi_handler(struct notifier_block *self,
 		 * handling more than one counter. We will mark the
 		 * next (3rd) and then drop it if unhandled.
 		 */
+		//if ((__get_cpu_var(pmu_nmi).handled == 1) && (handled == 1))
+		//	trace_printk("!! fixed?\n");
 		__get_cpu_var(pmu_nmi).marked	= this_nmi + 1;
 		__get_cpu_var(pmu_nmi).handled	= handled;
 	}

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 17:53                     ` Don Zickus
@ 2011-03-23 20:00                       ` Don Zickus
  2011-03-23 20:41                         ` Cyrill Gorcunov
                                           ` (2 more replies)
  0 siblings, 3 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-23 20:00 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
> Let me know if the patch fixes that problem.  Then it will be one less
> thing to worry about. :-)

Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
against 2.6.38.  Sorry about that.

Cheers,
Don


diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 87eab4a..62ec8e9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1375,7 +1375,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 	if ((handled > 1) ||
 		/* the next nmi could be a back-to-back nmi */
 	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
-	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
+	     (__this_cpu_read(pmu_nmi.handled) > 0) && handled && this_nmi)) {
 		/*
 		 * We could have two subsequent back-to-back nmis: The
 		 * first handles more than one counter, the 2nd
@@ -1386,6 +1386,8 @@ perf_event_nmi_handler(struct notifier_block *self,
 		 * handling more than one counter. We will mark the
 		 * next (3rd) and then drop it if unhandled.
 		 */
+		//if ((__this_cpu_read(pmu_nmi.handled) == 1) && (handled == 1))
+		//	trace_printk("!! fixed?\n");
 		__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
 		__this_cpu_write(pmu_nmi.handled, handled);
 	}

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 20:00                       ` Don Zickus
@ 2011-03-23 20:41                         ` Cyrill Gorcunov
  2011-03-23 20:45                         ` Cyrill Gorcunov
  2011-03-23 20:46                         ` Jack Steiner
  2 siblings, 0 replies; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-23 20:41 UTC (permalink / raw)
  To: Don Zickus
  Cc: Jack Steiner, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra, Robert Richter

On 03/23/2011 11:00 PM, Don Zickus wrote:
> On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
>> Let me know if the patch fixes that problem.  Then it will be one less
>> thing to worry about. :-)
> 
> Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
> against 2.6.38.  Sorry about that.
> 
> Cheers,
> Don
> 

Interesting. (CC'ing Robert)

-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 20:00                       ` Don Zickus
  2011-03-23 20:41                         ` Cyrill Gorcunov
@ 2011-03-23 20:45                         ` Cyrill Gorcunov
  2011-03-23 21:22                           ` Don Zickus
  2011-03-23 20:46                         ` Jack Steiner
  2 siblings, 1 reply; 38+ messages in thread
From: Cyrill Gorcunov @ 2011-03-23 20:45 UTC (permalink / raw)
  To: Don Zickus
  Cc: Jack Steiner, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra, Robert Richter

On 03/23/2011 11:00 PM, Don Zickus wrote:
> On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
>> Let me know if the patch fixes that problem.  Then it will be one less
>> thing to worry about. :-)
> 
> Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
> against 2.6.38.  Sorry about that.
> 
> Cheers,
> Don
> 
> 
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 87eab4a..62ec8e9 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1375,7 +1375,7 @@ perf_event_nmi_handler(struct notifier_block *self,
>  	if ((handled > 1) ||
>  		/* the next nmi could be a back-to-back nmi */
>  	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
> -	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
> +	     (__this_cpu_read(pmu_nmi.handled) > 0) && handled && this_nmi)) {

Don, why do you need to check for this_nmi here? it's zero for first nmi in a
system (right?), so I fail to get the reason for such check. What I miss?

>  		/*
>  		 * We could have two subsequent back-to-back nmis: The
>  		 * first handles more than one counter, the 2nd
> @@ -1386,6 +1386,8 @@ perf_event_nmi_handler(struct notifier_block *self,
>  		 * handling more than one counter. We will mark the
>  		 * next (3rd) and then drop it if unhandled.
>  		 */
> +		//if ((__this_cpu_read(pmu_nmi.handled) == 1) && (handled == 1))
> +		//	trace_printk("!! fixed?\n");
>  		__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
>  		__this_cpu_write(pmu_nmi.handled, handled);
>  	}


-- 
    Cyrill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 20:00                       ` Don Zickus
  2011-03-23 20:41                         ` Cyrill Gorcunov
  2011-03-23 20:45                         ` Cyrill Gorcunov
@ 2011-03-23 20:46                         ` Jack Steiner
  2011-03-23 21:23                           ` Don Zickus
  2 siblings, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-23 20:46 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Wed, Mar 23, 2011 at 04:00:08PM -0400, Don Zickus wrote:
> On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
> > Let me know if the patch fixes that problem.  Then it will be one less
> > thing to worry about. :-)
> 
> Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
> against 2.6.38.  Sorry about that.

No problem.

I applied the patch below. However, I still see the "dazed" messages with
about the same frequency.

> 
> Cheers,
> Don
> 
> 
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 87eab4a..62ec8e9 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1375,7 +1375,7 @@ perf_event_nmi_handler(struct notifier_block *self,
>  	if ((handled > 1) ||
>  		/* the next nmi could be a back-to-back nmi */
>  	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
> -	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
> +	     (__this_cpu_read(pmu_nmi.handled) > 0) && handled && this_nmi)) {
>  		/*
>  		 * We could have two subsequent back-to-back nmis: The
>  		 * first handles more than one counter, the 2nd
> @@ -1386,6 +1386,8 @@ perf_event_nmi_handler(struct notifier_block *self,
>  		 * handling more than one counter. We will mark the
>  		 * next (3rd) and then drop it if unhandled.
>  		 */
> +		//if ((__this_cpu_read(pmu_nmi.handled) == 1) && (handled == 1))
> +		//	trace_printk("!! fixed?\n");
>  		__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
>  		__this_cpu_write(pmu_nmi.handled, handled);
>  	}

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 20:45                         ` Cyrill Gorcunov
@ 2011-03-23 21:22                           ` Don Zickus
  0 siblings, 0 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-23 21:22 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Jack Steiner, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra, Robert Richter

On Wed, Mar 23, 2011 at 11:45:20PM +0300, Cyrill Gorcunov wrote:
> On 03/23/2011 11:00 PM, Don Zickus wrote:
> > On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
> >> Let me know if the patch fixes that problem.  Then it will be one less
> >> thing to worry about. :-)
> > 
> > Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
> > against 2.6.38.  Sorry about that.
> > 
> > Cheers,
> > Don
> > 
> > 
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 87eab4a..62ec8e9 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -1375,7 +1375,7 @@ perf_event_nmi_handler(struct notifier_block *self,
> >  	if ((handled > 1) ||
> >  		/* the next nmi could be a back-to-back nmi */
> >  	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
> > -	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
> > +	     (__this_cpu_read(pmu_nmi.handled) > 0) && handled && this_nmi)) {
> 
> Don, why do you need to check for this_nmi here? it's zero for first nmi in a
> system (right?), so I fail to get the reason for such check. What I miss?

It was a stupid optimization, otherwise it _always_ traverses on the
first nmi.  I wasn't sure that is what I wanted.  Mainly I was trying to
wrap my head around the problem.  You can remove it to see if the problem
is still fixed.

I'm not a fan of this fix as it is getting a little ugly, but for now...

Cheers,
Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 20:46                         ` Jack Steiner
@ 2011-03-23 21:23                           ` Don Zickus
  2011-03-24 17:09                             ` Jack Steiner
  0 siblings, 1 reply; 38+ messages in thread
From: Don Zickus @ 2011-03-23 21:23 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Wed, Mar 23, 2011 at 03:46:47PM -0500, Jack Steiner wrote:
> On Wed, Mar 23, 2011 at 04:00:08PM -0400, Don Zickus wrote:
> > On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
> > > Let me know if the patch fixes that problem.  Then it will be one less
> > > thing to worry about. :-)
> > 
> > Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
> > against 2.6.38.  Sorry about that.
> 
> No problem.
> 
> I applied the patch below. However, I still see the "dazed" messages with
> about the same frequency.

Crap.  It fixed acme's problem though where he was using 3 counters at
high frequency.  The problem must be elsewhere.  I'll have to figure out a
new strategy.  I'll probably put together a patch full of trace_printk
output to see if I can characterize it.

Cheers,
Don

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-23 21:23                           ` Don Zickus
@ 2011-03-24 17:09                             ` Jack Steiner
  2011-03-24 18:43                               ` Don Zickus
  0 siblings, 1 reply; 38+ messages in thread
From: Jack Steiner @ 2011-03-24 17:09 UTC (permalink / raw)
  To: Don Zickus
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Wed, Mar 23, 2011 at 05:23:58PM -0400, Don Zickus wrote:
> On Wed, Mar 23, 2011 at 03:46:47PM -0500, Jack Steiner wrote:
> > On Wed, Mar 23, 2011 at 04:00:08PM -0400, Don Zickus wrote:
> > > On Wed, Mar 23, 2011 at 01:53:20PM -0400, Don Zickus wrote:
> > > > Let me know if the patch fixes that problem.  Then it will be one less
> > > > thing to worry about. :-)
> > > 
> > > Ok, I was an idiot and made the patch against RHEL-6.  Here is the one
> > > against 2.6.38.  Sorry about that.
> > 
> > No problem.
> > 
> > I applied the patch below. However, I still see the "dazed" messages with
> > about the same frequency.
> 
> Crap.  It fixed acme's problem though where he was using 3 counters at
> high frequency.  The problem must be elsewhere.  I'll have to figure out a
> new strategy.  I'll probably put together a patch full of trace_printk
> output to see if I can characterize it.


I added tracing to see if I could get more clues on the cause
of the "dazed" message. Unfortunately, I don't see anything - maybe
you do.

I used a tracing module that I've used for other things. I'm sure
there are other facilities available, but I've used this for a long time & it's
easy to update for specific purposes.
	rtc = usec clock
	rtc-delta = usec since previous trace entry
> 	id  = trace identifier (not particularly useful here)
	p1, p2 = tracepoint specific data. See patch below
	   For hw_perf
		p1 [63:32] this_nmi
		   [31:0]  handled
		p2 [63:32] pmu_nmi.marked
		   [31:0]  pmu_nmi.handled


Here is a trace leading up to a failure. Times are in usec:

 cpu              rtc    rtc-delta   id               p1               p2 desc
  10         80996952        44005    1                0                0 NMI handler
  10         80996952            0   40                0                0 perf_event_nmi_handler
  10         80996952            0   40                0                0 perf_event_nmi_handler NMI
  10         80996955            3   40     343000000001     33bc00000002 perf_event_nmi_handler NMI handled - this/handled pmumarked/handled
  10         80996955            0    1                0                0 NMI handler OK

  10         81036965        40010    1                0                0 NMI handler
  10         81036965            0   40                0                0 perf_event_nmi_handler
  10         81036966            1   40                0                0 perf_event_nmi_handler NMI
  10         81036968            2   40     343100000001     33bc00000002 perf_event_nmi_handler NMI handled - this/handled pmumarked/handled
  10         81036968            0    1                0                0 NMI handler OK

  10         81064135        27167    1                0                0 NMI handler
  10         81064136            1   40                0                0 perf_event_nmi_handler
  10         81064137            1   40                0                0 perf_event_nmi_handler NMI
  10         81064138            1   40                0                0 perf_event_nmi_handler - not handled
  10         81064138            0    3                0                0 NMI handler failed
  10         81064146            8    4                0                0 Unknown NMI handler
  10         81064147            1   20               95                0 UV NMI not received
  10         81064147            0   40                0                0 perf_event_nmi_handler
  10         81064148            1   40             3432             33bc perf_event_nmi_handler NMIUNKNOWN
  10         81064148            0   99                0                0 Unknown NMI handler


The last trace is just prior to a "dazed" failure.

I dont see anything unusual. Just looks like a spurious NMI with no cause. The PMU did not
see an NMI cause. The previous couple of NMIs looked (at least to me) normal.
NMIs are occurring every ~40msec. No UV NMIs were recently received. No multiple PMU
events handled.

Here is a trace where a UV NMI was received:

   0        371742833         2453    1                0                0 NMI handler
   0        371742834            1   40                0                0 perf_event_nmi_handler
   0        371742834            0   40                0                0 perf_event_nmi_handler NMI
   0        371742836            2   40                0                0 perf_event_nmi_handler - not handled
   0        371742836            0    3                0                0 NMI handler failed
   0        371742856           20    4                0                0 Unknown NMI handler
   0        371742913           57   21               f1                0 UV NMI received



I've include the patch (latest x86 tree) so you can see exactly where the trace points
were inserted.



Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-23 10:30:35.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-24 10:47:59.865562087 -0500
@@ -23,6 +23,7 @@
 #include <linux/io.h>
 #include <linux/pci.h>
 #include <linux/kdebug.h>
+#include <linux/utrace.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -54,6 +55,9 @@ unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);
 
+void (*utrace_func)(int id, unsigned long, unsigned long, const char*);
+EXPORT_SYMBOL_GPL(utrace_func);
+
 /* Should be part of uv_hub_info but that breas the KABI */
 static struct uv_nmi_info {
 	spinlock_t	nmi_lock;
@@ -692,11 +696,14 @@ int uv_handle_nmi(struct notifier_block
 	 * if a hw_perf and BMC NMI are received at about the same time
 	 * and both events are processed with the first NMI.
 	 */
-	if (__get_cpu_var(cpu_last_nmi_count) == uv_nmi_info[blade].nmi_count)
+	if (__get_cpu_var(cpu_last_nmi_count) == uv_nmi_info[blade].nmi_count) {
+		UTRACE(20, __get_cpu_var(cpu_last_nmi_count), 0, "UV NMI not received");
 		return NOTIFY_DONE;
+	}
 
 	printk("ZZZ:%d NMI %ld %ld\n", smp_processor_id(), __get_cpu_var(cpu_last_nmi_count), uv_nmi_info[blade].nmi_count);
 	__get_cpu_var(cpu_last_nmi_count) = uv_nmi_info[blade].nmi_count;
+	UTRACE(21, __get_cpu_var(cpu_last_nmi_count), 0, "UV NMI received");
 
 	/*
 	 * Use a lock so only one cpu prints at a time.
Index: linux/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/perf_event.c	2011-03-23 15:33:48.000000000 -0500
+++ linux/arch/x86/kernel/cpu/perf_event.c	2011-03-24 10:47:20.101496911 -0500
@@ -25,6 +25,7 @@
 #include <linux/highmem.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
+#include <linux/utrace.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -1341,15 +1342,19 @@ perf_event_nmi_handler(struct notifier_b
 	struct die_args *args = __args;
 	unsigned int this_nmi;
 	int handled;
+	unsigned long tmp1, tmp2;
 
 	if (!atomic_read(&active_events))
 		return NOTIFY_DONE;
 
+	UTRACE(40, 0, 0, "perf_event_nmi_handler");
 	switch (cmd) {
 	case DIE_NMI:
+		UTRACE(40, 0, 0, "perf_event_nmi_handler NMI");
 		break;
 	case DIE_NMIUNKNOWN:
 		this_nmi = percpu_read(irq_stat.__nmi_count);
+		UTRACE(40, this_nmi, __this_cpu_read(pmu_nmi.marked), "perf_event_nmi_handler NMIUNKNOWN");
 		if (this_nmi != __this_cpu_read(pmu_nmi.marked))
 			/* let the kernel handle the unknown nmi */
 			return NOTIFY_DONE;
@@ -1368,10 +1373,15 @@ perf_event_nmi_handler(struct notifier_b
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	handled = x86_pmu.handle_irq(args->regs);
-	if (!handled)
+	if (!handled) {
+		UTRACE(40, handled, 0, "perf_event_nmi_handler - not handled");
 		return NOTIFY_DONE;
+	}
 
 	this_nmi = percpu_read(irq_stat.__nmi_count);
+	tmp1 = ((unsigned long)this_nmi << 32) | handled;
+	tmp2 = ((unsigned long)__this_cpu_read(pmu_nmi.marked) << 32) | __this_cpu_read(pmu_nmi.handled);
+	UTRACE(40, tmp1, tmp2, "perf_event_nmi_handler NMI handled - this/handled pmumarked/handled");
 	if ((handled > 1) ||
 		/* the next nmi could be a back-to-back nmi */
 	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
Index: linux/arch/x86/kernel/traps.c
===================================================================
--- linux.orig/arch/x86/kernel/traps.c	2011-03-22 15:10:36.000000000 -0500
+++ linux/arch/x86/kernel/traps.c	2011-03-24 10:35:15.410168027 -0500
@@ -31,6 +31,7 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/utrace.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -371,9 +372,11 @@ io_check_error(unsigned char reason, str
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
+	UTRACE(4, 0, 0, "Unknown NMI handler");
 	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
 			NOTIFY_STOP)
 		return;
+	UTRACE(99, 0, 0, "Unknown NMI handler");
 #ifdef CONFIG_MCA
 	/*
 	 * Might actually be able to figure out what the guilty party
@@ -403,8 +406,12 @@ static notrace __kprobes void default_do
 	 * NMI, otherwise we may lose it, because the CPU-specific
 	 * NMI can not be detected/processed on other CPUs.
 	 */
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+	UTRACE(1, 0, 0, "NMI handler");
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) {
+		UTRACE(1, 0, 0, "NMI handler OK");
 		return;
+	}
+	UTRACE(3, 0, 0, "NMI handler failed");
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
 	raw_spin_lock(&nmi_reason_lock);
Index: linux/include/linux/utrace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/utrace.h	2011-03-24 10:30:52.438555195 -0500
@@ -0,0 +1,14 @@
+#ifndef _LINUX_UTRACE_H_
+#define _LINUX_UTRACE_H_
+
+
+extern void (*utrace_func)(int id, unsigned long, unsigned long, const char *);
+
+#define UTRACE(id, a, b, c)						\
+       do {								\
+               if (unlikely(utrace_func))				\
+                       (*utrace_func)(id, a, b, c);			\
+       } while (0)
+
+#endif         /* _LINUX_UTRACE_H_ */
+


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] x86, UV: Fix NMI handler for UV platforms
  2011-03-24 17:09                             ` Jack Steiner
@ 2011-03-24 18:43                               ` Don Zickus
  0 siblings, 0 replies; 38+ messages in thread
From: Don Zickus @ 2011-03-24 18:43 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Cyrill Gorcunov, Ingo Molnar, tglx, hpa, x86, linux-kernel,
	Peter Zijlstra

On Thu, Mar 24, 2011 at 12:09:44PM -0500, Jack Steiner wrote:
> 
> I added tracing to see if I could get more clues on the cause
> of the "dazed" message. Unfortunately, I don't see anything - maybe
> you do.

There goes my other theory where the back-to-back nmi logic broke down
because the UV nmi jumped in the middle of the chain but continued with
the back-to-back nmis.

> 
> I used a tracing module that I've used for other things. I'm sure
> there are other facilities available, but I've used this for a long time & it's
> easy to update for specific purposes.
> 	rtc = usec clock
> 	rtc-delta = usec since previous trace entry
> 	id  = trace identifier (not particularly useful here)
> 	p1, p2 = tracepoint specific data. See patch below
> 	   For hw_perf
> 		p1 [63:32] this_nmi
> 		   [31:0]  handled
> 		p2 [63:32] pmu_nmi.marked
> 		   [31:0]  pmu_nmi.handled

I have done similar stuff using trace_printk around all the wrmsrl and
rdmsrls.  I have noticed that the counter is shutdown in prep to sched
out a task (it calls x86_pmu_del, which calls x86_pmu_stop).  This is in a
non-nmi context.  Shortly after the pmu is stopped an unknown nmi comes in
and causes the 'Dazed' messages.  I thought it was the x86_pmu.disable
call racing with the disabling of the active_mask, but that didn't fix my
problem. :-/

Unfortunately, I am very busy at work and was hoping to postpone further
debugging for a couple of weeks until things quiet down.  I know Russ has
a bug opened for it, we can track it (so I don't forget :-p ).

Cheers,
Don

> 
> 
> Here is a trace leading up to a failure. Times are in usec:
> 
>  cpu              rtc    rtc-delta   id               p1               p2 desc
>   10         80996952        44005    1                0                0 NMI handler
>   10         80996952            0   40                0                0 perf_event_nmi_handler
>   10         80996952            0   40                0                0 perf_event_nmi_handler NMI
>   10         80996955            3   40     343000000001     33bc00000002 perf_event_nmi_handler NMI handled - this/handled pmumarked/handled
>   10         80996955            0    1                0                0 NMI handler OK
> 
>   10         81036965        40010    1                0                0 NMI handler
>   10         81036965            0   40                0                0 perf_event_nmi_handler
>   10         81036966            1   40                0                0 perf_event_nmi_handler NMI
>   10         81036968            2   40     343100000001     33bc00000002 perf_event_nmi_handler NMI handled - this/handled pmumarked/handled
>   10         81036968            0    1                0                0 NMI handler OK
> 
>   10         81064135        27167    1                0                0 NMI handler
>   10         81064136            1   40                0                0 perf_event_nmi_handler
>   10         81064137            1   40                0                0 perf_event_nmi_handler NMI
>   10         81064138            1   40                0                0 perf_event_nmi_handler - not handled
>   10         81064138            0    3                0                0 NMI handler failed
>   10         81064146            8    4                0                0 Unknown NMI handler
>   10         81064147            1   20               95                0 UV NMI not received
>   10         81064147            0   40                0                0 perf_event_nmi_handler
>   10         81064148            1   40             3432             33bc perf_event_nmi_handler NMIUNKNOWN
>   10         81064148            0   99                0                0 Unknown NMI handler
> 
> 
> The last trace is just prior to a "dazed" failure.
> 
> I dont see anything unusual. Just looks like a spurious NMI with no cause. The PMU did not
> see an NMI cause. The previous couple of NMIs looked (at least to me) normal.
> NMIs are occurring every ~40msec. No UV NMIs were recently received. No multiple PMU
> events handled.
> 
> Here is a trace where a UV NMI was received:
> 
>    0        371742833         2453    1                0                0 NMI handler
>    0        371742834            1   40                0                0 perf_event_nmi_handler
>    0        371742834            0   40                0                0 perf_event_nmi_handler NMI
>    0        371742836            2   40                0                0 perf_event_nmi_handler - not handled
>    0        371742836            0    3                0                0 NMI handler failed
>    0        371742856           20    4                0                0 Unknown NMI handler
>    0        371742913           57   21               f1                0 UV NMI received
> 
> 
> 
> I've include the patch (latest x86 tree) so you can see exactly where the trace points
> were inserted.
> 
> 
> 
> Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-23 10:30:35.000000000 -0500
> +++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2011-03-24 10:47:59.865562087 -0500
> @@ -23,6 +23,7 @@
>  #include <linux/io.h>
>  #include <linux/pci.h>
>  #include <linux/kdebug.h>
> +#include <linux/utrace.h>
>  
>  #include <asm/uv/uv_mmrs.h>
>  #include <asm/uv/uv_hub.h>
> @@ -54,6 +55,9 @@ unsigned int uv_apicid_hibits;
>  EXPORT_SYMBOL_GPL(uv_apicid_hibits);
>  static DEFINE_SPINLOCK(uv_nmi_lock);
>  
> +void (*utrace_func)(int id, unsigned long, unsigned long, const char*);
> +EXPORT_SYMBOL_GPL(utrace_func);
> +
>  /* Should be part of uv_hub_info but that breas the KABI */
>  static struct uv_nmi_info {
>  	spinlock_t	nmi_lock;
> @@ -692,11 +696,14 @@ int uv_handle_nmi(struct notifier_block
>  	 * if a hw_perf and BMC NMI are received at about the same time
>  	 * and both events are processed with the first NMI.
>  	 */
> -	if (__get_cpu_var(cpu_last_nmi_count) == uv_nmi_info[blade].nmi_count)
> +	if (__get_cpu_var(cpu_last_nmi_count) == uv_nmi_info[blade].nmi_count) {
> +		UTRACE(20, __get_cpu_var(cpu_last_nmi_count), 0, "UV NMI not received");
>  		return NOTIFY_DONE;
> +	}
>  
>  	printk("ZZZ:%d NMI %ld %ld\n", smp_processor_id(), __get_cpu_var(cpu_last_nmi_count), uv_nmi_info[blade].nmi_count);
>  	__get_cpu_var(cpu_last_nmi_count) = uv_nmi_info[blade].nmi_count;
> +	UTRACE(21, __get_cpu_var(cpu_last_nmi_count), 0, "UV NMI received");
>  
>  	/*
>  	 * Use a lock so only one cpu prints at a time.
> Index: linux/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/cpu/perf_event.c	2011-03-23 15:33:48.000000000 -0500
> +++ linux/arch/x86/kernel/cpu/perf_event.c	2011-03-24 10:47:20.101496911 -0500
> @@ -25,6 +25,7 @@
>  #include <linux/highmem.h>
>  #include <linux/cpu.h>
>  #include <linux/bitops.h>
> +#include <linux/utrace.h>
>  
>  #include <asm/apic.h>
>  #include <asm/stacktrace.h>
> @@ -1341,15 +1342,19 @@ perf_event_nmi_handler(struct notifier_b
>  	struct die_args *args = __args;
>  	unsigned int this_nmi;
>  	int handled;
> +	unsigned long tmp1, tmp2;
>  
>  	if (!atomic_read(&active_events))
>  		return NOTIFY_DONE;
>  
> +	UTRACE(40, 0, 0, "perf_event_nmi_handler");
>  	switch (cmd) {
>  	case DIE_NMI:
> +		UTRACE(40, 0, 0, "perf_event_nmi_handler NMI");
>  		break;
>  	case DIE_NMIUNKNOWN:
>  		this_nmi = percpu_read(irq_stat.__nmi_count);
> +		UTRACE(40, this_nmi, __this_cpu_read(pmu_nmi.marked), "perf_event_nmi_handler NMIUNKNOWN");
>  		if (this_nmi != __this_cpu_read(pmu_nmi.marked))
>  			/* let the kernel handle the unknown nmi */
>  			return NOTIFY_DONE;
> @@ -1368,10 +1373,15 @@ perf_event_nmi_handler(struct notifier_b
>  	apic_write(APIC_LVTPC, APIC_DM_NMI);
>  
>  	handled = x86_pmu.handle_irq(args->regs);
> -	if (!handled)
> +	if (!handled) {
> +		UTRACE(40, handled, 0, "perf_event_nmi_handler - not handled");
>  		return NOTIFY_DONE;
> +	}
>  
>  	this_nmi = percpu_read(irq_stat.__nmi_count);
> +	tmp1 = ((unsigned long)this_nmi << 32) | handled;
> +	tmp2 = ((unsigned long)__this_cpu_read(pmu_nmi.marked) << 32) | __this_cpu_read(pmu_nmi.handled);
> +	UTRACE(40, tmp1, tmp2, "perf_event_nmi_handler NMI handled - this/handled pmumarked/handled");
>  	if ((handled > 1) ||
>  		/* the next nmi could be a back-to-back nmi */
>  	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
> Index: linux/arch/x86/kernel/traps.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/traps.c	2011-03-22 15:10:36.000000000 -0500
> +++ linux/arch/x86/kernel/traps.c	2011-03-24 10:35:15.410168027 -0500
> @@ -31,6 +31,7 @@
>  #include <linux/mm.h>
>  #include <linux/smp.h>
>  #include <linux/io.h>
> +#include <linux/utrace.h>
>  
>  #ifdef CONFIG_EISA
>  #include <linux/ioport.h>
> @@ -371,9 +372,11 @@ io_check_error(unsigned char reason, str
>  static notrace __kprobes void
>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>  {
> +	UTRACE(4, 0, 0, "Unknown NMI handler");
>  	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
>  			NOTIFY_STOP)
>  		return;
> +	UTRACE(99, 0, 0, "Unknown NMI handler");
>  #ifdef CONFIG_MCA
>  	/*
>  	 * Might actually be able to figure out what the guilty party
> @@ -403,8 +406,12 @@ static notrace __kprobes void default_do
>  	 * NMI, otherwise we may lose it, because the CPU-specific
>  	 * NMI can not be detected/processed on other CPUs.
>  	 */
> -	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> +	UTRACE(1, 0, 0, "NMI handler");
> +	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) {
> +		UTRACE(1, 0, 0, "NMI handler OK");
>  		return;
> +	}
> +	UTRACE(3, 0, 0, "NMI handler failed");
>  
>  	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
>  	raw_spin_lock(&nmi_reason_lock);
> Index: linux/include/linux/utrace.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux/include/linux/utrace.h	2011-03-24 10:30:52.438555195 -0500
> @@ -0,0 +1,14 @@
> +#ifndef _LINUX_UTRACE_H_
> +#define _LINUX_UTRACE_H_
> +
> +
> +extern void (*utrace_func)(int id, unsigned long, unsigned long, const char *);
> +
> +#define UTRACE(id, a, b, c)						\
> +       do {								\
> +               if (unlikely(utrace_func))				\
> +                       (*utrace_func)(id, a, b, c);			\
> +       } while (0)
> +
> +#endif         /* _LINUX_UTRACE_H_ */
> +
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2011-03-24 18:43 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-21 16:01 [PATCH] x86, UV: Fix NMI handler for UV platforms Jack Steiner
2011-03-21 16:14 ` Ingo Molnar
2011-03-21 16:26   ` Cyrill Gorcunov
2011-03-21 16:43     ` Cyrill Gorcunov
2011-03-21 17:00       ` Cyrill Gorcunov
2011-03-21 17:08         ` Jack Steiner
2011-03-21 17:19           ` Cyrill Gorcunov
2011-03-21 17:34             ` Jack Steiner
2011-03-21 17:48               ` Cyrill Gorcunov
2011-03-21 17:55                 ` Cyrill Gorcunov
2011-03-21 18:15           ` Cyrill Gorcunov
2011-03-21 18:24             ` Jack Steiner
2011-03-21 17:53       ` Don Zickus
2011-03-21 17:51     ` Don Zickus
2011-03-21 18:00       ` Cyrill Gorcunov
2011-03-21 18:22       ` Jack Steiner
2011-03-21 19:37         ` Don Zickus
2011-03-21 20:37           ` Jack Steiner
2011-03-22 17:11           ` Jack Steiner
2011-03-22 18:44             ` Don Zickus
2011-03-22 20:02               ` Jack Steiner
2011-03-22 21:25               ` Jack Steiner
2011-03-22 22:02                 ` Cyrill Gorcunov
2011-03-23 13:36                   ` Jack Steiner
2011-03-22 22:05                 ` Don Zickus
2011-03-23 16:32                   ` Jack Steiner
2011-03-23 17:53                     ` Don Zickus
2011-03-23 20:00                       ` Don Zickus
2011-03-23 20:41                         ` Cyrill Gorcunov
2011-03-23 20:45                         ` Cyrill Gorcunov
2011-03-23 21:22                           ` Don Zickus
2011-03-23 20:46                         ` Jack Steiner
2011-03-23 21:23                           ` Don Zickus
2011-03-24 17:09                             ` Jack Steiner
2011-03-24 18:43                               ` Don Zickus
2011-03-21 16:56   ` Jack Steiner
2011-03-21 18:05     ` Ingo Molnar
2011-03-21 19:23       ` [PATCH V2] " Jack Steiner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).