[RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants

All of lore.kernel.org
 help / color / mirror / Atom feed

* [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
@ 2010-09-10  2:51 Huang Ying
  2010-09-10  2:51 ` [RFC 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
                   ` (6 more replies)
  0 siblings, 7 replies; 69+ messages in thread
From: Huang Ying @ 2010-09-10  2:51 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, Andi Kleen, Huang Ying

Replace the NMI related magic numbers with symbol constants.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/mach_traps.h |   12 +++++++++++-
 arch/x86/kernel/traps.c           |   18 +++++++++---------
 2 files changed, 20 insertions(+), 10 deletions(-)

--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
 
 #include <asm/mc146818rtc.h>
 
+#define NMI_REASON_PORT		0x61
+
+#define NMI_REASON_MEMPAR	0x80
+#define NMI_REASON_IOCHK	0x40
+#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_MEMPAR	0x04
+#define NMI_REASON_CLEAR_IOCHK	0x08
+#define NMI_REASON_CLEAR_MASK	0x0f
+
 static inline unsigned char get_nmi_reason(void)
 {
-	return inb(0x61);
+	return inb(NMI_REASON_PORT);
 }
 
 static inline void reassert_nmi(void)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -323,8 +323,8 @@ mem_parity_error(unsigned char reason, s
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 
 	/* Clear and disable the memory parity error line. */
-	reason = (reason & 0xf) | 4;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_MEMPAR;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -339,15 +339,15 @@ io_check_error(unsigned char reason, str
 		panic("NMI IOCK error: Not continuing");
 
 	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & 0xf) | 8;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 
 	i = 2000;
 	while (--i)
 		udelay(1000);
 
-	reason &= ~8;
-	outb(reason, 0x61);
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -388,7 +388,7 @@ static notrace __kprobes void default_do
 	if (!cpu)
 		reason = get_nmi_reason();
 
-	if (!(reason & 0xc0)) {
+	if (!(reason & NMI_REASON_MASK)) {
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 								== NOTIFY_STOP)
 			return;
@@ -418,9 +418,9 @@ static notrace __kprobes void default_do
 		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & 0x80)
+	if (reason & NMI_REASON_MEMPAR)
 		mem_parity_error(reason, regs);
-	if (reason & 0x40)
+	if (reason & NMI_REASON_IOCHK)
 		io_check_error(reason, regs);
 #ifdef CONFIG_X86_32
 	/*

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [RFC 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
@ 2010-09-10  2:51 ` Huang Ying
  2010-09-10  2:51 ` [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 69+ messages in thread
From: Huang Ying @ 2010-09-10  2:51 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, Andi Kleen, Huang Ying

Prevent the long delay in io_check_error make NMI watchdog timeout.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/traps.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -342,9 +342,11 @@ io_check_error(unsigned char reason, str
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);
 
-	i = 2000;
-	while (--i)
-		udelay(1000);
+	i = 20000;
+	while (--i) {
+		touch_nmi_watchdog();
+		udelay(100);
+	}
 
 	reason &= ~NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
  2010-09-10  2:51 ` [RFC 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
@ 2010-09-10  2:51 ` Huang Ying
  2010-09-13  1:02   ` Robert Richter
  2010-09-21 23:04   ` Maciej W. Rozycki
  2010-09-10  2:51 ` [RFC 4/6] x86, NMI, Rewrite NMI handler Huang Ying
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 69+ messages in thread
From: Huang Ying @ 2010-09-10  2:51 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, Andi Kleen, Huang Ying

memory parity error is only valid for IBM PC-AT, newer machine use 7
bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
reported via MCE. So corresponding function name and kernel log string
is changed.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/mach_traps.h |    6 +++---
 arch/x86/kernel/traps.c           |   24 ++++++------------------
 2 files changed, 9 insertions(+), 21 deletions(-)

--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -9,11 +9,11 @@
 
 #define NMI_REASON_PORT		0x61
 
-#define NMI_REASON_MEMPAR	0x80
+#define NMI_REASON_SERR		0x80
 #define NMI_REASON_IOCHK	0x40
-#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
+#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
 
-#define NMI_REASON_CLEAR_MEMPAR	0x04
+#define NMI_REASON_CLEAR_SERR	0x04
 #define NMI_REASON_CLEAR_IOCHK	0x08
 #define NMI_REASON_CLEAR_MASK	0x0f
 
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -301,29 +301,17 @@ gp_in_kernel:
 }
 
 static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
-
-	printk(KERN_EMERG
-		"You have some hardware problem, likely on the PCI bus.\n");
-
-#if defined(CONFIG_EDAC)
-	if (edac_handler_set()) {
-		edac_atomic_assert_error();
-		return;
-	}
-#endif
+	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");
 
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 
-	/* Clear and disable the memory parity error line. */
-	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_MEMPAR;
+	/* Clear and disable the PCI SERR error line. */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
 	outb(reason, NMI_REASON_PORT);
 }
 
@@ -420,8 +408,8 @@ static notrace __kprobes void default_do
 		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & NMI_REASON_MEMPAR)
-		mem_parity_error(reason, regs);
+	if (reason & NMI_REASON_SERR)
+		pci_serr_error(reason, regs);
 	if (reason & NMI_REASON_IOCHK)
 		io_check_error(reason, regs);
 #ifdef CONFIG_X86_32

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
  2010-09-10  2:51 ` [RFC 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
  2010-09-10  2:51 ` [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
@ 2010-09-10  2:51 ` Huang Ying
  2010-09-10 15:56   ` Don Zickus
  2010-09-13  1:16   ` Robert Richter
  2010-09-10  2:51 ` [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 69+ messages in thread
From: Huang Ying @ 2010-09-10  2:51 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, Andi Kleen, Huang Ying

The original NMI handler is quite outdated in many aspects. This patch
try to fix it.

In original code, NMI reason io port (0x61) is only processed on
BSP. This makes it impossible to hot-remove BSP. To solve the issue,
a raw spinlock is used to make the port can be processed on any CPU.

Because the io port 0x61 and other non-CPU-specific NMI sources can be
processed on any CPU, the order to process the NMI sources are changed
as follow:

notify_die(DIE_NMI_IPI);
notify_die(DIE_NMI);
/* process io port 0x61 */
nmi_watchdog_touch();
notify_die(DIE_NMIUNKNOWN);
unknown_nmi();

DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
event, oprofile, crash IPI, etc. While DIE_NMI is used to process
non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
Interface) GHES (Generic Hardware Error Source), etc.

DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
possible that APEI GHES is processed on CPU 1, while unknown NMI is
gotten on CPU 0.

In this new order of processing, performance sensitive NMI sources
such as oprofile or perf event will have better performance because
the time consuming IO port reading is done after them.

Only one NMI is eaten for each NMI handler call, even for PCI SERR and
IOCHK NMIs. Because one NMI should be raised for each of them, eating
too many NMI will cause unnecessary unknown NMI.

The die value used in NMI sources are fixed accordingly.

The NMI handler in the patch is designed by Andi Kleen.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c  |    1 
 arch/x86/kernel/traps.c           |   86 +++++++++++++++++++-------------------
 arch/x86/oprofile/nmi_int.c       |    1 
 arch/x86/oprofile/nmi_timer_int.c |    2 
 drivers/char/ipmi/ipmi_watchdog.c |    2 
 drivers/watchdog/hpwdt.c          |    2 
 6 files changed, 48 insertions(+), 46 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1237,7 +1237,6 @@ perf_event_nmi_handler(struct notifier_b
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-	case DIE_NMI:
 	case DIE_NMI_IPI:
 		break;
 	case DIE_NMIUNKNOWN:
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,12 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -343,9 +349,6 @@ io_check_error(unsigned char reason, str
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
-			NOTIFY_STOP)
-		return;
 #ifdef CONFIG_MCA
 	/*
 	 * Might actually be able to figure out what the guilty party
@@ -370,55 +373,56 @@ unknown_nmi_error(unsigned char reason,
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
-	int cpu;
-
-	cpu = smp_processor_id();
 
-	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu)
-		reason = get_nmi_reason();
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
 
-	if (!(reason & NMI_REASON_MASK)) {
-		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-								== NOTIFY_STOP)
-			return;
+	/*
+	 * CPU-specific NMI: send to specific CPU or NMI sources must
+	 * be processed on specific CPU
+	 */
+	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
+	    == NOTIFY_STOP)
+		return;
 
-#ifdef CONFIG_X86_LOCAL_APIC
-		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
-			return;
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
 
-#ifndef CONFIG_LOCKUP_DETECTOR
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
+	if (reason & NMI_REASON_MASK) {
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
 		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
 		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-			unknown_nmi_error(reason, regs);
-#else
-		unknown_nmi_error(reason, regs);
+		reassert_nmi();
 #endif
-
+		raw_spin_unlock(&nmi_reason_lock);
 		return;
 	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-		return;
+	raw_spin_unlock(&nmi_reason_lock);
 
-	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & NMI_REASON_SERR)
-		pci_serr_error(reason, regs);
-	if (reason & NMI_REASON_IOCHK)
-		io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered:
-	 */
-	reassert_nmi();
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
+	if (nmi_watchdog_tick(regs, reason))
+		return;
+	if (do_nmi_callback(regs, smp_processor_id()))
+		return;
 #endif
+
+	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
+	    == NOTIFY_STOP)
+		return;
+
+	unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -64,7 +64,6 @@ static int profile_exceptions_notify(str
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI:
 	case DIE_NMI_IPI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI:
+	case DIE_NMI_IPI:
 		oprofile_add_sample(args->regs, 0);
 		ret = NOTIFY_STOP;
 		break;
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un
 {
 	struct die_args *args = data;
 
-	if (val != DIE_NMI)
+	if (val != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	/* Hack, if it's a memory or I/O error, ignore it. */
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notif
 	unsigned long rom_pl;
 	static int die_nmi_called;
 
-	if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)
+	if (ulReason != DIE_NMIUNKNOWN)
 		goto out;
 
 	if (!hpwdt_nmi_decoding)

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (2 preceding siblings ...)
  2010-09-10  2:51 ` [RFC 4/6] x86, NMI, Rewrite NMI handler Huang Ying
@ 2010-09-10  2:51 ` Huang Ying
  2010-09-10 16:02   ` Don Zickus
  2010-09-10  2:51 ` [RFC 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-10  2:51 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, Andi Kleen, Huang Ying

On some platforms, fatal hardware error may be notified via unknown
NMI.

For example, on some platform with APEI firmware first mode support,
firmware generates NMI for fatal error but without error record. The
unknown NMI should be treated as notification of fatal hardware
error. The unknown_nmi_for_hwerr is added for these platform, if it is
not zero, system will treat unknown NMI as notification of fatal
hardware error.

These platforms are identified via the presentation of APEI HEST or
some PCI ID of the host bridge. The PCI ID of host bridge instead of
DMI ID is used, so that the checking can be done based on the platform
type instead of motherboard. This should be simpler and sufficient.

The method to identify the platforms is designed by Andi Kleen.

# TODO

- Report unknown NMI as fatal hardware error with new hardware error
  reporting interface, so that the error record can be gotten after
  system reboot.

- Because printk is not safe in NMI handler, all printk except that
  called after bust_spinlocks will be called in a delayed IRQ context
  to prevent system deadlock.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/nmi.h |    1 
 arch/x86/kernel/Makefile   |    2 +
 arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/traps.c    |   10 ++++++++
 drivers/acpi/apei/hest.c   |    8 ++++++
 5 files changed, 76 insertions(+)
 create mode 100644 arch/x86/kernel/hwerr.c

--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,6 +44,7 @@ struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
+extern int unknown_nmi_for_hwerr;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -116,6 +116,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
 
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 
+obj-y					+= hwerr.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
--- /dev/null
+++ b/arch/x86/kernel/hwerr.c
@@ -0,0 +1,55 @@
+/*
+ * Hardware error architecture dependent processing
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/nmi.h>
+
+/*
+ * On some platform, hardware errors may be notified via unknown
+ * NMI. These platform is identified via the PCI ID of host bridge.
+ *
+ * The PCI ID of host bridge instead of DMI ID is used, so that the
+ * checking can be done based on the platform instead of motherboard.
+ * This should be simpler and sufficient.
+ */
+static const
+struct pci_device_id unknown_nmi_for_hwerr_platform[] __initdata = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
+	{ 0, }
+};
+
+int __init check_unknown_nmi_for_hwerr(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev) {
+		if (pci_match_id(unknown_nmi_for_hwerr_platform, dev)) {
+			pr_info(
+"Host bridge is identified, will treat unknown NMI as hardware error!\n");
+			unknown_nmi_for_hwerr = 1;
+			break;
+		}
+	}
+
+	return 0;
+}
+late_initcall(check_unknown_nmi_for_hwerr);
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_for_hwerr;
+
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
  * only be used in NMI handler.
@@ -349,6 +351,14 @@ io_check_error(unsigned char reason, str
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
+	/*
+	 * On some platforms, hardware errors may be notified via
+	 * unknown NMI
+	 */
+	if (unknown_nmi_for_hwerr)
+		panic("NMI for hardware error without error record: "
+		      "Not continuing");
+
 #ifdef CONFIG_MCA
 	/*
 	 * Might actually be able to figure out what the guilty party
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -35,6 +35,7 @@
 #include <linux/highmem.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/nmi.h>
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -222,6 +223,13 @@ static int __init hest_init(void)
 	if (rc)
 		goto err;
 
+	/*
+	 * System has proper HEST should treat unknown NMI as fatal
+	 * hardware error notification
+	 */
+	pr_info("HEST is valid, will treat unknown NMI as hardware error!\n");
+	unknown_nmi_for_hwerr = 1;
+
 	rc = hest_ghes_dev_register(ghes_count);
 	if (rc)
 		goto err;

^ permalink raw reply	[flat|nested] 69+ messages in thread

* [RFC 6/6] x86, NMI, Remove do_nmi_callback logic
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (3 preceding siblings ...)
  2010-09-10  2:51 ` [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
@ 2010-09-10  2:51 ` Huang Ying
  2010-09-10 16:13   ` Don Zickus
  2010-09-10 20:37 ` [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Peter Zijlstra
  2010-09-21 21:48 ` Don Zickus
  6 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-10  2:51 UTC (permalink / raw)
  To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, Andi Kleen, Huang Ying

do_nmi_callback related logic is removed, because it is useless, just
adds code complexity.

unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/nmi.h    |   10 +++++++++-
 arch/x86/kernel/apic/hw_nmi.c |    1 -
 arch/x86/kernel/apic/nmi.c    |   29 +----------------------------
 arch/x86/kernel/traps.c       |   16 ++++++++++------
 4 files changed, 20 insertions(+), 36 deletions(-)

--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void
 extern void stop_apic_nmi_watchdog(void *);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
 extern void cpu_nmi_set_wd_enabled(void);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
+extern int nmi_watchdog_tick(struct pt_regs *regs);
+#else
+static inline int nmi_watchdog_tick(struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
 #define NMI_NONE	0
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; }
 #endif
 atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
 void cpu_nmi_set_wd_enabled(void) { return; }
 void stop_apic_nmi_watchdog(void *unused) { return; }
 void setup_apic_nmi_watchdog(void *unused) { return; }
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -37,7 +37,6 @@
 
 #include <asm/mach_traps.h>
 
-int unknown_nmi_panic;
 int nmi_watchdog_enabled;
 
 /* For reliability, we're prepared to waste bits here. */
@@ -389,7 +388,7 @@ void touch_nmi_watchdog(void)
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
 notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+nmi_watchdog_tick(struct pt_regs *regs)
 {
 	/*
 	 * Since current_thread_info()-> is always on the stack, and we
@@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog(
 	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 }
 
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-	unsigned char reason = get_nmi_reason();
-	char buf[64];
-
-	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-	die_nmi(buf, regs, 1); /* Always panic here */
-	return 0;
-}
-
 /*
  * proc handler for /proc/sys/kernel/nmi
  */
@@ -540,15 +522,6 @@ int proc_nmi_enabled(struct ctl_table *t
 
 #endif /* CONFIG_SYSCTL */
 
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
 static int ignore_nmis;
 
 int unknown_nmi_for_hwerr;
+int unknown_nmi_panic;
 
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
@@ -308,6 +309,13 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -374,7 +382,7 @@ unknown_nmi_error(unsigned char reason,
 			reason, smp_processor_id());
 
 	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-	if (panic_on_unrecovered_nmi)
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -421,12 +429,8 @@ static notrace __kprobes void default_do
 	}
 	raw_spin_unlock(&nmi_reason_lock);
 
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
-	if (nmi_watchdog_tick(regs, reason))
-		return;
-	if (do_nmi_callback(regs, smp_processor_id()))
+	if (nmi_watchdog_tick(regs))
 		return;
-#endif
 
 	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
 	    == NOTIFY_STOP)

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-10  2:51 ` [RFC 4/6] x86, NMI, Rewrite NMI handler Huang Ying
@ 2010-09-10 15:56   ` Don Zickus
  2010-09-10 16:03     ` Andi Kleen
  2010-09-13  1:16   ` Robert Richter
  1 sibling, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-10 15:56 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Fri, Sep 10, 2010 at 10:51:03AM +0800, Huang Ying wrote:
> The original NMI handler is quite outdated in many aspects. This patch
> try to fix it.
> 
> In original code, NMI reason io port (0x61) is only processed on
> BSP. This makes it impossible to hot-remove BSP. To solve the issue,
> a raw spinlock is used to make the port can be processed on any CPU.

Do we really want to use a spinlock inside the nmi handler?

I thought the NMIs sent to the io port are only routed to one cpu as
determined by the io-apic?  Is it spread out to other cpus now?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-10  2:51 ` [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
@ 2010-09-10 16:02   ` Don Zickus
  2010-09-10 16:19     ` Andi Kleen
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-10 16:02 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

> @@ -349,6 +351,14 @@ io_check_error(unsigned char reason, str
>  static notrace __kprobes void
>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>  {
> +	/*
> +	 * On some platforms, hardware errors may be notified via
> +	 * unknown NMI
> +	 */
> +	if (unknown_nmi_for_hwerr)
> +		panic("NMI for hardware error without error record: "
> +		      "Not continuing");
> +
>  #ifdef CONFIG_MCA

I'm not sure I agree with this.  I still see PCI SERR's not coming in
through port 0x61 and get routed to unknown_nmi_error.  Not sure we should
just assume that it is an APEI/HEST error and panic the box.

Also all the perf problems we have seen recently have been going through
that path as we slowly try to figure out why we are not catching those
unknown nmis.

I am grasping for straws here, but is there a register that APEI/HEST can
poke to see if it generated the NMI?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-10 15:56   ` Don Zickus
@ 2010-09-10 16:03     ` Andi Kleen
  2010-09-10 18:29       ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-10 16:03 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Fri, 10 Sep 2010 11:56:05 -0400
Don Zickus <dzickus@redhat.com> wrote:

> On Fri, Sep 10, 2010 at 10:51:03AM +0800, Huang Ying wrote:
> > The original NMI handler is quite outdated in many aspects. This
> > patch try to fix it.
> > 
> > In original code, NMI reason io port (0x61) is only processed on
> > BSP. This makes it impossible to hot-remove BSP. To solve the issue,
> > a raw spinlock is used to make the port can be processed on any CPU.
> 
> Do we really want to use a spinlock inside the nmi handler?

As long as it's only between CPUs
(that is only ever used between different NMI handlers) 
that's fine.  It's certainly safer than having races between CPUs.

> I thought the NMIs sent to the io port are only routed to one cpu as
> determined by the io-apic?  Is it spread out to other cpus now?

There can be cases where it can happen I believe.

-Andi
 
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 6/6] x86, NMI, Remove do_nmi_callback logic
  2010-09-10  2:51 ` [RFC 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
@ 2010-09-10 16:13   ` Don Zickus
  2010-09-13  2:27     ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-10 16:13 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Fri, Sep 10, 2010 at 10:51:05AM +0800, Huang Ying wrote:
>  
> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> +extern int nmi_watchdog_tick(struct pt_regs *regs);
> +#else
> +static inline int nmi_watchdog_tick(struct pt_regs *regs)
> +{
> +	return 0;
> +}
> +#endif
> +
>  extern atomic_t nmi_active;
>  extern unsigned int nmi_watchdog;
>  #define NMI_NONE	0

<snip>

> @@ -421,12 +429,8 @@ static notrace __kprobes void default_do
>  	}
>  	raw_spin_unlock(&nmi_reason_lock);
>  
> -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> -	if (nmi_watchdog_tick(regs, reason))
> -		return;
> -	if (do_nmi_callback(regs, smp_processor_id()))
> +	if (nmi_watchdog_tick(regs))
>  		return;
> -#endif
>  
>  	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
>  	    == NOTIFY_STOP)

I wonder if these two chunks are going to confuse people when they read
the code.  The old nmi watchdog exists in the arch/x86 area but the new
nmi watchdog code is now in kernel/watchdog.c.

If someone sees nmi_watchdog_tick() here will they assume the nmi watchdog
code is still inside arch/x86?

I would suggest keep it wrapped with CONFIG_LOCKUP_DETECTOR to make it
obvious.  Thoughts?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-10 16:02   ` Don Zickus
@ 2010-09-10 16:19     ` Andi Kleen
  2010-09-10 18:40       ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-10 16:19 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel


> I am grasping for straws here, but is there a register that APEI/HEST
> can poke to see if it generated the NMI?

HEST knows this yes.

But this is not about HEST errors, but about those without HEST
handling.

-Andi


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-10 16:03     ` Andi Kleen
@ 2010-09-10 18:29       ` Don Zickus
  2010-09-13  2:09         ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-10 18:29 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Fri, Sep 10, 2010 at 06:03:56PM +0200, Andi Kleen wrote:
> On Fri, 10 Sep 2010 11:56:05 -0400
> Don Zickus <dzickus@redhat.com> wrote:
> 
> > On Fri, Sep 10, 2010 at 10:51:03AM +0800, Huang Ying wrote:
> > > The original NMI handler is quite outdated in many aspects. This
> > > patch try to fix it.
> > > 
> > > In original code, NMI reason io port (0x61) is only processed on
> > > BSP. This makes it impossible to hot-remove BSP. To solve the issue,
> > > a raw spinlock is used to make the port can be processed on any CPU.
> > 
> > Do we really want to use a spinlock inside the nmi handler?
> 
> As long as it's only between CPUs
> (that is only ever used between different NMI handlers) 
> that's fine.  It's certainly safer than having races between CPUs.
> 
> > I thought the NMIs sent to the io port are only routed to one cpu as
> > determined by the io-apic?  Is it spread out to other cpus now?
> 
> There can be cases where it can happen I believe.

The reason I asked was, I thought it would be easier to have a global
variable that tells the nmi handler which cpu has the NMI's routed to its
io port.  This way if you want to swap out the bsp cpu, you could perhaps
just re-route the nmi to a new cpu and the global variable would be
updated accordingly?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-10 16:19     ` Andi Kleen
@ 2010-09-10 18:40       ` Don Zickus
  2010-09-13  2:19         ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-10 18:40 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Fri, Sep 10, 2010 at 06:19:29PM +0200, Andi Kleen wrote:
> 
> > I am grasping for straws here, but is there a register that APEI/HEST
> > can poke to see if it generated the NMI?
> 
> HEST knows this yes.
> 
> But this is not about HEST errors, but about those without HEST
> handling.

Don't most unknown NMIs fall into the same boat, that they were not being
handled properly?

On the other hand could you use the die_notifier_chain(DIE_UNKNOWNNMI) for
the same purpose and keep the unknown_nmi_error() handler a little
cleaner?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (4 preceding siblings ...)
  2010-09-10  2:51 ` [RFC 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
@ 2010-09-10 20:37 ` Peter Zijlstra
  2010-09-10 22:58   ` H. Peter Anvin
  2010-09-11  8:50   ` Andi Kleen
  2010-09-21 21:48 ` Don Zickus
  6 siblings, 2 replies; 69+ messages in thread
From: Peter Zijlstra @ 2010-09-10 20:37 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Fri, 2010-09-10 at 10:51 +0800, Huang Ying wrote:
> +       return inb(NMI_REASON_PORT);

I've always wondered, where is this magic stuff documented?



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-10 20:37 ` [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Peter Zijlstra
@ 2010-09-10 22:58   ` H. Peter Anvin
  2010-09-11  8:50   ` Andi Kleen
  1 sibling, 0 replies; 69+ messages in thread
From: H. Peter Anvin @ 2010-09-10 22:58 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Huang Ying, Ingo Molnar, linux-kernel, Andi Kleen

On 09/10/2010 01:37 PM, Peter Zijlstra wrote:
> On Fri, 2010-09-10 at 10:51 +0800, Huang Ying wrote:
>> +       return inb(NMI_REASON_PORT);
> 
> I've always wondered, where is this magic stuff documented?
> 

A lot of the pre-2000 stuff is documented in Ralf Brown's Interrupt
List, and the associated book "The Undocumented PC".  A lot was
originally documented in various now-out-of-print IBM technical manuals.

RBIL unfortunately hasn't been updated since 2000.  The good news is
that post 2000 there are a lot more formal standards out there, but not
collected in one convenient place like that.

	-hpa

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-10 20:37 ` [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Peter Zijlstra
  2010-09-10 22:58   ` H. Peter Anvin
@ 2010-09-11  8:50   ` Andi Kleen
  2010-09-13  1:30     ` Robert Richter
  1 sibling, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-11  8:50 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Fri, 10 Sep 2010 22:37:07 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Fri, 2010-09-10 at 10:51 +0800, Huang Ying wrote:
> > +       return inb(NMI_REASON_PORT);
> 
> I've always wondered, where is this magic stuff documented?

In the datasheet of the chipset. But sometimes you need
to dig deeper to get a global picture including all chipsets.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-10  2:51 ` [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
@ 2010-09-13  1:02   ` Robert Richter
  2010-09-13  2:02     ` Huang Ying
  2010-09-21 23:04   ` Maciej W. Rozycki
  1 sibling, 1 reply; 69+ messages in thread
From: Robert Richter @ 2010-09-13  1:02 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 09.09.10 22:51:02, Huang Ying wrote:
> memory parity error is only valid for IBM PC-AT, newer machine use 7
> bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
> reported via MCE. So corresponding function name and kernel log string
> is changed.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/include/asm/mach_traps.h |    6 +++---
>  arch/x86/kernel/traps.c           |   24 ++++++------------------
>  2 files changed, 9 insertions(+), 21 deletions(-)
> 
> --- a/arch/x86/include/asm/mach_traps.h
> +++ b/arch/x86/include/asm/mach_traps.h
> @@ -9,11 +9,11 @@
>  
>  #define NMI_REASON_PORT		0x61
>  
> -#define NMI_REASON_MEMPAR	0x80
> +#define NMI_REASON_SERR		0x80
>  #define NMI_REASON_IOCHK	0x40
> -#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
> +#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
>  
> -#define NMI_REASON_CLEAR_MEMPAR	0x04
> +#define NMI_REASON_CLEAR_SERR	0x04
>  #define NMI_REASON_CLEAR_IOCHK	0x08
>  #define NMI_REASON_CLEAR_MASK	0x0f

Haven't you introduced *_MEMPAR with patch 1/6?

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-10  2:51 ` [RFC 4/6] x86, NMI, Rewrite NMI handler Huang Ying
  2010-09-10 15:56   ` Don Zickus
@ 2010-09-13  1:16   ` Robert Richter
  1 sibling, 0 replies; 69+ messages in thread
From: Robert Richter @ 2010-09-13  1:16 UTC (permalink / raw)
  To: Huang Ying, Peter Zijlstra, Ingo Molnar
  Cc: H. Peter Anvin, linux-kernel, Andi Kleen

On 09.09.10 22:51:03, Huang Ying wrote:
> DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
> event, oprofile, crash IPI, etc. While DIE_NMI is used to process
> non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
> Interface) GHES (Generic Hardware Error Source), etc.
> 
> DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
> trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
> NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
> possible that APEI GHES is processed on CPU 1, while unknown NMI is
> gotten on CPU 0.

Ingo, Peter,

couldn't this be the reason for the regression we are observing with
unhandled nmis?

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-11  8:50   ` Andi Kleen
@ 2010-09-13  1:30     ` Robert Richter
  0 siblings, 0 replies; 69+ messages in thread
From: Robert Richter @ 2010-09-13  1:30 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On 11.09.10 04:50:01, Andi Kleen wrote:
> On Fri, 10 Sep 2010 22:37:07 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Fri, 2010-09-10 at 10:51 +0800, Huang Ying wrote:
> > > +       return inb(NMI_REASON_PORT);
> > 
> > I've always wondered, where is this magic stuff documented?
> 
> In the datasheet of the chipset. But sometimes you need
> to dig deeper to get a global picture including all chipsets.

It is also here:

 http://support.amd.com/us/Embedded_TechDocs/43009_sb7xx_rrg_pub_1.00.pdf
 2.3.3.1.1 IO-Mapped Control Registers

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-13  1:02   ` Robert Richter
@ 2010-09-13  2:02     ` Huang Ying
  2010-09-16  8:18       ` Robert Richter
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-13  2:02 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, 2010-09-13 at 09:02 +0800, Robert Richter wrote:
> On 09.09.10 22:51:02, Huang Ying wrote:
> > memory parity error is only valid for IBM PC-AT, newer machine use 7
> > bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
> > reported via MCE. So corresponding function name and kernel log string
> > is changed.
> > 
> > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > ---
> >  arch/x86/include/asm/mach_traps.h |    6 +++---
> >  arch/x86/kernel/traps.c           |   24 ++++++------------------
> >  2 files changed, 9 insertions(+), 21 deletions(-)
> > 
> > --- a/arch/x86/include/asm/mach_traps.h
> > +++ b/arch/x86/include/asm/mach_traps.h
> > @@ -9,11 +9,11 @@
> >  
> >  #define NMI_REASON_PORT		0x61
> >  
> > -#define NMI_REASON_MEMPAR	0x80
> > +#define NMI_REASON_SERR		0x80
> >  #define NMI_REASON_IOCHK	0x40
> > -#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
> > +#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
> >  
> > -#define NMI_REASON_CLEAR_MEMPAR	0x04
> > +#define NMI_REASON_CLEAR_SERR	0x04
> >  #define NMI_REASON_CLEAR_IOCHK	0x08
> >  #define NMI_REASON_CLEAR_MASK	0x0f
> 
> Haven't you introduced *_MEMPAR with patch 1/6?

Yes. And I suggest to rename it to *_SERR in this patch to reflect the
contemporary hardware better. I think they are two steps: naming the
magic number and changing the name of constant and function.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-10 18:29       ` Don Zickus
@ 2010-09-13  2:09         ` Huang Ying
  2010-09-13 14:04           ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-13  2:09 UTC (permalink / raw)
  To: Don Zickus; +Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel

On Sat, 2010-09-11 at 02:29 +0800, Don Zickus wrote:
> On Fri, Sep 10, 2010 at 06:03:56PM +0200, Andi Kleen wrote:
> > On Fri, 10 Sep 2010 11:56:05 -0400
> > Don Zickus <dzickus@redhat.com> wrote:
> > 
> > > On Fri, Sep 10, 2010 at 10:51:03AM +0800, Huang Ying wrote:
> > > > The original NMI handler is quite outdated in many aspects. This
> > > > patch try to fix it.
> > > > 
> > > > In original code, NMI reason io port (0x61) is only processed on
> > > > BSP. This makes it impossible to hot-remove BSP. To solve the issue,
> > > > a raw spinlock is used to make the port can be processed on any CPU.
> > > 
> > > Do we really want to use a spinlock inside the nmi handler?
> > 
> > As long as it's only between CPUs
> > (that is only ever used between different NMI handlers) 
> > that's fine.  It's certainly safer than having races between CPUs.
> > 
> > > I thought the NMIs sent to the io port are only routed to one cpu as
> > > determined by the io-apic?  Is it spread out to other cpus now?
> > 
> > There can be cases where it can happen I believe.
> 
> The reason I asked was, I thought it would be easier to have a global
> variable that tells the nmi handler which cpu has the NMI's routed to its
> io port.  This way if you want to swap out the bsp cpu, you could perhaps
> just re-route the nmi to a new cpu and the global variable would be
> updated accordingly?

Then we need some kind of protection or race condition between
re-routing NMI and updating the variable. Do you think so?

Best Regards,
Huang Ying 


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-10 18:40       ` Don Zickus
@ 2010-09-13  2:19         ` Huang Ying
  2010-09-13 14:11           ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-13  2:19 UTC (permalink / raw)
  To: Don Zickus; +Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel

On Sat, 2010-09-11 at 02:40 +0800, Don Zickus wrote:
> On Fri, Sep 10, 2010 at 06:19:29PM +0200, Andi Kleen wrote:
> > 
> > > I am grasping for straws here, but is there a register that APEI/HEST
> > > can poke to see if it generated the NMI?
> > 
> > HEST knows this yes.
> > 
> > But this is not about HEST errors, but about those without HEST
> > handling.
> 
> Don't most unknown NMIs fall into the same boat, that they were not being
> handled properly?

As far as I know, at least on some platforms, unknown NMIs are used for
hardware error reporting. They will cause "Blue Screen" in Windows.

> On the other hand could you use the die_notifier_chain(DIE_UNKNOWNNMI) for
> the same purpose and keep the unknown_nmi_error() handler a little
> cleaner?

I think explicit function call has better readability than notifier
chain.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 6/6] x86, NMI, Remove do_nmi_callback logic
  2010-09-10 16:13   ` Don Zickus
@ 2010-09-13  2:27     ` Huang Ying
  2010-09-13  6:24       ` Ingo Molnar
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-13  2:27 UTC (permalink / raw)
  To: Don Zickus; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Sat, 2010-09-11 at 00:13 +0800, Don Zickus wrote:
> On Fri, Sep 10, 2010 at 10:51:05AM +0800, Huang Ying wrote:
> >  
> > +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> > +extern int nmi_watchdog_tick(struct pt_regs *regs);
> > +#else
> > +static inline int nmi_watchdog_tick(struct pt_regs *regs)
> > +{
> > +	return 0;
> > +}
> > +#endif
> > +
> >  extern atomic_t nmi_active;
> >  extern unsigned int nmi_watchdog;
> >  #define NMI_NONE	0
> 
> <snip>
> 
> > @@ -421,12 +429,8 @@ static notrace __kprobes void default_do
> >  	}
> >  	raw_spin_unlock(&nmi_reason_lock);
> >  
> > -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> > -	if (nmi_watchdog_tick(regs, reason))
> > -		return;
> > -	if (do_nmi_callback(regs, smp_processor_id()))
> > +	if (nmi_watchdog_tick(regs))
> >  		return;
> > -#endif
> >  
> >  	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
> >  	    == NOTIFY_STOP)
> 
> I wonder if these two chunks are going to confuse people when they read
> the code.  The old nmi watchdog exists in the arch/x86 area but the new
> nmi watchdog code is now in kernel/watchdog.c.
> 
> If someone sees nmi_watchdog_tick() here will they assume the nmi watchdog
> code is still inside arch/x86?
> 
> I would suggest keep it wrapped with CONFIG_LOCKUP_DETECTOR to make it
> obvious.  Thoughts?

Is it planned to remove old NMI watchdog implementation in near future?

If it is not, I think it is better to document/comment the two
implementation more explicitly, maybe in nmi_watchdog_tick() comments. I
suspect nmi_watchdog_tick() wrapped with "!
defined(CONFIG_LOCKUP_DETECTOR)" will make people more confused.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 6/6] x86, NMI, Remove do_nmi_callback logic
  2010-09-13  2:27     ` Huang Ying
@ 2010-09-13  6:24       ` Ingo Molnar
  0 siblings, 0 replies; 69+ messages in thread
From: Ingo Molnar @ 2010-09-13  6:24 UTC (permalink / raw)
  To: Huang Ying; +Cc: Don Zickus, H. Peter Anvin, linux-kernel, Andi Kleen


* Huang Ying <ying.huang@intel.com> wrote:

> On Sat, 2010-09-11 at 00:13 +0800, Don Zickus wrote:
> > On Fri, Sep 10, 2010 at 10:51:05AM +0800, Huang Ying wrote:
> > >  
> > > +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> > > +extern int nmi_watchdog_tick(struct pt_regs *regs);
> > > +#else
> > > +static inline int nmi_watchdog_tick(struct pt_regs *regs)
> > > +{
> > > +	return 0;
> > > +}
> > > +#endif
> > > +
> > >  extern atomic_t nmi_active;
> > >  extern unsigned int nmi_watchdog;
> > >  #define NMI_NONE	0
> > 
> > <snip>
> > 
> > > @@ -421,12 +429,8 @@ static notrace __kprobes void default_do
> > >  	}
> > >  	raw_spin_unlock(&nmi_reason_lock);
> > >  
> > > -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> > > -	if (nmi_watchdog_tick(regs, reason))
> > > -		return;
> > > -	if (do_nmi_callback(regs, smp_processor_id()))
> > > +	if (nmi_watchdog_tick(regs))
> > >  		return;
> > > -#endif
> > >  
> > >  	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
> > >  	    == NOTIFY_STOP)
> > 
> > I wonder if these two chunks are going to confuse people when they read
> > the code.  The old nmi watchdog exists in the arch/x86 area but the new
> > nmi watchdog code is now in kernel/watchdog.c.
> > 
> > If someone sees nmi_watchdog_tick() here will they assume the nmi watchdog
> > code is still inside arch/x86?
> > 
> > I would suggest keep it wrapped with CONFIG_LOCKUP_DETECTOR to make it
> > obvious.  Thoughts?
> 
> Is it planned to remove old NMI watchdog implementation in near 
> future?

Yes. With the new Pentium4 PMU driver we've covered the last 
old-NMI-watchdog hardware support corner too, so we can and should 
remove the old code. (keeping the boot option to stay compatible)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-13  2:09         ` Huang Ying
@ 2010-09-13 14:04           ` Don Zickus
  2010-09-14  5:12             ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-13 14:04 UTC (permalink / raw)
  To: Huang Ying; +Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, Sep 13, 2010 at 10:09:30AM +0800, Huang Ying wrote:
> > The reason I asked was, I thought it would be easier to have a global
> > variable that tells the nmi handler which cpu has the NMI's routed to its
> > io port.  This way if you want to swap out the bsp cpu, you could perhaps
> > just re-route the nmi to a new cpu and the global variable would be
> > updated accordingly?
> 
> Then we need some kind of protection or race condition between
> re-routing NMI and updating the variable. Do you think so?

Well, I thought the only reasonable place to update the variable is when
the cpu is being taken offline, during the MTRR update.  Since no NMIs can
be processed when the cpu's are syncing their MTRR, there shouldn't be a
race condition, no?

Then again I am probably missing something obvious.  Like I don't know how
cpu's deal with interrupts/NMIs when they are going offline.

It was just a thought to avoid the spinlock.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13  2:19         ` Huang Ying
@ 2010-09-13 14:11           ` Don Zickus
  2010-09-13 15:24             ` Andi Kleen
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-13 14:11 UTC (permalink / raw)
  To: Huang Ying; +Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, Sep 13, 2010 at 10:19:49AM +0800, Huang Ying wrote:
> On Sat, 2010-09-11 at 02:40 +0800, Don Zickus wrote:
> > On Fri, Sep 10, 2010 at 06:19:29PM +0200, Andi Kleen wrote:
> > > 
> > > > I am grasping for straws here, but is there a register that APEI/HEST
> > > > can poke to see if it generated the NMI?
> > > 
> > > HEST knows this yes.
> > > 
> > > But this is not about HEST errors, but about those without HEST
> > > handling.
> > 
> > Don't most unknown NMIs fall into the same boat, that they were not being
> > handled properly?
> 
> As far as I know, at least on some platforms, unknown NMIs are used for
> hardware error reporting. They will cause "Blue Screen" in Windows.

Unfortunately, most of the bugzillas I deal with, unkown NMIs are the
result of SERRs.  While you can consider that hardware error reporting,
the easiest way for me to debug those problems currently is to have
reporters run 'lspci -vvv' after the NMI is displayed to figure out who
caused the NMI.

My fear is that panic'ing the box on unknown NMIs on those platforms will
hinder my ability to easily debug those NMIs.

> 
> > On the other hand could you use the die_notifier_chain(DIE_UNKNOWNNMI) for
> > the same purpose and keep the unknown_nmi_error() handler a little
> > cleaner?
> 
> I think explicit function call has better readability than notifier
> chain.

Ok.  What criteria should we establish to determine which functions go on
the notifier chain and which ones can explicitly called?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 14:11           ` Don Zickus
@ 2010-09-13 15:24             ` Andi Kleen
  2010-09-13 15:47               ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-13 15:24 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

Don,

> Unfortunately, most of the bugzillas I deal with, unkown NMIs are the
> result of SERRs.  While you can consider that hardware error
> reporting, the easiest way for me to debug those problems currently
> is to have reporters run 'lspci -vvv' after the NMI is displayed to
> figure out who caused the NMI.
> 
> My fear is that panic'ing the box on unknown NMIs on those platforms
> will hinder my ability to easily debug those NMIs.

The reason the NMI is sent is that there is a "lost" 
data corruption somewhere in the system and if you don't 
stop it the system the corruption may make it to disk,
become permanent etc. The hardware was designed
under the assumption that  the system is stopped by software
when this happens (same reason as why many machine
checks cause panics)

Then there isn't necessarily something to "debug": data corruption
can happen without any bugs being around (and in fact
that's the common case, assuming production systems)

So I'm not sure what you're debugging here. Are you being the support
technician for the system through bugzilla? That sounds
inefficient.

Anyways for hardware support we could probably dump some
more information at panic or better through error
serialization, but the word "debug" is IMHO an very wrong
way to think about that.

If this is about driver debugging it's entirely reasonable
to have a special setting (e.g. disable the panic), 
but the defaults should be set in a way to avoid
spreading data corruption,.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 15:24             ` Andi Kleen
@ 2010-09-13 15:47               ` Don Zickus
  2010-09-13 16:57                 ` Andi Kleen
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-13 15:47 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, Sep 13, 2010 at 05:24:38PM +0200, Andi Kleen wrote:
> 
> Don,
> 
> > Unfortunately, most of the bugzillas I deal with, unkown NMIs are the
> > result of SERRs.  While you can consider that hardware error
> > reporting, the easiest way for me to debug those problems currently
> > is to have reporters run 'lspci -vvv' after the NMI is displayed to
> > figure out who caused the NMI.
> > 
> > My fear is that panic'ing the box on unknown NMIs on those platforms
> > will hinder my ability to easily debug those NMIs.
> 
> 
> The reason the NMI is sent is that there is a "lost" 
> data corruption somewhere in the system and if you don't 
> stop it the system the corruption may make it to disk,
> become permanent etc. The hardware was designed
> under the assumption that  the system is stopped by software
> when this happens (same reason as why many machine
> checks cause panics)

Yeah, I know. I was being too ignorant perhaps.

> 
> Then there isn't necessarily something to "debug": data corruption
> can happen without any bugs being around (and in fact
> that's the common case, assuming production systems)
> 
> So I'm not sure what you're debugging here. Are you being the support
> technician for the system through bugzilla? That sounds
> inefficient.

The problem I repeatedly deal with for RHEL systems is a customer sees an
unknown NMI printed on their screen and sometimes the machine falls apart
shortly after, sometimes it doesn't.  Obviously they are going to file a
bug asking why.  A chunk of the problems are bad hardware/firmware.  But
the problem is which one.

Replacing a slot card is easy, replacing a motherboard is not.  So I
usually try to determine which device is failing by walking the pci bus
and looking for the serr bits or some of the pci-e status bits.

It is inefficient, but I haven't had time to figure out a way to clean it
up.  And just for the record, I usually see an unknown NMI report every
other week.

> 
> Anyways for hardware support we could probably dump some
> more information at panic or better through error
> serialization, but the word "debug" is IMHO an very wrong
> way to think about that.

Well, I can use 'diagnos' or 'determine' if you want.  But at the end of
the day, we have customers that see scary software messages and expect us
to give them reasonable direction to fix their problems.

> 
> If this is about driver debugging it's entirely reasonable
> to have a special setting (e.g. disable the panic), 
> but the defaults should be set in a way to avoid
> spreading data corruption,.

Ok.  I can accept that.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 15:47               ` Don Zickus
@ 2010-09-13 16:57                 ` Andi Kleen
  2010-09-13 17:53                   ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-13 16:57 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, 13 Sep 2010 11:47:50 -0400
Don Zickus <dzickus@redhat.com> wrote:

> 
> > 
> > Then there isn't necessarily something to "debug": data corruption
> > can happen without any bugs being around (and in fact
> > that's the common case, assuming production systems)
> > 
> > So I'm not sure what you're debugging here. Are you being the
> > support technician for the system through bugzilla? That sounds
> > inefficient.
> 
> The problem I repeatedly deal with for RHEL systems is a customer
> sees an unknown NMI printed on their screen and sometimes the machine
> falls apart shortly after, sometimes it doesn't.  Obviously they are
> going to file a bug asking why.  A chunk of the problems are bad
> hardware/firmware.  But the problem is which one.

NMIs are usually hardware.

BTW one big issue here is that we don't display anything
on the screen so the system seems hung. KMS solves this,
but unfortunately not for the video chipsets 
often used in servers.

Part of it is solved by serializing the error
and defaulting to reboot after panic (currently NMI doesn't do that,
MCE already does, NMI should too imho) 

> 
> Replacing a slot card is easy, replacing a motherboard is not.  So I
> usually try to determine which device is failing by walking the pci
> bus and looking for the serr bits or some of the pci-e status bits.

You don't necessarily need to replace anything, it could
be just unlucky data corruption (e.g. a big enough cosmic ray
that flipped enough bits that the normal error correction
couldn't fix it anymore)

> 
> It is inefficient, but I haven't had time to figure out a way to
> clean it up.  And just for the record, I usually see an unknown NMI
> report every other week.

At least ignoring the data corruption is not the way to handle
it. I don't think you'll do your customers a favor this way.

> > Anyways for hardware support we could probably dump some
> > more information at panic or better through error
> > serialization, but the word "debug" is IMHO an very wrong
> > way to think about that.
> 
> Well, I can use 'diagnos' or 'determine' if you want.  But at the end
> of the day, we have customers that see scary software messages and
> expect us to give them reasonable direction to fix their problems.

Usually these problems shouldn't be handled by kernel hackers,
it's something for a hardware technician. If kernel
hackers have to handle it something is very wrong.

IMHO the software should give the customer enough information
to fix (or rather let their hardware technician) work it out.

If it's not good enough for this we have to improve it. But
ignoring the errors is not the way to do that.

BTW one issue is that the screen is not big enough for all
the information that is really useful for this. So I suspect
to have it really useful you need to accept that some information
will only be available through serialization (e.g. if you 
wanted to dump parts of the PCI config space)

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 16:57                 ` Andi Kleen
@ 2010-09-13 17:53                   ` Don Zickus
  2010-09-13 18:07                     ` Andi Kleen
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-13 17:53 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, Sep 13, 2010 at 06:57:21PM +0200, Andi Kleen wrote:
> On Mon, 13 Sep 2010 11:47:50 -0400
> Don Zickus <dzickus@redhat.com> wrote:
> 
> > 
> > > 
> > > Then there isn't necessarily something to "debug": data corruption
> > > can happen without any bugs being around (and in fact
> > > that's the common case, assuming production systems)
> > > 
> > > So I'm not sure what you're debugging here. Are you being the
> > > support technician for the system through bugzilla? That sounds
> > > inefficient.
> > 
> > The problem I repeatedly deal with for RHEL systems is a customer
> > sees an unknown NMI printed on their screen and sometimes the machine
> > falls apart shortly after, sometimes it doesn't.  Obviously they are
> > going to file a bug asking why.  A chunk of the problems are bad
> > hardware/firmware.  But the problem is which one.
> 
> NMIs are usually hardware.
> 
> BTW one big issue here is that we don't display anything
> on the screen so the system seems hung. KMS solves this,
> but unfortunately not for the video chipsets 
> often used in servers.

No most of our customer see messages being sent to the console or serial
part.  I haven't seen KMS hiding the info yet.

> 
> Part of it is solved by serializing the error
> and defaulting to reboot after panic (currently NMI doesn't do that,
> MCE already does, NMI should too imho) 
> 
> > 
> > Replacing a slot card is easy, replacing a motherboard is not.  So I
> > usually try to determine which device is failing by walking the pci
> > bus and looking for the serr bits or some of the pci-e status bits.
> 
> You don't necessarily need to replace anything, it could
> be just unlucky data corruption (e.g. a big enough cosmic ray
> that flipped enough bits that the normal error correction
> couldn't fix it anymore)

No, these are easily reproducible NMIs.  So far it they have been the
result of bad firmware (either features that are marked supported but not,
or register settings that changed between updates), nic cards that had
issues, or bad motherboards.

None of these issues went away because of a reboot.

> 
> > 
> > It is inefficient, but I haven't had time to figure out a way to
> > clean it up.  And just for the record, I usually see an unknown NMI
> > report every other week.
> 
> At least ignoring the data corruption is not the way to handle
> it. I don't think you'll do your customers a favor this way.

I never said I ignore them.  We try to resolve them quickly.

>  
> > > Anyways for hardware support we could probably dump some
> > > more information at panic or better through error
> > > serialization, but the word "debug" is IMHO an very wrong
> > > way to think about that.
> > 
> > Well, I can use 'diagnos' or 'determine' if you want.  But at the end
> > of the day, we have customers that see scary software messages and
> > expect us to give them reasonable direction to fix their problems.
> 
> Usually these problems shouldn't be handled by kernel hackers,
> it's something for a hardware technician. If kernel
> hackers have to handle it something is very wrong.
> 
> IMHO the software should give the customer enough information
> to fix (or rather let their hardware technician) work it out.

Yes, I agree, but the hardware folks usually like it when we give them a
better clue than 'hardware is broken'.  Something like the network stopped
working or your storage controller's firmware went bad, is usually more
helpful.

And the thing is, the hardware usually leaves a bread cumb trail of where
things went wrong.  It is just a matter of poking different chips to find
out who generated the error and report that.

> 
> BTW one issue is that the screen is not big enough for all
> the information that is really useful for this. So I suspect
> to have it really useful you need to accept that some information
> will only be available through serialization (e.g. if you 
> wanted to dump parts of the PCI config space)

Honestly, I don't think you need much screen real estate.  It would be
nice when an unknown NMI comes in, if the kernel just pokes around the hardware
registers and display a summary of what it found.  For example,

The following devices had error bits set in the status registers:
PCI device x:y.z - STATUS_BIT1 | STATUS_BIT2
HW device xyz - STATUS_BIT3
...

This at least gives the users some hardware they can remove/replace to see
if the problem goes away.

Right now I feel like it is one giant guessing game.

But I guess if we accept the fact that an unknown NMI will panic the box,
then we can probably be a little more liberal in breaking spinlocks and
poking around the hardware to display some userful info.

Just some thoughts.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 17:53                   ` Don Zickus
@ 2010-09-13 18:07                     ` Andi Kleen
  2010-09-13 18:23                       ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-13 18:07 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel


> 
> Honestly, I don't think you need much screen real estate.  It would be
> nice when an unknown NMI comes in, if the kernel just pokes around
> the hardware registers and display a summary of what it found.  For
> example,
> 
> The following devices had error bits set in the status registers:
> PCI device x:y.z - STATUS_BIT1 | STATUS_BIT2
> HW device xyz - STATUS_BIT3
> ...

You mean data from the generic PCI config space?

I don't think i would feel comfortable with arbitrary driver callbacks
(the risk of the driver breaking the panic would be high)

But if it's generic if not on the screen it should
be at least in the error serialization data and logged after boot.

At least on PCI-E it may be enough to simply dump all recent AER
data.

> 
> But I guess if we accept the fact that an unknown NMI will panic the
> box, then we can probably be a little more liberal in breaking
> spinlocks and poking around the hardware to display some userful info.

You have to be a bit careful with that, you may caused nested errors
(e.g. machine checks or more NMIs). I suppose this could be checked for
though.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 18:07                     ` Andi Kleen
@ 2010-09-13 18:23                       ` Don Zickus
  2010-09-13 18:36                         ` Andi Kleen
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-13 18:23 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, Sep 13, 2010 at 08:07:07PM +0200, Andi Kleen wrote:
> 
> > 
> > Honestly, I don't think you need much screen real estate.  It would be
> > nice when an unknown NMI comes in, if the kernel just pokes around
> > the hardware registers and display a summary of what it found.  For
> > example,
> > 
> > The following devices had error bits set in the status registers:
> > PCI device x:y.z - STATUS_BIT1 | STATUS_BIT2
> > HW device xyz - STATUS_BIT3
> > ...
> 
> You mean data from the generic PCI config space?

Yes. I normally just look at the Status register.  With PCI-e I'll look at
the other status registers in the capabilities field too.

> 
> I don't think i would feel comfortable with arbitrary driver callbacks
> (the risk of the driver breaking the panic would be high)

Neither would I.

> 
> But if it's generic if not on the screen it should
> be at least in the error serialization data and logged after boot.

I guess I don't know what that is, 'error serialization data'.  Is there
somewhere I can read more about it?

> 
> At least on PCI-E it may be enough to simply dump all recent AER
> data.

This assumes AER is supported on the bridge?  Which for newer chips is
probably true, but I wasn't sure about older ones.

How would I dump AER data from within the kernel?

> 
> > 
> > But I guess if we accept the fact that an unknown NMI will panic the
> > box, then we can probably be a little more liberal in breaking
> > spinlocks and poking around the hardware to display some userful info.
> 
> You have to be a bit careful with that, you may caused nested errors
> (e.g. machine checks or more NMIs). I suppose this could be checked for
> though.

Of course.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 18:23                       ` Don Zickus
@ 2010-09-13 18:36                         ` Andi Kleen
  2010-09-13 19:36                           ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-13 18:36 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel


> > 
> > But if it's generic if not on the screen it should
> > be at least in the error serialization data and logged after boot.
> 
> I guess I don't know what that is, 'error serialization data'.  Is
> there somewhere I can read more about it?

That's already supported in MCE -- saving the error record to NVRAM
and logging it after reboot. NMI should probably do the same.
It's much nicer than getting it from a console.

> > 
> > At least on PCI-E it may be enough to simply dump all recent AER
> > data.
> 
> This assumes AER is supported on the bridge?  Which for newer chips is
> probably true, but I wasn't sure about older ones.

Today's servers should usually have AER at least.

For old systems you only can get the few bits in PCI space.

> How would I dump AER data from within the kernel?

Would need a buffer that is dumped for past events and 
reading the registers for not yet reported. Right now some
infrastructure is needed.


-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 18:36                         ` Andi Kleen
@ 2010-09-13 19:36                           ` Don Zickus
  2010-09-13 20:49                             ` Andi Kleen
  2010-09-14 12:21                             ` Ingo Molnar
  0 siblings, 2 replies; 69+ messages in thread
From: Don Zickus @ 2010-09-13 19:36 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, Sep 13, 2010 at 08:36:54PM +0200, Andi Kleen wrote:
> 
> > > 
> > > But if it's generic if not on the screen it should
> > > be at least in the error serialization data and logged after boot.
> > 
> > I guess I don't know what that is, 'error serialization data'.  Is
> > there somewhere I can read more about it?
> 
> That's already supported in MCE -- saving the error record to NVRAM
> and logging it after reboot. NMI should probably do the same.
> It's much nicer than getting it from a console.

Hmm, that assumes these boxes have NVRAM.  I am not sure if many of the
boxes I see with problems have NVRAM on them.

> 
> > > 
> > > At least on PCI-E it may be enough to simply dump all recent AER
> > > data.
> > 
> > This assumes AER is supported on the bridge?  Which for newer chips is
> > probably true, but I wasn't sure about older ones.
> 
> Today's servers should usually have AER at least.
> 
> For old systems you only can get the few bits in PCI space.
> 
> > How would I dump AER data from within the kernel?
> 
> Would need a buffer that is dumped for past events and 
> reading the registers for not yet reported. Right now some
> infrastructure is needed.

Oh ok.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 19:36                           ` Don Zickus
@ 2010-09-13 20:49                             ` Andi Kleen
  2010-09-13 21:25                               ` Valdis.Kletnieks
  2010-09-14 12:21                             ` Ingo Molnar
  1 sibling, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-13 20:49 UTC (permalink / raw)
  To: Don Zickus; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, 13 Sep 2010 15:36:55 -0400
Don Zickus <dzickus@redhat.com> wrote:

> On Mon, Sep 13, 2010 at 08:36:54PM +0200, Andi Kleen wrote:
> > 
> > > > 
> > > > But if it's generic if not on the screen it should
> > > > be at least in the error serialization data and logged after
> > > > boot.
> > > 
> > > I guess I don't know what that is, 'error serialization data'.  Is
> > > there somewhere I can read more about it?
> > 
> > That's already supported in MCE -- saving the error record to NVRAM
> > and logging it after reboot. NMI should probably do the same.
> > It's much nicer than getting it from a console.
> 
> Hmm, that assumes these boxes have NVRAM.  I am not sure if many of
> the boxes I see with problems have NVRAM on them.

Practically every PC has a small amount of NVRAM.

-Andi


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 20:49                             ` Andi Kleen
@ 2010-09-13 21:25                               ` Valdis.Kletnieks
  2010-09-14  7:48                                 ` Andi Kleen
  0 siblings, 1 reply; 69+ messages in thread
From: Valdis.Kletnieks @ 2010-09-13 21:25 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Don Zickus, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 885 bytes --]

On Mon, 13 Sep 2010 22:49:52 +0200, Andi Kleen said:

> > > That's already supported in MCE -- saving the error record to NVRAM
> > > and logging it after reboot. NMI should probably do the same.
> > > It's much nicer than getting it from a console.
> > 
> > Hmm, that assumes these boxes have NVRAM.  I am not sure if many of
> > the boxes I see with problems have NVRAM on them.
> 
> Practically every PC has a small amount of NVRAM.

The big question is how much NVRAM the PC has that is safe for our NMI code to
hijack/borrow across the reboot without scrogging something that the BIOS has
squirreled away in there. I recall one patch that saved progress indicators during
early boot or something - but at the expense of stomping on the saved clock
settings or whatever so you rebooted and then you knew where your previos boot
wedged, but your system thought it was 1970 again. 


[-- Attachment #2: Type: application/pgp-signature, Size: 227 bytes --]

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-13 14:04           ` Don Zickus
@ 2010-09-14  5:12             ` Huang Ying
  2010-09-14 13:37               ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-14  5:12 UTC (permalink / raw)
  To: Don Zickus; +Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, 2010-09-13 at 22:04 +0800, Don Zickus wrote:
> On Mon, Sep 13, 2010 at 10:09:30AM +0800, Huang Ying wrote:
> > > The reason I asked was, I thought it would be easier to have a global
> > > variable that tells the nmi handler which cpu has the NMI's routed to its
> > > io port.  This way if you want to swap out the bsp cpu, you could perhaps
> > > just re-route the nmi to a new cpu and the global variable would be
> > > updated accordingly?
> > 
> > Then we need some kind of protection or race condition between
> > re-routing NMI and updating the variable. Do you think so?
> 
> Well, I thought the only reasonable place to update the variable is when
> the cpu is being taken offline, during the MTRR update.  Since no NMIs can
> be processed when the cpu's are syncing their MTRR, there shouldn't be a
> race condition, no?
> 
> Then again I am probably missing something obvious.  Like I don't know how
> cpu's deal with interrupts/NMIs when they are going offline.
> 
> It was just a thought to avoid the spinlock.

Why do you hate spinlock inside NMI handler? I think it is safe and
simple if only used in NMI handler.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 21:25                               ` Valdis.Kletnieks
@ 2010-09-14  7:48                                 ` Andi Kleen
  2010-09-14 17:54                                   ` Valdis.Kletnieks
  0 siblings, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-14  7:48 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: Don Zickus, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Mon, 13 Sep 2010 17:25:21 -0400
Valdis.Kletnieks@vt.edu wrote:

> > Practically every PC has a small amount of NVRAM.
> 
> The big question is how much NVRAM the PC has that is safe for our
> NMI code to hijack/borrow across the reboot without scrogging
> something that the BIOS has squirreled away in there. I recall one
> patch that saved progress indicators during early boot or something -
> but at the expense of stomping on the saved clock settings or
> whatever so you rebooted and then you knew where your previos boot
> wedged, but your system thought it was 1970 again. 

It's already implemented for MCE and it works on servers.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-13 19:36                           ` Don Zickus
  2010-09-13 20:49                             ` Andi Kleen
@ 2010-09-14 12:21                             ` Ingo Molnar
  2010-09-14 13:45                               ` Don Zickus
  2010-09-14 19:34                               ` Cyrill Gorcunov
  1 sibling, 2 replies; 69+ messages in thread
From: Ingo Molnar @ 2010-09-14 12:21 UTC (permalink / raw)
  To: Don Zickus; +Cc: Andi Kleen, Huang Ying, H. Peter Anvin, linux-kernel


* Don Zickus <dzickus@redhat.com> wrote:

> > > > At least on PCI-E it may be enough to simply dump all recent AER 
> > > > data.
> > > 
> > > This assumes AER is supported on the bridge?  Which for newer 
> > > chips is probably true, but I wasn't sure about older ones.
> > 
> > Today's servers should usually have AER at least.
> > 
> > For old systems you only can get the few bits in PCI space.
> > 
> > > How would I dump AER data from within the kernel?
> > 
> > Would need a buffer that is dumped for past events and reading the 
> > registers for not yet reported. Right now some infrastructure is 
> > needed.
> 
> Oh ok.

The proper approach would be not to add hacks to the NMI code but to 
implement southbridge drivers - which would also have NMI callbacks. 
These are unchartered waters, but variance in that space is reducing 
systematically so it would be worth a shot.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 4/6] x86, NMI, Rewrite NMI handler
  2010-09-14  5:12             ` Huang Ying
@ 2010-09-14 13:37               ` Don Zickus
  0 siblings, 0 replies; 69+ messages in thread
From: Don Zickus @ 2010-09-14 13:37 UTC (permalink / raw)
  To: Huang Ying; +Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel

On Tue, Sep 14, 2010 at 01:12:41PM +0800, Huang Ying wrote:
> On Mon, 2010-09-13 at 22:04 +0800, Don Zickus wrote:
> > On Mon, Sep 13, 2010 at 10:09:30AM +0800, Huang Ying wrote:
> > > > The reason I asked was, I thought it would be easier to have a global
> > > > variable that tells the nmi handler which cpu has the NMI's routed to its
> > > > io port.  This way if you want to swap out the bsp cpu, you could perhaps
> > > > just re-route the nmi to a new cpu and the global variable would be
> > > > updated accordingly?
> > > 
> > > Then we need some kind of protection or race condition between
> > > re-routing NMI and updating the variable. Do you think so?
> > 
> > Well, I thought the only reasonable place to update the variable is when
> > the cpu is being taken offline, during the MTRR update.  Since no NMIs can
> > be processed when the cpu's are syncing their MTRR, there shouldn't be a
> > race condition, no?
> > 
> > Then again I am probably missing something obvious.  Like I don't know how
> > cpu's deal with interrupts/NMIs when they are going offline.
> > 
> > It was just a thought to avoid the spinlock.
> 
> Why do you hate spinlock inside NMI handler? I think it is safe and
> simple if only used in NMI handler.

I guess I always had the mentality that spinlocks in an NMI context was a
big no-no.  Never really thought about if there were safe use-cases or
not.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 12:21                             ` Ingo Molnar
@ 2010-09-14 13:45                               ` Don Zickus
  2010-09-14 19:34                               ` Cyrill Gorcunov
  1 sibling, 0 replies; 69+ messages in thread
From: Don Zickus @ 2010-09-14 13:45 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andi Kleen, Huang Ying, H. Peter Anvin, linux-kernel

On Tue, Sep 14, 2010 at 02:21:31PM +0200, Ingo Molnar wrote:
> 
> * Don Zickus <dzickus@redhat.com> wrote:
> 
> > > > > At least on PCI-E it may be enough to simply dump all recent AER 
> > > > > data.
> > > > 
> > > > This assumes AER is supported on the bridge?  Which for newer 
> > > > chips is probably true, but I wasn't sure about older ones.
> > > 
> > > Today's servers should usually have AER at least.
> > > 
> > > For old systems you only can get the few bits in PCI space.
> > > 
> > > > How would I dump AER data from within the kernel?
> > > 
> > > Would need a buffer that is dumped for past events and reading the 
> > > registers for not yet reported. Right now some infrastructure is 
> > > needed.
> > 
> > Oh ok.
> 
> The proper approach would be not to add hacks to the NMI code but to 
> implement southbridge drivers - which would also have NMI callbacks. 
> These are unchartered waters, but variance in that space is reducing 
> systematically so it would be worth a shot.

Interesting.  I think the only southbridge I see regularly are Intel, AMD
and Nvidia (with Nvidia being more problematic than others).
Unfortunately, getting specs for Nvidia is very difficult.

But that might help narrow down where the NMI problem is.

Cheers,
Don

> 
> Thanks,
> 
> 	Ingo

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14  7:48                                 ` Andi Kleen
@ 2010-09-14 17:54                                   ` Valdis.Kletnieks
  0 siblings, 0 replies; 69+ messages in thread
From: Valdis.Kletnieks @ 2010-09-14 17:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Don Zickus, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 455 bytes --]

On Tue, 14 Sep 2010 09:48:13 +0200, Andi Kleen said:

> It's already implemented for MCE and it works on servers.

I'm OK with snarfing up some NVRAM, as long as we have some way to check "yes,
there are NN bytes of NVRAM architected for our use".

Did I miss the memo where it says "This code will only run on chipsets that
have architected nvram space for this sort of use"?  Wouldn't be the first time
I've misunderstood what code is really doing. ;)

[-- Attachment #2: Type: application/pgp-signature, Size: 227 bytes --]

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 12:21                             ` Ingo Molnar
  2010-09-14 13:45                               ` Don Zickus
@ 2010-09-14 19:34                               ` Cyrill Gorcunov
  2010-09-15  9:29                                 ` Ingo Molnar
  1 sibling, 1 reply; 69+ messages in thread
From: Cyrill Gorcunov @ 2010-09-14 19:34 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Don Zickus, Andi Kleen, Huang Ying, H. Peter Anvin, linux-kernel

On Tue, Sep 14, 2010 at 02:21:31PM +0200, Ingo Molnar wrote:
> 
... 
> The proper approach would be not to add hacks to the NMI code but to 
> implement southbridge drivers - which would also have NMI callbacks. 
> These are unchartered waters, but variance in that space is reducing 
> systematically so it would be worth a shot.
> 
> Thanks,
> 
> 	Ingo

Hi Ingo,

while there is a conversation about makeing NMI handler robust/modern or
whatever, I think the naming Huang has implemented for NMI Stat&Ctrl
registers/ports look quite good and convenient (I thought about this times
ago when being merging nmi-32/64 code but didn't implemented it properly).
So I presume perhaps we could merge this snippets first? Or I miss something
on discussion in general?

	-- Cyrill

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 19:34                               ` Cyrill Gorcunov
@ 2010-09-15  9:29                                 ` Ingo Molnar
  0 siblings, 0 replies; 69+ messages in thread
From: Ingo Molnar @ 2010-09-15  9:29 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Don Zickus, Andi Kleen, Huang Ying, H. Peter Anvin, linux-kernel


* Cyrill Gorcunov <gorcunov@gmail.com> wrote:

> On Tue, Sep 14, 2010 at 02:21:31PM +0200, Ingo Molnar wrote:
> > 
> ... 
> > The proper approach would be not to add hacks to the NMI code but to 
> > implement southbridge drivers - which would also have NMI callbacks. 
> > These are unchartered waters, but variance in that space is reducing 
> > systematically so it would be worth a shot.
> > 
> > Thanks,
> > 
> > 	Ingo
> 
> Hi Ingo,
> 
> while there is a conversation about makeing NMI handler robust/modern 
> or whatever, I think the naming Huang has implemented for NMI 
> Stat&Ctrl registers/ports look quite good and convenient (I thought 
> about this times ago when being merging nmi-32/64 code but didn't 
> implemented it properly). So I presume perhaps we could merge this 
> snippets first? Or I miss something on discussion in general?

Yeah, i'm waiting for Don to pick out the good ones and send them to me 
after a bit of testing (he's been driving this topic lately). We can 
obviously make some progress.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-13  2:02     ` Huang Ying
@ 2010-09-16  8:18       ` Robert Richter
  2010-09-17  0:08         ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Robert Richter @ 2010-09-16  8:18 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 12.09.10 22:02:06, Huang Ying wrote:
> > Haven't you introduced *_MEMPAR with patch 1/6?
> 
> Yes. And I suggest to rename it to *_SERR in this patch to reflect the
> contemporary hardware better. I think they are two steps: naming the
> magic number and changing the name of constant and function.

Hmm, I would rather avoid introducing a name and then immediatly
renaming it again.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-16  8:18       ` Robert Richter
@ 2010-09-17  0:08         ` Huang Ying
  2010-09-17  9:14           ` Robert Richter
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-17  0:08 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Thu, 2010-09-16 at 16:18 +0800, Robert Richter wrote:
> On 12.09.10 22:02:06, Huang Ying wrote:
> > > Haven't you introduced *_MEMPAR with patch 1/6?
> > 
> > Yes. And I suggest to rename it to *_SERR in this patch to reflect the
> > contemporary hardware better. I think they are two steps: naming the
> > magic number and changing the name of constant and function.
> 
> Hmm, I would rather avoid introducing a name and then immediatly
> renaming it again.

I split them into two patches because there is function named as
mem_parity_error already. The 1/6 names constants as *_MEMPAR to make it
consistent with mem_parity_error(). 3/6 rename both the constants,
function name and some string/comments to serr. Do you think this is
more clean?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-17  0:08         ` Huang Ying
@ 2010-09-17  9:14           ` Robert Richter
  2010-09-19  0:20             ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Robert Richter @ 2010-09-17  9:14 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 16.09.10 20:08:25, Huang Ying wrote:
> On Thu, 2010-09-16 at 16:18 +0800, Robert Richter wrote:
> > On 12.09.10 22:02:06, Huang Ying wrote:
> > > > Haven't you introduced *_MEMPAR with patch 1/6?
> > > 
> > > Yes. And I suggest to rename it to *_SERR in this patch to reflect the
> > > contemporary hardware better. I think they are two steps: naming the
> > > magic number and changing the name of constant and function.
> > 
> > Hmm, I would rather avoid introducing a name and then immediatly
> > renaming it again.
> 
> I split them into two patches because there is function named as
> mem_parity_error already. The 1/6 names constants as *_MEMPAR to make it
> consistent with mem_parity_error(). 3/6 rename both the constants,
> function name and some string/comments to serr. Do you think this is
> more clean?

If I am not wrong the only real functional change is to rip out the
edac handler. So, just introduce the final names with your renaming in
patch #1 (and maybe make a comment in the commit message or change
patch order).

-Robert

> 
> Best Regards,
> Huang Ying
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-17  9:14           ` Robert Richter
@ 2010-09-19  0:20             ` Huang Ying
  2010-09-20  8:00               ` Robert Richter
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-19  0:20 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Fri, 2010-09-17 at 17:14 +0800, Robert Richter wrote:
> On 16.09.10 20:08:25, Huang Ying wrote:
> > On Thu, 2010-09-16 at 16:18 +0800, Robert Richter wrote:
> > > On 12.09.10 22:02:06, Huang Ying wrote:
> > > > > Haven't you introduced *_MEMPAR with patch 1/6?
> > > > 
> > > > Yes. And I suggest to rename it to *_SERR in this patch to reflect the
> > > > contemporary hardware better. I think they are two steps: naming the
> > > > magic number and changing the name of constant and function.
> > > 
> > > Hmm, I would rather avoid introducing a name and then immediatly
> > > renaming it again.
> > 
> > I split them into two patches because there is function named as
> > mem_parity_error already. The 1/6 names constants as *_MEMPAR to make it
> > consistent with mem_parity_error(). 3/6 rename both the constants,
> > function name and some string/comments to serr. Do you think this is
> > more clean?
> 
> If I am not wrong the only real functional change is to rip out the
> edac handler. So, just introduce the final names with your renaming in
> patch #1 (and maybe make a comment in the commit message or change
> patch order).

If I merge the 1/6 and 3/6, can you suggest a patch subject, "Add symbol
definition for NMI magic constants and rename memory parity to PCI
SERR"?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-19  0:20             ` Huang Ying
@ 2010-09-20  8:00               ` Robert Richter
  2010-09-20 12:59                 ` Borislav Petkov
  0 siblings, 1 reply; 69+ messages in thread
From: Robert Richter @ 2010-09-20  8:00 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 18.09.10 20:20:35, Huang Ying wrote:
> > If I am not wrong the only real functional change is to rip out the
> > edac handler. So, just introduce the final names with your renaming in
> > patch #1 (and maybe make a comment in the commit message or change
> > patch order).
> 
> If I merge the 1/6 and 3/6, can you suggest a patch subject, "Add symbol
> definition for NMI magic constants and rename memory parity to PCI
> SERR"?

Yes, maybe we make the edac change a separate one.

Thanks,

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-20  8:00               ` Robert Richter
@ 2010-09-20 12:59                 ` Borislav Petkov
  2010-09-21  0:22                   ` Huang Ying
  0 siblings, 1 reply; 69+ messages in thread
From: Borislav Petkov @ 2010-09-20 12:59 UTC (permalink / raw)
  To: Huang Ying
  Cc: Robert Richter, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Andi Kleen, Doug Thompson

From: Robert Richter <robert.richter@amd.com>
Date: Mon, Sep 20, 2010 at 10:00:10AM +0200

(adding Doug to CC)

> On 18.09.10 20:20:35, Huang Ying wrote:
> > > If I am not wrong the only real functional change is to rip out the
> > > edac handler. So, just introduce the final names with your renaming in
> > > patch #1 (and maybe make a comment in the commit message or change
> > > patch order).
> > 
> > If I merge the 1/6 and 3/6, can you suggest a patch subject, "Add symbol
> > definition for NMI magic constants and rename memory parity to PCI
> > SERR"?
> 
> Yes, maybe we make the edac change a separate one.

What is more, there are a bunch of edac drivers using the PCI SERR nmi
as a means to check for PCI errors so we shouldn't be removing it now,
should we?

-- 
Regards/Gruss,
Boris.

Operating Systems Research Center
Advanced Micro Devices, Inc.


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-20 12:59                 ` Borislav Petkov
@ 2010-09-21  0:22                   ` Huang Ying
  2010-09-21  6:37                     ` Borislav Petkov
  0 siblings, 1 reply; 69+ messages in thread
From: Huang Ying @ 2010-09-21  0:22 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Robert Richter, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Andi Kleen, Doug Thompson

On Mon, 2010-09-20 at 20:59 +0800, Borislav Petkov wrote:
> From: Robert Richter <robert.richter@amd.com>
> Date: Mon, Sep 20, 2010 at 10:00:10AM +0200
> 
> (adding Doug to CC)
> 
> > On 18.09.10 20:20:35, Huang Ying wrote:
> > > > If I am not wrong the only real functional change is to rip out the
> > > > edac handler. So, just introduce the final names with your renaming in
> > > > patch #1 (and maybe make a comment in the commit message or change
> > > > patch order).
> > > 
> > > If I merge the 1/6 and 3/6, can you suggest a patch subject, "Add symbol
> > > definition for NMI magic constants and rename memory parity to PCI
> > > SERR"?
> > 
> > Yes, maybe we make the edac change a separate one.
> 
> What is more, there are a bunch of edac drivers using the PCI SERR nmi
> as a means to check for PCI errors so we shouldn't be removing it now,
> should we?

After checking the source, I found in mem_parity_error (will renamed to
pci_serr_error), edac_atomic_assert_error() is called, which increase
edac_err_assert, edac_err_assert is used in
edac_mc_assert_error_check_and_clear(), which is used in
edac_mc_workq_function for memory error only, not for PCI errors.

Do we really need PCI SERR NMI for memory errors?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-21  0:22                   ` Huang Ying
@ 2010-09-21  6:37                     ` Borislav Petkov
  2010-09-21 14:08                       ` Doug Thompson
  0 siblings, 1 reply; 69+ messages in thread
From: Borislav Petkov @ 2010-09-21  6:37 UTC (permalink / raw)
  To: Huang Ying
  Cc: Richter, Robert, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Andi Kleen, Doug Thompson, edac-devel

From: Huang Ying <ying.huang@intel.com>
Date: Mon, Sep 20, 2010 at 08:22:28PM -0400

(Forgot to add edac-devel to Cc)

> > What is more, there are a bunch of edac drivers using the PCI SERR nmi
> > as a means to check for PCI errors so we shouldn't be removing it now,
> > should we?
> 
> After checking the source, I found in mem_parity_error (will renamed to
> pci_serr_error), edac_atomic_assert_error() is called, which increase
> edac_err_assert, edac_err_assert is used in
> edac_mc_assert_error_check_and_clear(), which is used in
> edac_mc_workq_function for memory error only, not for PCI errors.

Yes, I suppose the edac part in the mem_parity_error() was originally
meant for memory parity errors. Now, I understand your incentive of
changing that to handle PCI SERR errors but by axing the edac part,
you're practically disabling the mci->edac_check() call for edac
drivers using NMIs for error reporting (I don't know how many do that,
btw...) and almost every edac driver defines that function pointer to a
driver-specific error checking function.

So if there are no more IBM PC-AT machines running Linux out, I
think we can rip out the whole code around edac_err_assert and thus
remove the edac_mc_assert_error_check_and_clear() part from the
edac_mc_workq_function() which would make all edac drivers solely poll
for mem errors.

What do the others think, Doug?

-- 
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-21  6:37                     ` Borislav Petkov
@ 2010-09-21 14:08                       ` Doug Thompson
  0 siblings, 0 replies; 69+ messages in thread
From: Doug Thompson @ 2010-09-21 14:08 UTC (permalink / raw)
  To: Huang Ying, Borislav Petkov
  Cc: RobertRichter, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Andi Kleen, edac-devel

--- On Tue, 9/21/10, Borislav Petkov <borislav.petkov@amd.com> wrote:

> From: Borislav Petkov <borislav.petkov@amd.com>
> Subject: Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
> To: "Huang Ying" <ying.huang@intel.com>
> Cc: "Richter, Robert" <robert.richter@amd.com>, "Ingo Molnar" <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>, "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>, "Andi Kleen" <andi@firstfloor.org>, "Doug Thompson" <norsk5@yahoo.com>, "edac-devel" <linux-edac@vger.kernel.org>
> Date: Tuesday, September 21, 2010, 12:37 AM
> From: Huang Ying <ying.huang@intel.com>
> Date: Mon, Sep 20, 2010 at 08:22:28PM -0400
> 
> (Forgot to add edac-devel to Cc)

thanks

> 
> > > What is more, there are a bunch of edac drivers
> using the PCI SERR nmi
> > > as a means to check for PCI errors so we
> shouldn't be removing it now,
> > > should we?
> > 
> > After checking the source, I found in mem_parity_error
> (will renamed to
> > pci_serr_error), edac_atomic_assert_error() is called,
> which increase
> > edac_err_assert, edac_err_assert is used in
> > edac_mc_assert_error_check_and_clear(), which is used
> in
> > edac_mc_workq_function for memory error only, not for
> PCI errors.
> 
> Yes, I suppose the edac part in the mem_parity_error() was
> originally
> meant for memory parity errors. Now, I understand your
> incentive of
> changing that to handle PCI SERR errors but by axing the
> edac part,
> you're practically disabling the mci->edac_check() call
> for edac
> drivers using NMIs for error reporting (I don't know how
> many do that,
> btw...) and almost every edac driver defines that function
> pointer to a
> driver-specific error checking function.
> 
> So if there are no more IBM PC-AT machines running Linux
> out, I
> think we can rip out the whole code around edac_err_assert
> and thus
> remove the edac_mc_assert_error_check_and_clear() part from
> the
> edac_mc_workq_function() which would make all edac drivers
> solely poll
> for mem errors.
> 
> What do the others think, Doug?

History lesson:

The addition of PCI bus ERROR scanning was added to EDAC when we (at Linux Networx, 2005 timeframe) discovered a bad PCI riser card. Due to bad manufacturing, it would cause PCI bus errors during transfers. We determined that by scanning the PCI bus, we could isolate bad cards and verify proper PCI bus operation on those cards and other PCI devices.

Once that was in place, we found other systems that also had PCI bus errors, though less frequently than our original system. A handful of errors were discovered and we found that not many systems were handling any of them, nor even reporting them. Hence the PCI bus scanner of EDAC.  I profiled it, and it took on average 2500 TSC cycles to perform a complete bus scan which occurred every second. So it was an expensive operation. That is why I added controls to turn off PCI bus scanning if desired.

Memory parity errors via NMI was NOT the prime reason it was added to EDAC. Looking at SERR (or any PCI bus error) was the prime reason it was added. There was a patch to better handle NMI event handling better, but didn't get pushed upstream.'

I have not kept up with the kernel PCI SERR error handling code development else (if any) and don't fully know its features nor current operation.

If it is determined that EDAC no longer needs to scan the PCI bus because some other system is now managing that error checking operation, great!  

It was a wart in a way, hanging in the memory driver. I really wanted to revisit that and either set it up as a separate driver or embedded in the EDAC core or something.

doug thompson

> 
> -- 
> Regards/Gruss,
> Boris.
> 
> Advanced Micro Devices GmbH
> Einsteinring 24, 85609 Dornach
> General Managers: Alberto Bozzo, Andrew Bowd
> Registration: Dornach, Gemeinde Aschheim, Landkreis
> Muenchen
> Registergericht Muenchen, HRB Nr. 43632
> 
> 

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (5 preceding siblings ...)
  2010-09-10 20:37 ` [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Peter Zijlstra
@ 2010-09-21 21:48 ` Don Zickus
  2010-09-21 22:19   ` Andi Kleen
  2010-09-23  9:51   ` huang ying
  6 siblings, 2 replies; 69+ messages in thread
From: Don Zickus @ 2010-09-21 21:48 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Fri, Sep 10, 2010 at 10:51:00AM +0800, Huang Ying wrote:
> Replace the NMI related magic numbers with symbol constants.

Hi Huang,

Sorry for disappearing for a week..

Ingo asked me to shepherd these patches.  I finally got around to do some
testing on them.  I'll do some more tomorrow.

Anyway, I don't have a problem with patches 1-3 and 6 (I guess the rename
and rename again doesn't really bother me and it kinda makes some logical
sense).

I am ok with most of patch 4 but I was wondering if you could split out
the part of using other cpus to access the reason register.  To me it seem
like the nmi handler rewrite and allowing !bsp cpus to access the reason
registers were two different ideas.  For bisecting reasons it would be
easier to seperate them in case we have problems with lost NMIs later.  It
would be easier to determine if the lost NMIs were from the rewrite or the
migration of the reason register to other cpus.

I still have a stupid hangup about the raw_spin_lock but if no one else
has any issues, then I'll just shutup about it. :-)

As for patch 5, I am worried about breaking existing user systems.  I went
through the fedora buglist and noticed a couple dozen bugzillas
complaining about unknown nmis.  The people complaining still seemed to
have functioning systems (at least they seemed to think so).  Adding in
the panic gets me worried that we might break a user's setup and cause
them regressions.

Though I understand what Andi is saying an unknown NMI is bad and the
system should panic, but on the other hand, unless we have a way of
analyzing it and give a user an option to either fix it or override it,
just panicing may not be the best way right now IMO.

I guess adding either another knob to override the hardware error option
or tying it in with the panic_on_unknown_error option might make me more
comfortable.  That way enterprise customers can always just enable it by
default and desktop users (for now) could have it off.

Thoughts?

Cheers,
Don
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/include/asm/mach_traps.h |   12 +++++++++++-
>  arch/x86/kernel/traps.c           |   18 +++++++++---------
>  2 files changed, 20 insertions(+), 10 deletions(-)
> 
> --- a/arch/x86/include/asm/mach_traps.h
> +++ b/arch/x86/include/asm/mach_traps.h
> @@ -7,9 +7,19 @@
>  
>  #include <asm/mc146818rtc.h>
>  
> +#define NMI_REASON_PORT		0x61
> +
> +#define NMI_REASON_MEMPAR	0x80
> +#define NMI_REASON_IOCHK	0x40
> +#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
> +
> +#define NMI_REASON_CLEAR_MEMPAR	0x04
> +#define NMI_REASON_CLEAR_IOCHK	0x08
> +#define NMI_REASON_CLEAR_MASK	0x0f
> +
>  static inline unsigned char get_nmi_reason(void)
>  {
> -	return inb(0x61);
> +	return inb(NMI_REASON_PORT);
>  }
>  
>  static inline void reassert_nmi(void)
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -323,8 +323,8 @@ mem_parity_error(unsigned char reason, s
>  	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
>  
>  	/* Clear and disable the memory parity error line. */
> -	reason = (reason & 0xf) | 4;
> -	outb(reason, 0x61);
> +	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_MEMPAR;
> +	outb(reason, NMI_REASON_PORT);
>  }
>  
>  static notrace __kprobes void
> @@ -339,15 +339,15 @@ io_check_error(unsigned char reason, str
>  		panic("NMI IOCK error: Not continuing");
>  
>  	/* Re-enable the IOCK line, wait for a few seconds */
> -	reason = (reason & 0xf) | 8;
> -	outb(reason, 0x61);
> +	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
> +	outb(reason, NMI_REASON_PORT);
>  
>  	i = 2000;
>  	while (--i)
>  		udelay(1000);
>  
> -	reason &= ~8;
> -	outb(reason, 0x61);
> +	reason &= ~NMI_REASON_CLEAR_IOCHK;
> +	outb(reason, NMI_REASON_PORT);
>  }
>  
>  static notrace __kprobes void
> @@ -388,7 +388,7 @@ static notrace __kprobes void default_do
>  	if (!cpu)
>  		reason = get_nmi_reason();
>  
> -	if (!(reason & 0xc0)) {
> +	if (!(reason & NMI_REASON_MASK)) {
>  		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
>  								== NOTIFY_STOP)
>  			return;
> @@ -418,9 +418,9 @@ static notrace __kprobes void default_do
>  		return;
>  
>  	/* AK: following checks seem to be broken on modern chipsets. FIXME */
> -	if (reason & 0x80)
> +	if (reason & NMI_REASON_MEMPAR)
>  		mem_parity_error(reason, regs);
> -	if (reason & 0x40)
> +	if (reason & NMI_REASON_IOCHK)
>  		io_check_error(reason, regs);
>  #ifdef CONFIG_X86_32
>  	/*
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-21 21:48 ` Don Zickus
@ 2010-09-21 22:19   ` Andi Kleen
  2010-09-22 16:07     ` Don Zickus
  2010-09-23  9:51   ` huang ying
  1 sibling, 1 reply; 69+ messages in thread
From: Andi Kleen @ 2010-09-21 22:19 UTC (permalink / raw)
  To: Don Zickus
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen


>
> I guess adding either another knob to override the hardware error option
> or tying it in with the panic_on_unknown_error option might make me more
> comfortable.  That way enterprise customers can always just enable it by
> default and desktop users (for now) could have it off.

Anything that needs explicit enabling is a bad idea, that
would lead to a lot of users running in "corrupt my data" mode.

The code currently uses the presence of a HEST error table
to detect a server. HEST should be only available on servers.

On servers at least panic should be default.

-Andi



^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-10  2:51 ` [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
  2010-09-13  1:02   ` Robert Richter
@ 2010-09-21 23:04   ` Maciej W. Rozycki
  2010-09-23  5:37     ` huang ying
  1 sibling, 1 reply; 69+ messages in thread
From: Maciej W. Rozycki @ 2010-09-21 23:04 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Fri, 10 Sep 2010, Huang Ying wrote:

> memory parity error is only valid for IBM PC-AT, newer machine use 7
> bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
> reported via MCE. So corresponding function name and kernel log string
> is changed.

 Things perhaps changed over the last few years while I have not been 
watching, but for many years the bit #7 of the NMI status port 
(implemented by the southbridge at 0x61 in the port I/O space) was still 
used for memory parity or ECC errors even after the original IBM PC/AT.  
The usual arrangement was in the event of a memory error the memory 
controller in the northbridge would assert the chip's PCI SERR output 
line, which in turn would be trapped by the southbridge and converted to 
an NMI event while setting the said bit in the NMI status port.  See e.g. 
the 82349HX System Controller datasheet (Intel document number 290551).

 So the name of the error reported is not that unjustified except, of 
course, to be precise the handler would have to scan the state of the SERR 
output reported by all the PCI devices in the PCI configuration space to 
find the originator and then interpret the event accordingly.  Which 
obviously means the only piece of code that could exactly know what the 
reason was is the respective device driver as causes of SERR are 
device-specific and may require processing of device-specific registers to 
determine the cause and/or recover (a device reset may be required in some 
cases).

 Of course using the MCE seems natural and better, especially if the 
exception can be raised synchronously and stop the failing memory load CPU 
instruction from completion -- this is important for parity and MBE ECC 
errors, where in some cases the handler may be able to retry the failing 
operation having refreshed RAM from the backing store or otherwise the 
affected process must be killed (unless a kernel memory location is 
involved that is, where the whole system has to be brought down).

 OTOH, for CPU stores and DMA transactions the event will always be 
asynchronous and an NMI might be a better option, as in the case of parity 
and MBE ECC errors the whole system will probably have to be brought down, 
and with SBE ECC errors scrubbing can be done at any time and otherwise 
(except from logging and/or marking the physical page bad, as required) no 
action is needed.

  Maciej

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-21 22:19   ` Andi Kleen
@ 2010-09-22 16:07     ` Don Zickus
  2010-09-23  9:29       ` huang ying
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-22 16:07 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Wed, Sep 22, 2010 at 12:19:16AM +0200, Andi Kleen wrote:
> 
> >
> > I guess adding either another knob to override the hardware error option
> > or tying it in with the panic_on_unknown_error option might make me more
> > comfortable.  That way enterprise customers can always just enable it by
> > default and desktop users (for now) could have it off.
> 
> Anything that needs explicit enabling is a bad idea, that
> would lead to a lot of users running in "corrupt my data" mode.

I know.  But as I said earlier in my emails, I am trying to figure out how
to deal with the fallout from unknown nmis turning into panics.  Today
people see unknown nmis.  They may or may not be corrupting data.  They
usually file a bug.  Currently it is hard for me to diagnosis the problem.
Usually the old 'upgrade your bios/firmware' does the trick.  Sometimes it
doesn't and people feel like their machines still run fine.  So they
ignore it (for good or for bad).

Turning unknown nmis into panics would break their current setup without
much gain.  So I was trying to propose something temporarily until we
could get a better infrastructure to isolate the source and provide better
info on what to do.

I agree with you that long term unknown nmis should be panics.  I just get
nervous about doing that now from a support perspective.

> 
> The code currently uses the presence of a HEST error table
> to detect a server. HEST should be only available on servers.
> 
> On servers at least panic should be default.

Ok.  That's fine. But then what.  What does a developer do with that
panic?  There's no useful info.  That is sorta my problem.  Then again I
do not know much about HEST.

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-21 23:04   ` Maciej W. Rozycki
@ 2010-09-23  5:37     ` huang ying
  2010-09-29  0:26       ` Maciej W. Rozycki
  0 siblings, 1 reply; 69+ messages in thread
From: huang ying @ 2010-09-23  5:37 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

Hi, Maciej,

On Wed, Sep 22, 2010 at 7:04 AM, Maciej W. Rozycki <macro@linux-mips.org> wrote:
> On Fri, 10 Sep 2010, Huang Ying wrote:
>
>> memory parity error is only valid for IBM PC-AT, newer machine use 7
>> bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
>> reported via MCE. So corresponding function name and kernel log string
>> is changed.
>
>  Things perhaps changed over the last few years while I have not been
> watching, but for many years the bit #7 of the NMI status port
> (implemented by the southbridge at 0x61 in the port I/O space) was still
> used for memory parity or ECC errors even after the original IBM PC/AT.
> The usual arrangement was in the event of a memory error the memory
> controller in the northbridge would assert the chip's PCI SERR output
> line, which in turn would be trapped by the southbridge and converted to
> an NMI event while setting the said bit in the NMI status port.  See e.g.
> the 82349HX System Controller datasheet (Intel document number 290551).

Thanks for your information. So EDAC function call in NMI handler
should be kept? But as you pointed out, the function name of
corresponding handler should be PCI SERR instead of memory parity. It
just can be used to report memory error on some system. I think we can
rename the function and string to PCI SERR and add some comments for
EDAC function call that checks memory errors.

>  So the name of the error reported is not that unjustified except, of
> course, to be precise the handler would have to scan the state of the SERR
> output reported by all the PCI devices in the PCI configuration space to
> find the originator and then interpret the event accordingly.  Which
> obviously means the only piece of code that could exactly know what the
> reason was is the respective device driver as causes of SERR are
> device-specific and may require processing of device-specific registers to
> determine the cause and/or recover (a device reset may be required in some
> cases).

In addition to PCI SERR, I think modern system rely more on PCIE AER,
which can report more information about error. There are recovery
support for PCIE AER in kernel already. Do we need some similar
mechanism for PCI SERR? Because PCIE AER becomes more and more common
on server platform, I think some minimal check such as scaning devices
SERR/PERR bit should be sufficient.

>  Of course using the MCE seems natural and better, especially if the
> exception can be raised synchronously and stop the failing memory load CPU
> instruction from completion -- this is important for parity and MBE ECC
> errors, where in some cases the handler may be able to retry the failing
> operation having refreshed RAM from the backing store or otherwise the
> affected process must be killed (unless a kernel memory location is
> involved that is, where the whole system has to be brought down).
>
>  OTOH, for CPU stores and DMA transactions the event will always be
> asynchronous and an NMI might be a better option, as in the case of parity
> and MBE ECC errors the whole system will probably have to be brought down,
> and with SBE ECC errors scrubbing can be done at any time and otherwise
> (except from logging and/or marking the physical page bad, as required) no
> action is needed.

In fact, MCE is a special exception, it can be used for asynchronous
events too. Such as memory error detected by patrol scrubbing, please
take a look at latest Intel 64 and IA32 architectures software
developer's manual Vol 3A section 15.9.3: Architecturally Defined UCR
Errors.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-22 16:07     ` Don Zickus
@ 2010-09-23  9:29       ` huang ying
  2010-09-23 14:16         ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: huang ying @ 2010-09-23  9:29 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

Hi, Don,

On Thu, Sep 23, 2010 at 12:07 AM, Don Zickus <dzickus@redhat.com> wrote:
> On Wed, Sep 22, 2010 at 12:19:16AM +0200, Andi Kleen wrote:
>>
>> >
>> > I guess adding either another knob to override the hardware error option
>> > or tying it in with the panic_on_unknown_error option might make me more
>> > comfortable.  That way enterprise customers can always just enable it by
>> > default and desktop users (for now) could have it off.
>>
>> Anything that needs explicit enabling is a bad idea, that
>> would lead to a lot of users running in "corrupt my data" mode.
>
> I know.  But as I said earlier in my emails, I am trying to figure out how
> to deal with the fallout from unknown nmis turning into panics.  Today
> people see unknown nmis.  They may or may not be corrupting data.  They
> usually file a bug.  Currently it is hard for me to diagnosis the problem.
> Usually the old 'upgrade your bios/firmware' does the trick.  Sometimes it
> doesn't and people feel like their machines still run fine.  So they
> ignore it (for good or for bad).
>
> Turning unknown nmis into panics would break their current setup without
> much gain.  So I was trying to propose something temporarily until we
> could get a better infrastructure to isolate the source and provide better
> info on what to do.
>
> I agree with you that long term unknown nmis should be panics.  I just get
> nervous about doing that now from a support perspective.

In fact, we use white list policy here. Only systems with HEST or
identified by chipset host bridge PCI ID will panic for unknown NMI.
So I think systems you worried about will not have this enabled.

>> The code currently uses the presence of a HEST error table
>> to detect a server. HEST should be only available on servers.
>>
>> On servers at least panic should be default.
>
> Ok.  That's fine. But then what.  What does a developer do with that
> panic?  There's no useful info.  That is sorta my problem.  Then again I
> do not know much about HEST.

On some system, there is some hardware error log in BMC/BIOS. The
hardware error log can be gotten via IPMI or BIOS menu. Otherwise, can
we get some useful info for unknown NMI? If we can, can we collect the
info, then print it on console and save it into flash via ERST (part
of APEI too) before panic?

HEST is defined in ACPI spec 4.0 and later version in section named
APEI (ACPI Platform Error Interface). It is used to describe the error
sources of system. It should be available only on server platform.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-21 21:48 ` Don Zickus
  2010-09-21 22:19   ` Andi Kleen
@ 2010-09-23  9:51   ` huang ying
  1 sibling, 0 replies; 69+ messages in thread
From: huang ying @ 2010-09-23  9:51 UTC (permalink / raw)
  To: Don Zickus
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

Hi, Don,

On Wed, Sep 22, 2010 at 5:48 AM, Don Zickus <dzickus@redhat.com> wrote:
> On Fri, Sep 10, 2010 at 10:51:00AM +0800, Huang Ying wrote:
>> Replace the NMI related magic numbers with symbol constants.
>
> Hi Huang,
>
> Sorry for disappearing for a week..
>
> Ingo asked me to shepherd these patches.  I finally got around to do some
> testing on them.  I'll do some more tomorrow.

Thanks. I will post a new version in next week according to comments
collected so far.

> Anyway, I don't have a problem with patches 1-3 and 6 (I guess the rename
> and rename again doesn't really bother me and it kinda makes some logical
> sense).
>
> I am ok with most of patch 4 but I was wondering if you could split out
> the part of using other cpus to access the reason register.  To me it seem
> like the nmi handler rewrite and allowing !bsp cpus to access the reason
> registers were two different ideas.  For bisecting reasons it would be
> easier to seperate them in case we have problems with lost NMIs later.  It
> would be easier to determine if the lost NMIs were from the rewrite or the
> migration of the reason register to other cpus.

Yes. It's reasonable, I will do it.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-23  9:29       ` huang ying
@ 2010-09-23 14:16         ` Don Zickus
  2010-09-24 11:50           ` huang ying
  0 siblings, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-23 14:16 UTC (permalink / raw)
  To: huang ying
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Thu, Sep 23, 2010 at 05:29:57PM +0800, huang ying wrote:
> Hi, Don,
> 
> On Thu, Sep 23, 2010 at 12:07 AM, Don Zickus <dzickus@redhat.com> wrote:
> > On Wed, Sep 22, 2010 at 12:19:16AM +0200, Andi Kleen wrote:
> >>
> >> >
> >> > I guess adding either another knob to override the hardware error option
> >> > or tying it in with the panic_on_unknown_error option might make me more
> >> > comfortable.  That way enterprise customers can always just enable it by
> >> > default and desktop users (for now) could have it off.
> >>
> >> Anything that needs explicit enabling is a bad idea, that
> >> would lead to a lot of users running in "corrupt my data" mode.
> >
> > I know.  But as I said earlier in my emails, I am trying to figure out how
> > to deal with the fallout from unknown nmis turning into panics.  Today
> > people see unknown nmis.  They may or may not be corrupting data.  They
> > usually file a bug.  Currently it is hard for me to diagnosis the problem.
> > Usually the old 'upgrade your bios/firmware' does the trick.  Sometimes it
> > doesn't and people feel like their machines still run fine.  So they
> > ignore it (for good or for bad).
> >
> > Turning unknown nmis into panics would break their current setup without
> > much gain.  So I was trying to propose something temporarily until we
> > could get a better infrastructure to isolate the source and provide better
> > info on what to do.
> >
> > I agree with you that long term unknown nmis should be panics.  I just get
> > nervous about doing that now from a support perspective.
> 
> In fact, we use white list policy here. Only systems with HEST or
> identified by chipset host bridge PCI ID will panic for unknown NMI.
> So I think systems you worried about will not have this enabled.
> 
> >> The code currently uses the presence of a HEST error table
> >> to detect a server. HEST should be only available on servers.
> >>
> >> On servers at least panic should be default.
> >
> > Ok.  That's fine. But then what.  What does a developer do with that
> > panic?  There's no useful info.  That is sorta my problem.  Then again I
> > do not know much about HEST.
> 
> On some system, there is some hardware error log in BMC/BIOS. The
> hardware error log can be gotten via IPMI or BIOS menu. Otherwise, can
> we get some useful info for unknown NMI? If we can, can we collect the
> info, then print it on console and save it into flash via ERST (part
> of APEI too) before panic?

Ok.  Does the BIOS/BMC automatically do this?  Can we just print a message
on panic saying checking your BIOS/BMC logs for more info?

I would love to add code to gather more useful info for unknown NMIs, but
is it expected that HEST does some of this?  I guess what I am trying to
figure out, if we are going to put intelligence to detect a HEST enabled
machine and panic when unknown NMI comes along (presumably from HEST??),
then can we leverage HEST at all to understand why the NMI happened or
point the user to the BIOS/BMC to get more info.  In other words, what
value do we get HEST other than we detect its there, lets panic.

> 
> HEST is defined in ACPI spec 4.0 and later version in section named
> APEI (ACPI Platform Error Interface). It is used to describe the error
> sources of system. It should be available only on server platform.

Ok.  Does the kernel have intelligence to use it or the BIOS yet?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-23 14:16         ` Don Zickus
@ 2010-09-24 11:50           ` huang ying
  2010-09-24 14:29             ` Don Zickus
  0 siblings, 1 reply; 69+ messages in thread
From: huang ying @ 2010-09-24 11:50 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Thu, Sep 23, 2010 at 10:16 PM, Don Zickus <dzickus@redhat.com> wrote:
> On Thu, Sep 23, 2010 at 05:29:57PM +0800, huang ying wrote:
>> Hi, Don,
>>
>> On Thu, Sep 23, 2010 at 12:07 AM, Don Zickus <dzickus@redhat.com> wrote:
>> > On Wed, Sep 22, 2010 at 12:19:16AM +0200, Andi Kleen wrote:
>> >>
>> >> >
>> >> > I guess adding either another knob to override the hardware error option
>> >> > or tying it in with the panic_on_unknown_error option might make me more
>> >> > comfortable.  That way enterprise customers can always just enable it by
>> >> > default and desktop users (for now) could have it off.
>> >>
>> >> Anything that needs explicit enabling is a bad idea, that
>> >> would lead to a lot of users running in "corrupt my data" mode.
>> >
>> > I know.  But as I said earlier in my emails, I am trying to figure out how
>> > to deal with the fallout from unknown nmis turning into panics.  Today
>> > people see unknown nmis.  They may or may not be corrupting data.  They
>> > usually file a bug.  Currently it is hard for me to diagnosis the problem.
>> > Usually the old 'upgrade your bios/firmware' does the trick.  Sometimes it
>> > doesn't and people feel like their machines still run fine.  So they
>> > ignore it (for good or for bad).
>> >
>> > Turning unknown nmis into panics would break their current setup without
>> > much gain.  So I was trying to propose something temporarily until we
>> > could get a better infrastructure to isolate the source and provide better
>> > info on what to do.
>> >
>> > I agree with you that long term unknown nmis should be panics.  I just get
>> > nervous about doing that now from a support perspective.
>>
>> In fact, we use white list policy here. Only systems with HEST or
>> identified by chipset host bridge PCI ID will panic for unknown NMI.
>> So I think systems you worried about will not have this enabled.
>>
>> >> The code currently uses the presence of a HEST error table
>> >> to detect a server. HEST should be only available on servers.
>> >>
>> >> On servers at least panic should be default.
>> >
>> > Ok.  That's fine. But then what.  What does a developer do with that
>> > panic?  There's no useful info.  That is sorta my problem.  Then again I
>> > do not know much about HEST.
>>
>> On some system, there is some hardware error log in BMC/BIOS. The
>> hardware error log can be gotten via IPMI or BIOS menu. Otherwise, can
>> we get some useful info for unknown NMI? If we can, can we collect the
>> info, then print it on console and save it into flash via ERST (part
>> of APEI too) before panic?
>
> Ok.  Does the BIOS/BMC automatically do this?  Can we just print a message
> on panic saying checking your BIOS/BMC logs for more info?

Yes. BIOS/BMC automatically do that. And I will add it to panic message.

> I would love to add code to gather more useful info for unknown NMIs, but
> is it expected that HEST does some of this?  I guess what I am trying to
> figure out, if we are going to put intelligence to detect a HEST enabled
> machine and panic when unknown NMI comes along (presumably from HEST??),
> then can we leverage HEST at all to understand why the NMI happened or
> point the user to the BIOS/BMC to get more info.  In other words, what
> value do we get HEST other than we detect its there, lets panic.

Yes. HEST can be used to report some hardware error information. I am
working on that now.

>> HEST is defined in ACPI spec 4.0 and later version in section named
>> APEI (ACPI Platform Error Interface). It is used to describe the error
>> sources of system. It should be available only on server platform.
>
> Ok.  Does the kernel have intelligence to use it or the BIOS yet?

HEST works in kernel BIOS cooperative way. I am working on a HEST
driver which will get notified for NMI and collect the error
information reported by BIOS. But It is possible that some systems
have only BMC/BIOS log and do not report that to OS except unknown
NMI. The unknown NMI panic logic is for these systems.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-24 11:50           ` huang ying
@ 2010-09-24 14:29             ` Don Zickus
  0 siblings, 0 replies; 69+ messages in thread
From: Don Zickus @ 2010-09-24 14:29 UTC (permalink / raw)
  To: huang ying
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel

On Fri, Sep 24, 2010 at 07:50:16PM +0800, huang ying wrote:
> On Thu, Sep 23, 2010 at 10:16 PM, Don Zickus <dzickus@redhat.com> wrote:
> >> On some system, there is some hardware error log in BMC/BIOS. The
> >> hardware error log can be gotten via IPMI or BIOS menu. Otherwise, can
> >> we get some useful info for unknown NMI? If we can, can we collect the
> >> info, then print it on console and save it into flash via ERST (part
> >> of APEI too) before panic?
> >
> > Ok.  Does the BIOS/BMC automatically do this?  Can we just print a message
> > on panic saying checking your BIOS/BMC logs for more info?
> 
> Yes. BIOS/BMC automatically do that. And I will add it to panic message.
> 
> > I would love to add code to gather more useful info for unknown NMIs, but
> > is it expected that HEST does some of this?  I guess what I am trying to
> > figure out, if we are going to put intelligence to detect a HEST enabled
> > machine and panic when unknown NMI comes along (presumably from HEST??),
> > then can we leverage HEST at all to understand why the NMI happened or
> > point the user to the BIOS/BMC to get more info.  In other words, what
> > value do we get HEST other than we detect its there, lets panic.
> 
> Yes. HEST can be used to report some hardware error information. I am
> working on that now.
> 
> >> HEST is defined in ACPI spec 4.0 and later version in section named
> >> APEI (ACPI Platform Error Interface). It is used to describe the error
> >> sources of system. It should be available only on server platform.
> >
> > Ok.  Does the kernel have intelligence to use it or the BIOS yet?
> 
> HEST works in kernel BIOS cooperative way. I am working on a HEST
> driver which will get notified for NMI and collect the error
> information reported by BIOS. But It is possible that some systems
> have only BMC/BIOS log and do not report that to OS except unknown
> NMI. The unknown NMI panic logic is for these systems.

Ah ok, thanks for the info.  I think adding the info to the panic message
would be valuable.  I have no more objections to your patch now. :-)

I appreciate your patience for clue-ing me in on how HEST works!

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-23  5:37     ` huang ying
@ 2010-09-29  0:26       ` Maciej W. Rozycki
  0 siblings, 0 replies; 69+ messages in thread
From: Maciej W. Rozycki @ 2010-09-29  0:26 UTC (permalink / raw)
  To: huang ying
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

Hi Huang,

> >  Things perhaps changed over the last few years while I have not been
> > watching, but for many years the bit #7 of the NMI status port
> > (implemented by the southbridge at 0x61 in the port I/O space) was still
> > used for memory parity or ECC errors even after the original IBM PC/AT.
> > The usual arrangement was in the event of a memory error the memory
> > controller in the northbridge would assert the chip's PCI SERR output
> > line, which in turn would be trapped by the southbridge and converted to
> > an NMI event while setting the said bit in the NMI status port.  See e.g.
> > the 82349HX System Controller datasheet (Intel document number 290551).
> 
> Thanks for your information. So EDAC function call in NMI handler
> should be kept?

 I've seen EDAC mentioned before, but I lack further details as to what it 
is -- if you give me a link to the relevant piece of documentation, then 
I'll see if I can find some time to look into it.  Otherwise I can't 
comment on it, sorry.

> But as you pointed out, the function name of
> corresponding handler should be PCI SERR instead of memory parity. It
> just can be used to report memory error on some system. I think we can
> rename the function and string to PCI SERR and add some comments for
> EDAC function call that checks memory errors.

 Linux can certainly run on pre-PCI x86 machines; while I agree it makes 
sense to update the references to match reality, I think you need to be 
careful about it.  "A memory or system error" might be a good compromise 
-- mentioning parity explicitly may not be a good idea; I'm sure there 
must have been ECC x86 systems made even before PCI (think EISA -- these 
often were quite sophisticated; unsure about IBM's MCA).

 Otherwise if you think you absolutely *must* mention "PCI SERR" where 
relevant, then I suggest that you investigate how to determine whether the 
kernel is running on a PCI system (something along the lines of 
(CONFIG_PCI && pci_host_bridge_found)) and adjust the message accordingly 
based on actual configuration.

> >  So the name of the error reported is not that unjustified except, of
> > course, to be precise the handler would have to scan the state of the SERR
> > output reported by all the PCI devices in the PCI configuration space to
> > find the originator and then interpret the event accordingly.  Which
> > obviously means the only piece of code that could exactly know what the
> > reason was is the respective device driver as causes of SERR are
> > device-specific and may require processing of device-specific registers to
> > determine the cause and/or recover (a device reset may be required in some
> > cases).
> 
> In addition to PCI SERR, I think modern system rely more on PCIE AER,
> which can report more information about error. There are recovery
> support for PCIE AER in kernel already. Do we need some similar
> mechanism for PCI SERR? Because PCIE AER becomes more and more common
> on server platform, I think some minimal check such as scaning devices
> SERR/PERR bit should be sufficient.

 Even you you don't care about 1990s' vintage computers (which you have 
the right not to) I'd expect legacy PCI/PCI-X to stay around for a while 
as it was with ISA as not all option cards will ever be redesigned as PCIe 
options.  And even if they do, then people may not necessarily want to 
throw away all their old still-working peripheral hardware when they 
upgrade the system, especially the more sophisticated or expensive bits.

> >  OTOH, for CPU stores and DMA transactions the event will always be
> > asynchronous and an NMI might be a better option, as in the case of parity
> > and MBE ECC errors the whole system will probably have to be brought down,
> > and with SBE ECC errors scrubbing can be done at any time and otherwise
> > (except from logging and/or marking the physical page bad, as required) no
> > action is needed.
> 
> In fact, MCE is a special exception, it can be used for asynchronous
> events too. Such as memory error detected by patrol scrubbing, please
> take a look at latest Intel 64 and IA32 architectures software
> developer's manual Vol 3A section 15.9.3: Architecturally Defined UCR
> Errors.

 The MCE has always had an asynchronous option -- even the original 
implementation in the Pentium CPU had a BUSCHK# input line which was 
cleverly used by the i430NX/Neptune chipset to signal hard ECC errors (so 
the handler had the address of the failing bus transaction readily 
available in the Machine Check Address MSR), but regrettably never after.

 The problem with using the MCE for non-CPU related events such as DMA 
transfers that failed is the choice of the target CPU in multi-processor 
systems.  Is there a well-defined way for routing such events that an OS 
like Linux could use?  It better not went to an off-line processor for 
example.

 There's no such problem with system NMIs as they can be routed to the CPU 
of choice by means of LINT1 input configuration in the local APIC unit of 
the concerned processors.

  Maciej

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 17:48 ` Ingo Molnar
@ 2010-09-15  5:06   ` Huang Ying
  0 siblings, 0 replies; 69+ messages in thread
From: Huang Ying @ 2010-09-15  5:06 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andi Kleen, Don Zickus, H. Peter Anvin, linux-kernel

On Wed, 2010-09-15 at 01:48 +0800, Ingo Molnar wrote:
> * Some One <some@where> stripped out all quoted identities and wrote:
> > >
> > > The proper approach would be not to add hacks to the NMI code but to 
> > > implement southbridge drivers - which would also have NMI callbacks.
> > 
> > BTW southbridges do less and less regarding PCI.
> 
> Except WICTCR.
> 
> > > These are unchartered waters, but variance in that space is reducing 
> > > systematically so it would be worth a shot.
> > 
> > You don't really need special drivers for AER [...]
> 
> On the contrary, we need proper driverization for _everything_ new. 
> Embedded x86 is here to stay, so we are abstracting away each and every 
> bit of the platform. See struct x86_ops for a highlevel platform driver 
> - but a more specific, southbridge-encompassing driver framework can be 
> created too.

All in all, we can have proper drivers to printk/write to NVRAM the
error information collected for unknown NMI, regardless they are AER or
Southbridge driver or both. And, the drivers will register handlers for
unknown NMI. Do you agree?

Best Regards,
Huang Ying


^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 14:31 [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Andi Kleen
  2010-09-14 15:17 ` Don Zickus
@ 2010-09-14 17:48 ` Ingo Molnar
  2010-09-15  5:06   ` Huang Ying
  1 sibling, 1 reply; 69+ messages in thread
From: Ingo Molnar @ 2010-09-14 17:48 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Don Zickus, Huang Ying, H. Peter Anvin, linux-kernel


* Some One <some@where> stripped out all quoted identities and wrote:

> >
> > The proper approach would be not to add hacks to the NMI code but to 
> > implement southbridge drivers - which would also have NMI callbacks.
> 
> BTW southbridges do less and less regarding PCI.

Except WICTCR.

> > These are unchartered waters, but variance in that space is reducing 
> > systematically so it would be worth a shot.
> 
> You don't really need special drivers for AER [...]

On the contrary, we need proper driverization for _everything_ new. 
Embedded x86 is here to stay, so we are abstracting away each and every 
bit of the platform. See struct x86_ops for a highlevel platform driver 
- but a more specific, southbridge-encompassing driver framework can be 
created too.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 15:17 ` Don Zickus
@ 2010-09-14 17:40   ` Andi Kleen
  0 siblings, 0 replies; 69+ messages in thread
From: Andi Kleen @ 2010-09-14 17:40 UTC (permalink / raw)
  To: Don Zickus; +Cc: Ingo Molnar, Huang Ying, H. Peter Anvin, linux-kernel

> 
> From what someone explained to me, the only platform that implements
> AER correctly is Nehalem based ones.  So for AMD and Intel Core
> arches, AER is not expected to work.  Is that incorrect?

At least for Intel it's definitely not true. AER goes back a
lot of generations, pretty much all that support PCI Express. 

There were some bugs in it of course and some workarounds needed, but
no show stoppers in any particular part to my knowledge.

Haven't kept fully track of AMD systems recently, but would surprise
me if it was the case on AMD either.

This applies to servers. On clients AER is more spotty
and various simply don't have it.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-14 14:31 [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Andi Kleen
@ 2010-09-14 15:17 ` Don Zickus
  2010-09-14 17:40   ` Andi Kleen
  2010-09-14 17:48 ` Ingo Molnar
  1 sibling, 1 reply; 69+ messages in thread
From: Don Zickus @ 2010-09-14 15:17 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Ingo Molnar, Huang Ying, H. Peter Anvin, linux-kernel

On Tue, Sep 14, 2010 at 04:31:20PM +0200, Andi Kleen wrote:
> 
> >
> > The proper approach would be not to add hacks to the NMI code but to
> > implement southbridge drivers - which would also have NMI callbacks.
> 
> BTW southbridges do less and less regarding PCI.
> 
> > These are unchartered waters, but variance in that space is reducing
> > systematically so it would be worth a shot.
> 
> You don't really need special drivers for AER -- it's fully standardized
> and works generically. I think the old PCI-X error bits Don was interested
> in were also all architectural.
> 
> The driver for the first is already there, just right now the
> information is not dumped in the right places.
> 
> There are a few platform specific error signals, but they tend
> to be rather obscure stuff. The "meat" is all in the standard.

>From what someone explained to me, the only platform that implements AER
correctly is Nehalem based ones.  So for AMD and Intel Core arches, AER is
not expected to work.  Is that incorrect?

Cheers,
Don

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI
@ 2010-09-14 14:31 Andi Kleen
  2010-09-14 15:17 ` Don Zickus
  2010-09-14 17:48 ` Ingo Molnar
  0 siblings, 2 replies; 69+ messages in thread
From: Andi Kleen @ 2010-09-14 14:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Don Zickus, Andi Kleen, Huang Ying, H. Peter Anvin, linux-kernel

>
> The proper approach would be not to add hacks to the NMI code but to
> implement southbridge drivers - which would also have NMI callbacks.

BTW southbridges do less and less regarding PCI.

> These are unchartered waters, but variance in that space is reducing
> systematically so it would be worth a shot.

You don't really need special drivers for AER -- it's fully standardized
and works generically. I think the old PCI-X error bits Don was interested
in were also all architectural.

The driver for the first is already there, just right now the
information is not dumped in the right places.

There are a few platform specific error signals, but they tend
to be rather obscure stuff. The "meat" is all in the standard.

-Andi

^ permalink raw reply	[flat|nested] 69+ messages in thread

end of thread, other threads:[~2010-09-29  0:26 UTC | newest]

Thread overview: 69+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-10  2:51 [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
2010-09-10  2:51 ` [RFC 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
2010-09-10  2:51 ` [RFC 3/6] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
2010-09-13  1:02   ` Robert Richter
2010-09-13  2:02     ` Huang Ying
2010-09-16  8:18       ` Robert Richter
2010-09-17  0:08         ` Huang Ying
2010-09-17  9:14           ` Robert Richter
2010-09-19  0:20             ` Huang Ying
2010-09-20  8:00               ` Robert Richter
2010-09-20 12:59                 ` Borislav Petkov
2010-09-21  0:22                   ` Huang Ying
2010-09-21  6:37                     ` Borislav Petkov
2010-09-21 14:08                       ` Doug Thompson
2010-09-21 23:04   ` Maciej W. Rozycki
2010-09-23  5:37     ` huang ying
2010-09-29  0:26       ` Maciej W. Rozycki
2010-09-10  2:51 ` [RFC 4/6] x86, NMI, Rewrite NMI handler Huang Ying
2010-09-10 15:56   ` Don Zickus
2010-09-10 16:03     ` Andi Kleen
2010-09-10 18:29       ` Don Zickus
2010-09-13  2:09         ` Huang Ying
2010-09-13 14:04           ` Don Zickus
2010-09-14  5:12             ` Huang Ying
2010-09-14 13:37               ` Don Zickus
2010-09-13  1:16   ` Robert Richter
2010-09-10  2:51 ` [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
2010-09-10 16:02   ` Don Zickus
2010-09-10 16:19     ` Andi Kleen
2010-09-10 18:40       ` Don Zickus
2010-09-13  2:19         ` Huang Ying
2010-09-13 14:11           ` Don Zickus
2010-09-13 15:24             ` Andi Kleen
2010-09-13 15:47               ` Don Zickus
2010-09-13 16:57                 ` Andi Kleen
2010-09-13 17:53                   ` Don Zickus
2010-09-13 18:07                     ` Andi Kleen
2010-09-13 18:23                       ` Don Zickus
2010-09-13 18:36                         ` Andi Kleen
2010-09-13 19:36                           ` Don Zickus
2010-09-13 20:49                             ` Andi Kleen
2010-09-13 21:25                               ` Valdis.Kletnieks
2010-09-14  7:48                                 ` Andi Kleen
2010-09-14 17:54                                   ` Valdis.Kletnieks
2010-09-14 12:21                             ` Ingo Molnar
2010-09-14 13:45                               ` Don Zickus
2010-09-14 19:34                               ` Cyrill Gorcunov
2010-09-15  9:29                                 ` Ingo Molnar
2010-09-10  2:51 ` [RFC 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
2010-09-10 16:13   ` Don Zickus
2010-09-13  2:27     ` Huang Ying
2010-09-13  6:24       ` Ingo Molnar
2010-09-10 20:37 ` [RFC 1/6] x86, NMI, Add symbol definition for NMI magic constants Peter Zijlstra
2010-09-10 22:58   ` H. Peter Anvin
2010-09-11  8:50   ` Andi Kleen
2010-09-13  1:30     ` Robert Richter
2010-09-21 21:48 ` Don Zickus
2010-09-21 22:19   ` Andi Kleen
2010-09-22 16:07     ` Don Zickus
2010-09-23  9:29       ` huang ying
2010-09-23 14:16         ` Don Zickus
2010-09-24 11:50           ` huang ying
2010-09-24 14:29             ` Don Zickus
2010-09-23  9:51   ` huang ying
2010-09-14 14:31 [RFC 5/6] x86, NMI, Add support to notify hardware error with unknown NMI Andi Kleen
2010-09-14 15:17 ` Don Zickus
2010-09-14 17:40   ` Andi Kleen
2010-09-14 17:48 ` Ingo Molnar
2010-09-15  5:06   ` Huang Ying

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.