linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants
@ 2010-09-27  0:57 Huang Ying
  2010-09-27  0:57 ` [PATCH -v2 2/7] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
                   ` (6 more replies)
  0 siblings, 7 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

Replace the NMI related magic numbers with symbol constants.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/mach_traps.h |   12 +++++++++++-
 arch/x86/kernel/traps.c           |   18 +++++++++---------
 2 files changed, 20 insertions(+), 10 deletions(-)

--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
 
 #include <asm/mc146818rtc.h>
 
+#define NMI_REASON_PORT		0x61
+
+#define NMI_REASON_MEMPAR	0x80
+#define NMI_REASON_IOCHK	0x40
+#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_MEMPAR	0x04
+#define NMI_REASON_CLEAR_IOCHK	0x08
+#define NMI_REASON_CLEAR_MASK	0x0f
+
 static inline unsigned char get_nmi_reason(void)
 {
-	return inb(0x61);
+	return inb(NMI_REASON_PORT);
 }
 
 static inline void reassert_nmi(void)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -323,8 +323,8 @@ mem_parity_error(unsigned char reason, s
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 
 	/* Clear and disable the memory parity error line. */
-	reason = (reason & 0xf) | 4;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_MEMPAR;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -339,15 +339,15 @@ io_check_error(unsigned char reason, str
 		panic("NMI IOCK error: Not continuing");
 
 	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & 0xf) | 8;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 
 	i = 2000;
 	while (--i)
 		udelay(1000);
 
-	reason &= ~8;
-	outb(reason, 0x61);
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -388,7 +388,7 @@ static notrace __kprobes void default_do
 	if (!cpu)
 		reason = get_nmi_reason();
 
-	if (!(reason & 0xc0)) {
+	if (!(reason & NMI_REASON_MASK)) {
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 								== NOTIFY_STOP)
 			return;
@@ -418,9 +418,9 @@ static notrace __kprobes void default_do
 		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & 0x80)
+	if (reason & NMI_REASON_MEMPAR)
 		mem_parity_error(reason, regs);
-	if (reason & 0x40)
+	if (reason & NMI_REASON_IOCHK)
 		io_check_error(reason, regs);
 #ifdef CONFIG_X86_32
 	/*

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH -v2 2/7] x86, NMI, Add touch_nmi_watchdog to io_check_error delay
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
@ 2010-09-27  0:57 ` Huang Ying
  2010-09-27  0:57 ` [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

Prevent the long delay in io_check_error make NMI watchdog timeout.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/traps.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -342,9 +342,11 @@ io_check_error(unsigned char reason, str
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);
 
-	i = 2000;
-	while (--i)
-		udelay(1000);
+	i = 20000;
+	while (--i) {
+		touch_nmi_watchdog();
+		udelay(100);
+	}
 
 	reason &= ~NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
  2010-09-27  0:57 ` [PATCH -v2 2/7] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
@ 2010-09-27  0:57 ` Huang Ying
  2010-09-27  8:01   ` Robert Richter
  2010-09-27  0:57 ` [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler Huang Ying
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

memory parity error is only valid for IBM PC-AT, newer machine use 7
bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
reported via MCE. So corresponding function name and kernel log string
is changed.

But on some machines, PCI SERR line is still used to report memory
errors. This is used by EDAC, so corresponding EDAC call is reserved.


v2:

- EDAC call in pci_serr_error is reserved.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/mach_traps.h |    6 +++---
 arch/x86/kernel/traps.c           |   21 ++++++++++-----------
 2 files changed, 13 insertions(+), 14 deletions(-)

--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -9,11 +9,11 @@
 
 #define NMI_REASON_PORT		0x61
 
-#define NMI_REASON_MEMPAR	0x80
+#define NMI_REASON_SERR		0x80
 #define NMI_REASON_IOCHK	0x40
-#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
+#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
 
-#define NMI_REASON_CLEAR_MEMPAR	0x04
+#define NMI_REASON_CLEAR_SERR	0x04
 #define NMI_REASON_CLEAR_IOCHK	0x08
 #define NMI_REASON_CLEAR_MASK	0x0f
 
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -301,15 +301,14 @@ gp_in_kernel:
 }
 
 static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
-
-	printk(KERN_EMERG
-		"You have some hardware problem, likely on the PCI bus.\n");
+	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");
 
+	/*
+	 * On some machines, PCI SERR line is used to report memory
+	 * errors. EDAC makes use of it.
+	 */
 #if defined(CONFIG_EDAC)
 	if (edac_handler_set()) {
 		edac_atomic_assert_error();
@@ -322,8 +321,8 @@ mem_parity_error(unsigned char reason, s
 
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 
-	/* Clear and disable the memory parity error line. */
-	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_MEMPAR;
+	/* Clear and disable the PCI SERR error line. */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
 	outb(reason, NMI_REASON_PORT);
 }
 
@@ -420,8 +419,8 @@ static notrace __kprobes void default_do
 		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & NMI_REASON_MEMPAR)
-		mem_parity_error(reason, regs);
+	if (reason & NMI_REASON_SERR)
+		pci_serr_error(reason, regs);
 	if (reason & NMI_REASON_IOCHK)
 		io_check_error(reason, regs);
 #ifdef CONFIG_X86_32

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
  2010-09-27  0:57 ` [PATCH -v2 2/7] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
  2010-09-27  0:57 ` [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
@ 2010-09-27  0:57 ` Huang Ying
  2010-09-27  9:41   ` Robert Richter
  2010-09-27  0:57 ` [PATCH -v2 5/7] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

The original NMI handler is quite outdated in many aspects. This patch
try to fix it.

The order to process the NMI sources are changed as follow:

notify_die(DIE_NMI_IPI);
notify_die(DIE_NMI);
/* process io port 0x61 */
nmi_watchdog_touch();
notify_die(DIE_NMIUNKNOWN);
unknown_nmi();

DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
event, oprofile, crash IPI, etc. While DIE_NMI is used to process
non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
Interface) GHES (Generic Hardware Error Source), etc. Non-CPU-specific
NMI sources can be processed on any CPU,

DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
possible that APEI GHES is processed on CPU 1, while unknown NMI is
gotten on CPU 0.

In this new order of processing, performance sensitive NMI sources
such as oprofile or perf event will have better performance because
the time consuming IO port reading is done after them.

Only one NMI is eaten for each NMI handler call, even for PCI SERR and
IOCHK NMIs. Because one NMI should be raised for each of them, eating
too many NMI will cause unnecessary unknown NMI.

The die value used in NMI sources are fixed accordingly.

The NMI handler in the patch is designed by Andi Kleen.


v2:

- Split process NMI reason (0x61) on non-BSP into another patch

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c  |    1 
 arch/x86/kernel/traps.c           |   80 +++++++++++++++++++-------------------
 arch/x86/oprofile/nmi_int.c       |    1 
 arch/x86/oprofile/nmi_timer_int.c |    2 
 drivers/char/ipmi/ipmi_watchdog.c |    2 
 drivers/watchdog/hpwdt.c          |    2 
 6 files changed, 43 insertions(+), 45 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1247,7 +1247,6 @@ perf_event_nmi_handler(struct notifier_b
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-	case DIE_NMI:
 	case DIE_NMI_IPI:
 		break;
 	case DIE_NMIUNKNOWN:
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -354,9 +354,6 @@ io_check_error(unsigned char reason, str
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
-			NOTIFY_STOP)
-		return;
 #ifdef CONFIG_MCA
 	/*
 	 * Might actually be able to figure out what the guilty party
@@ -385,51 +382,54 @@ static notrace __kprobes void default_do
 
 	cpu = smp_processor_id();
 
-	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu)
-		reason = get_nmi_reason();
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
 
-	if (!(reason & NMI_REASON_MASK)) {
-		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-								== NOTIFY_STOP)
-			return;
+	/*
+	 * CPU-specific NMI: send to specific CPU or NMI sources must
+	 * be processed on specific CPU
+	 */
+	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
+	    == NOTIFY_STOP)
+		return;
 
-#ifdef CONFIG_X86_LOCAL_APIC
-		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
-			return;
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
 
-#ifndef CONFIG_LOCKUP_DETECTOR
-		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
-		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-			unknown_nmi_error(reason, regs);
-#else
-		unknown_nmi_error(reason, regs);
+	if (!cpu) {
+		reason = get_nmi_reason();
+		if (reason & NMI_REASON_MASK) {
+			if (reason & NMI_REASON_SERR)
+				pci_serr_error(reason, regs);
+			else if (reason & NMI_REASON_IOCHK)
+				io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
+			/*
+			 * Reassert NMI in case it became active
+			 * meanwhile as it's edge-triggered:
+			 */
+			reassert_nmi();
 #endif
+			return;
+		}
+	}
 
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
+	if (nmi_watchdog_tick(regs, reason))
 		return;
-	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+	if (do_nmi_callback(regs, smp_processor_id()))
 		return;
-
-	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & NMI_REASON_SERR)
-		pci_serr_error(reason, regs);
-	if (reason & NMI_REASON_IOCHK)
-		io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered:
-	 */
-	reassert_nmi();
 #endif
+
+	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
+	    == NOTIFY_STOP)
+		return;
+
+	unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -64,7 +64,6 @@ static int profile_exceptions_notify(str
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI:
 	case DIE_NMI_IPI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI:
+	case DIE_NMI_IPI:
 		oprofile_add_sample(args->regs, 0);
 		ret = NOTIFY_STOP;
 		break;
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un
 {
 	struct die_args *args = data;
 
-	if (val != DIE_NMI)
+	if (val != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	/* Hack, if it's a memory or I/O error, ignore it. */
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notif
 	unsigned long rom_pl;
 	static int die_nmi_called;
 
-	if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)
+	if (ulReason != DIE_NMIUNKNOWN)
 		goto out;
 
 	if (!hpwdt_nmi_decoding)

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH -v2 5/7] Make NMI reason io port (0x61) can be processed on any CPU
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (2 preceding siblings ...)
  2010-09-27  0:57 ` [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler Huang Ying
@ 2010-09-27  0:57 ` Huang Ying
  2010-09-27  0:57 ` [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

In original NMI handler, NMI reason io port (0x61) is only processed
on BSP. This makes it impossible to hot-remove BSP. To solve the
issue, a raw spinlock is used to make the port can be processed on any
CPU.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/traps.c |   38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,12 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -378,9 +384,6 @@ unknown_nmi_error(unsigned char reason,
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
-	int cpu;
-
-	cpu = smp_processor_id();
 
 	/*
 	 * CPU-specific NMI must be processed before non-CPU-specific
@@ -400,23 +403,24 @@ static notrace __kprobes void default_do
 	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
-	if (!cpu) {
-		reason = get_nmi_reason();
-		if (reason & NMI_REASON_MASK) {
-			if (reason & NMI_REASON_SERR)
-				pci_serr_error(reason, regs);
-			else if (reason & NMI_REASON_IOCHK)
-				io_check_error(reason, regs);
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
+	if (reason & NMI_REASON_MASK) {
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
 #ifdef CONFIG_X86_32
-			/*
-			 * Reassert NMI in case it became active
-			 * meanwhile as it's edge-triggered:
-			 */
-			reassert_nmi();
+		/*
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
+		 */
+		reassert_nmi();
 #endif
-			return;
-		}
+		raw_spin_unlock(&nmi_reason_lock);
+		return;
 	}
+	raw_spin_unlock(&nmi_reason_lock);
 
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
 	if (nmi_watchdog_tick(regs, reason))

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (3 preceding siblings ...)
  2010-09-27  0:57 ` [PATCH -v2 5/7] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying
@ 2010-09-27  0:57 ` Huang Ying
  2010-09-27 10:09   ` Robert Richter
  2010-09-27 15:38   ` Don Zickus
  2010-09-27  0:57 ` [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic Huang Ying
  2010-09-27 10:50 ` [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Robert Richter
  6 siblings, 2 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

On some platforms, fatal hardware error may be notified via unknown
NMI.

For example, on some platform with APEI firmware first mode support,
firmware generates NMI for fatal error but without error record. The
unknown NMI should be treated as notification of fatal hardware
error. The unknown_nmi_for_hwerr is added for these platform, if it is
not zero, system will treat unknown NMI as notification of fatal
hardware error.

These platforms are identified via the presentation of APEI HEST or
some PCI ID of the host bridge. The PCI ID of host bridge instead of
DMI ID is used, so that the checking can be done based on the platform
type instead of motherboard. This should be simpler and sufficient.

The method to identify the platforms is designed by Andi Kleen.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/nmi.h |    1 
 arch/x86/kernel/Makefile   |    2 +
 arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/traps.c    |   10 ++++++++
 drivers/acpi/apei/hest.c   |    8 ++++++
 5 files changed, 76 insertions(+)
 create mode 100644 arch/x86/kernel/hwerr.c

--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,6 +44,7 @@ struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
+extern int unknown_nmi_for_hwerr;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -118,6 +118,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
 
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 
+obj-y					+= hwerr.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
--- /dev/null
+++ b/arch/x86/kernel/hwerr.c
@@ -0,0 +1,55 @@
+/*
+ * Hardware error architecture dependent processing
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/nmi.h>
+
+/*
+ * On some platform, hardware errors may be notified via unknown
+ * NMI. These platform is identified via the PCI ID of host bridge.
+ *
+ * The PCI ID of host bridge instead of DMI ID is used, so that the
+ * checking can be done based on the platform instead of motherboard.
+ * This should be simpler and sufficient.
+ */
+static const
+struct pci_device_id unknown_nmi_for_hwerr_platform[] __initdata = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
+	{ 0, }
+};
+
+int __init check_unknown_nmi_for_hwerr(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev) {
+		if (pci_match_id(unknown_nmi_for_hwerr_platform, dev)) {
+			pr_info(
+"Host bridge is identified, will treat unknown NMI as hardware error!\n");
+			unknown_nmi_for_hwerr = 1;
+			break;
+		}
+	}
+
+	return 0;
+}
+late_initcall(check_unknown_nmi_for_hwerr);
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_for_hwerr;
+
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
  * only be used in NMI handler.
@@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
+	/*
+	 * On some platforms, hardware errors may be notified via
+	 * unknown NMI
+	 */
+	if (unknown_nmi_for_hwerr)
+		panic("NMI for hardware error without error record: "
+		      "Not continuing");
+
 #ifdef CONFIG_MCA
 	/*
 	 * Might actually be able to figure out what the guilty party
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -35,6 +35,7 @@
 #include <linux/highmem.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/nmi.h>
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -222,6 +223,13 @@ static int __init hest_init(void)
 	if (rc)
 		goto err;
 
+	/*
+	 * System has proper HEST should treat unknown NMI as fatal
+	 * hardware error notification
+	 */
+	pr_info("HEST is valid, will treat unknown NMI as hardware error!\n");
+	unknown_nmi_for_hwerr = 1;
+
 	rc = hest_ghes_dev_register(ghes_count);
 	if (rc)
 		goto err;

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (4 preceding siblings ...)
  2010-09-27  0:57 ` [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
@ 2010-09-27  0:57 ` Huang Ying
  2010-09-27 10:44   ` Robert Richter
  2010-09-27 10:50 ` [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Robert Richter
  6 siblings, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-27  0:57 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Huang Ying

do_nmi_callback related logic is removed, because it is useless, just
adds code complexity.

unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/nmi.h    |   10 +++++++++-
 arch/x86/kernel/apic/hw_nmi.c |    1 -
 arch/x86/kernel/apic/nmi.c    |   29 +----------------------------
 arch/x86/kernel/traps.c       |   16 ++++++++++------
 4 files changed, 20 insertions(+), 36 deletions(-)

--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void
 extern void stop_apic_nmi_watchdog(void *);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
 extern void cpu_nmi_set_wd_enabled(void);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
+extern int nmi_watchdog_tick(struct pt_regs *regs);
+#else
+static inline int nmi_watchdog_tick(struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
 #define NMI_NONE	0
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; }
 #endif
 atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
 void cpu_nmi_set_wd_enabled(void) { return; }
 void stop_apic_nmi_watchdog(void *unused) { return; }
 void setup_apic_nmi_watchdog(void *unused) { return; }
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -37,7 +37,6 @@
 
 #include <asm/mach_traps.h>
 
-int unknown_nmi_panic;
 int nmi_watchdog_enabled;
 
 /* For reliability, we're prepared to waste bits here. */
@@ -389,7 +388,7 @@ void touch_nmi_watchdog(void)
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
 notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+nmi_watchdog_tick(struct pt_regs *regs)
 {
 	/*
 	 * Since current_thread_info()-> is always on the stack, and we
@@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog(
 	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 }
 
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-	unsigned char reason = get_nmi_reason();
-	char buf[64];
-
-	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-	die_nmi(buf, regs, 1); /* Always panic here */
-	return 0;
-}
-
 /*
  * proc handler for /proc/sys/kernel/nmi
  */
@@ -540,15 +522,6 @@ int proc_nmi_enabled(struct ctl_table *t
 
 #endif /* CONFIG_SYSCTL */
 
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
 static int ignore_nmis;
 
 int unknown_nmi_for_hwerr;
+int unknown_nmi_panic;
 
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
@@ -308,6 +309,13 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -385,7 +393,7 @@ unknown_nmi_error(unsigned char reason,
 			reason, smp_processor_id());
 
 	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-	if (panic_on_unrecovered_nmi)
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -432,12 +440,8 @@ static notrace __kprobes void default_do
 	}
 	raw_spin_unlock(&nmi_reason_lock);
 
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
-	if (nmi_watchdog_tick(regs, reason))
-		return;
-	if (do_nmi_callback(regs, smp_processor_id()))
+	if (nmi_watchdog_tick(regs))
 		return;
-#endif
 
 	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
 	    == NOTIFY_STOP)

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27  0:57 ` [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
@ 2010-09-27  8:01   ` Robert Richter
  2010-09-27  8:39     ` Huang Ying
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27  8:01 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 26.09.10 20:57:02, Huang Ying wrote:
> memory parity error is only valid for IBM PC-AT, newer machine use 7
> bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
> reported via MCE. So corresponding function name and kernel log string
> is changed.
> 
> But on some machines, PCI SERR line is still used to report memory
> errors. This is used by EDAC, so corresponding EDAC call is reserved.
> 
> 
> v2:
> 
> - EDAC call in pci_serr_error is reserved.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/include/asm/mach_traps.h |    6 +++---
>  arch/x86/kernel/traps.c           |   21 ++++++++++-----------
>  2 files changed, 13 insertions(+), 14 deletions(-)
> 
> --- a/arch/x86/include/asm/mach_traps.h
> +++ b/arch/x86/include/asm/mach_traps.h
> @@ -9,11 +9,11 @@
>  
>  #define NMI_REASON_PORT		0x61
>  
> -#define NMI_REASON_MEMPAR	0x80
> +#define NMI_REASON_SERR		0x80
>  #define NMI_REASON_IOCHK	0x40
> -#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
> +#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
>  
> -#define NMI_REASON_CLEAR_MEMPAR	0x04
> +#define NMI_REASON_CLEAR_SERR	0x04

I already commented on this, patch #1 and #3 are basically the same in
most parts which should be merged. What remains then in this patch is
the modified printk() and the comment. Both could be added to #1 too
which is then some sort of code cleanup patch.

>  #define NMI_REASON_CLEAR_IOCHK	0x08
>  #define NMI_REASON_CLEAR_MASK	0x0f
>  
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -301,15 +301,14 @@ gp_in_kernel:
>  }
>  
>  static notrace __kprobes void
> -mem_parity_error(unsigned char reason, struct pt_regs *regs)
> +pci_serr_error(unsigned char reason, struct pt_regs *regs)
>  {
> -	printk(KERN_EMERG
> -		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> -			reason, smp_processor_id());
> -
> -	printk(KERN_EMERG
> -		"You have some hardware problem, likely on the PCI bus.\n");
> +	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");

You should keep reporting the cpu id to identify the affected node and
also the reason.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27  8:01   ` Robert Richter
@ 2010-09-27  8:39     ` Huang Ying
  2010-09-27  9:00       ` Robert Richter
  0 siblings, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-27  8:39 UTC (permalink / raw)
  To: Robert Richter
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, 2010-09-27 at 16:01 +0800, Robert Richter wrote:
> On 26.09.10 20:57:02, Huang Ying wrote:
> > memory parity error is only valid for IBM PC-AT, newer machine use 7
> > bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
> > reported via MCE. So corresponding function name and kernel log string
> > is changed.
> > 
> > But on some machines, PCI SERR line is still used to report memory
> > errors. This is used by EDAC, so corresponding EDAC call is reserved.
> > 
> > 
> > v2:
> > 
> > - EDAC call in pci_serr_error is reserved.
> > 
> > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > ---
> >  arch/x86/include/asm/mach_traps.h |    6 +++---
> >  arch/x86/kernel/traps.c           |   21 ++++++++++-----------
> >  2 files changed, 13 insertions(+), 14 deletions(-)
> > 
> > --- a/arch/x86/include/asm/mach_traps.h
> > +++ b/arch/x86/include/asm/mach_traps.h
> > @@ -9,11 +9,11 @@
> >  
> >  #define NMI_REASON_PORT		0x61
> >  
> > -#define NMI_REASON_MEMPAR	0x80
> > +#define NMI_REASON_SERR		0x80
> >  #define NMI_REASON_IOCHK	0x40
> > -#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
> > +#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
> >  
> > -#define NMI_REASON_CLEAR_MEMPAR	0x04
> > +#define NMI_REASON_CLEAR_SERR	0x04
> 
> I already commented on this, patch #1 and #3 are basically the same in
> most parts which should be merged. What remains then in this patch is
> the modified printk() and the comment. Both could be added to #1 too
> which is then some sort of code cleanup patch.

Don thinks it is Ok to keep 2 patches.

> >  #define NMI_REASON_CLEAR_IOCHK	0x08
> >  #define NMI_REASON_CLEAR_MASK	0x0f
> >  
> > --- a/arch/x86/kernel/traps.c
> > +++ b/arch/x86/kernel/traps.c
> > @@ -301,15 +301,14 @@ gp_in_kernel:
> >  }
> >  
> >  static notrace __kprobes void
> > -mem_parity_error(unsigned char reason, struct pt_regs *regs)
> > +pci_serr_error(unsigned char reason, struct pt_regs *regs)
> >  {
> > -	printk(KERN_EMERG
> > -		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> > -			reason, smp_processor_id());
> > -
> > -	printk(KERN_EMERG
> > -		"You have some hardware problem, likely on the PCI bus.\n");
> > +	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");
> 
> You should keep reporting the cpu id to identify the affected node and
> also the reason.

Ok. I will add CPU ID in message. Because we know the reason, I don't
think we need the reason in message.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27  8:39     ` Huang Ying
@ 2010-09-27  9:00       ` Robert Richter
  2010-09-27 15:33         ` Don Zickus
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27  9:00 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 27.09.10 04:39:20, Huang Ying wrote:
> On Mon, 2010-09-27 at 16:01 +0800, Robert Richter wrote:
> > On 26.09.10 20:57:02, Huang Ying wrote:
> > > memory parity error is only valid for IBM PC-AT, newer machine use 7
> > > bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
> > > reported via MCE. So corresponding function name and kernel log string
> > > is changed.
> > > 
> > > But on some machines, PCI SERR line is still used to report memory
> > > errors. This is used by EDAC, so corresponding EDAC call is reserved.
> > > 
> > > 
> > > v2:
> > > 
> > > - EDAC call in pci_serr_error is reserved.
> > > 
> > > Signed-off-by: Huang Ying <ying.huang@intel.com>
> > > ---
> > >  arch/x86/include/asm/mach_traps.h |    6 +++---
> > >  arch/x86/kernel/traps.c           |   21 ++++++++++-----------
> > >  2 files changed, 13 insertions(+), 14 deletions(-)
> > > 
> > > --- a/arch/x86/include/asm/mach_traps.h
> > > +++ b/arch/x86/include/asm/mach_traps.h
> > > @@ -9,11 +9,11 @@
> > >  
> > >  #define NMI_REASON_PORT		0x61
> > >  
> > > -#define NMI_REASON_MEMPAR	0x80
> > > +#define NMI_REASON_SERR		0x80
> > >  #define NMI_REASON_IOCHK	0x40
> > > -#define NMI_REASON_MASK		(NMI_REASON_MEMPAR | NMI_REASON_IOCHK)
> > > +#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
> > >  
> > > -#define NMI_REASON_CLEAR_MEMPAR	0x04
> > > +#define NMI_REASON_CLEAR_SERR	0x04
> > 
> > I already commented on this, patch #1 and #3 are basically the same in
> > most parts which should be merged. What remains then in this patch is
> > the modified printk() and the comment. Both could be added to #1 too
> > which is then some sort of code cleanup patch.
> 
> Don thinks it is Ok to keep 2 patches.

I don't like reviewing new changes which are thrown away with the next
patch. I review things twice and it is much harder to see what really
changed then. Also we should have a clean history.

And with git it is fairly easy to join patches.

> 
> > >  #define NMI_REASON_CLEAR_IOCHK	0x08
> > >  #define NMI_REASON_CLEAR_MASK	0x0f
> > >  
> > > --- a/arch/x86/kernel/traps.c
> > > +++ b/arch/x86/kernel/traps.c
> > > @@ -301,15 +301,14 @@ gp_in_kernel:
> > >  }
> > >  
> > >  static notrace __kprobes void
> > > -mem_parity_error(unsigned char reason, struct pt_regs *regs)
> > > +pci_serr_error(unsigned char reason, struct pt_regs *regs)
> > >  {
> > > -	printk(KERN_EMERG
> > > -		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> > > -			reason, smp_processor_id());
> > > -
> > > -	printk(KERN_EMERG
> > > -		"You have some hardware problem, likely on the PCI bus.\n");
> > > +	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");
> > 
> > You should keep reporting the cpu id to identify the affected node and
> > also the reason.
> 
> Ok. I will add CPU ID in message. Because we know the reason, I don't
> think we need the reason in message.

You only know that bit 7 is set, not the rest. As this is an error
message we should provide as much information as possible.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27  0:57 ` [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler Huang Ying
@ 2010-09-27  9:41   ` Robert Richter
  2010-09-27 12:39     ` huang ying
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27  9:41 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 26.09.10 20:57:03, Huang Ying wrote:
> The original NMI handler is quite outdated in many aspects. This patch
> try to fix it.
> 
> The order to process the NMI sources are changed as follow:
> 
> notify_die(DIE_NMI_IPI);
> notify_die(DIE_NMI);
> /* process io port 0x61 */
> nmi_watchdog_touch();
> notify_die(DIE_NMIUNKNOWN);
> unknown_nmi();
> 
> DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
> event, oprofile, crash IPI, etc. While DIE_NMI is used to process
> non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
> Interface) GHES (Generic Hardware Error Source), etc. Non-CPU-specific
> NMI sources can be processed on any CPU,
> 
> DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
> trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
> NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
> possible that APEI GHES is processed on CPU 1, while unknown NMI is
> gotten on CPU 0.

I think macro names DIE_NMI_IPI and DIE_NMI should be swapped as
e.g. the perf nmi is actually local and non-IPI.

We might consider to rework the IPI thing completly, but may be in a
follow-on patch.

> 
> In this new order of processing, performance sensitive NMI sources
> such as oprofile or perf event will have better performance because
> the time consuming IO port reading is done after them.
> 
> Only one NMI is eaten for each NMI handler call, even for PCI SERR and
> IOCHK NMIs. Because one NMI should be raised for each of them, eating
> too many NMI will cause unnecessary unknown NMI.
> 
> The die value used in NMI sources are fixed accordingly.
> 
> The NMI handler in the patch is designed by Andi Kleen.
> 
> 
> v2:
> 
> - Split process NMI reason (0x61) on non-BSP into another patch
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event.c  |    1 
>  arch/x86/kernel/traps.c           |   80 +++++++++++++++++++-------------------
>  arch/x86/oprofile/nmi_int.c       |    1 
>  arch/x86/oprofile/nmi_timer_int.c |    2 
>  drivers/char/ipmi/ipmi_watchdog.c |    2 
>  drivers/watchdog/hpwdt.c          |    2 
>  6 files changed, 43 insertions(+), 45 deletions(-)
> 
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1247,7 +1247,6 @@ perf_event_nmi_handler(struct notifier_b
>  		return NOTIFY_DONE;
>  
>  	switch (cmd) {
> -	case DIE_NMI:
>  	case DIE_NMI_IPI:

See my comment above. Same is true for oprofile and some other
handlers below. It isn't an IPI and should be case DIE_NMI: instead.

>  		break;
>  	case DIE_NMIUNKNOWN:
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -354,9 +354,6 @@ io_check_error(unsigned char reason, str
>  static notrace __kprobes void
>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>  {
> -	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> -			NOTIFY_STOP)
> -		return;
>  #ifdef CONFIG_MCA
>  	/*
>  	 * Might actually be able to figure out what the guilty party
> @@ -385,51 +382,54 @@ static notrace __kprobes void default_do
>  
>  	cpu = smp_processor_id();

This should go to if (!cpu) and maybe we drop variable cpu completly.

>  
> -	/* Only the BSP gets external NMIs from the system. */
> -	if (!cpu)
> -		reason = get_nmi_reason();
> +	/*
> +	 * CPU-specific NMI must be processed before non-CPU-specific
> +	 * NMI, otherwise we may lose it, because the CPU-specific
> +	 * NMI can not be detected/processed on other CPUs.
> +	 */
>  
> -	if (!(reason & NMI_REASON_MASK)) {
> -		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
> -								== NOTIFY_STOP)
> -			return;
> +	/*
> +	 * CPU-specific NMI: send to specific CPU or NMI sources must
> +	 * be processed on specific CPU
> +	 */
> +	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
> +	    == NOTIFY_STOP)
> +		return;
>  
> -#ifdef CONFIG_X86_LOCAL_APIC

Are you sure we may drop this option?

> -		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
> -							== NOTIFY_STOP)
> -			return;
> +	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
> +	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> +		return;

As said, IPI and non-IPI are mixed up.

>  
> -#ifndef CONFIG_LOCKUP_DETECTOR
> -		/*
> -		 * Ok, so this is none of the documented NMI sources,
> -		 * so it must be the NMI watchdog.
> -		 */
> -		if (nmi_watchdog_tick(regs, reason))
> -			return;
> -		if (!do_nmi_callback(regs, cpu))
> -#endif /* !CONFIG_LOCKUP_DETECTOR */
> -			unknown_nmi_error(reason, regs);
> -#else
> -		unknown_nmi_error(reason, regs);
> +	if (!cpu) {
> +		reason = get_nmi_reason();
> +		if (reason & NMI_REASON_MASK) {
> +			if (reason & NMI_REASON_SERR)
> +				pci_serr_error(reason, regs);
> +			else if (reason & NMI_REASON_IOCHK)
> +				io_check_error(reason, regs);
> +#ifdef CONFIG_X86_32
> +			/*
> +			 * Reassert NMI in case it became active
> +			 * meanwhile as it's edge-triggered:
> +			 */
> +			reassert_nmi();
>  #endif
> +			return;
> +		}
> +	}
>  
> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> +	if (nmi_watchdog_tick(regs, reason))
>  		return;
> -	}
> -	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
> +	if (do_nmi_callback(regs, smp_processor_id()))
>  		return;
> -
> -	/* AK: following checks seem to be broken on modern chipsets. FIXME */
> -	if (reason & NMI_REASON_SERR)
> -		pci_serr_error(reason, regs);
> -	if (reason & NMI_REASON_IOCHK)
> -		io_check_error(reason, regs);
> -#ifdef CONFIG_X86_32
> -	/*
> -	 * Reassert NMI in case it became active meanwhile
> -	 * as it's edge-triggered:
> -	 */
> -	reassert_nmi();
>  #endif
> +
> +	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
> +	    == NOTIFY_STOP)
> +		return;
> +
> +	unknown_nmi_error(reason, regs);
>  }
>  
>  dotraplinkage notrace __kprobes void
> --- a/arch/x86/oprofile/nmi_int.c
> +++ b/arch/x86/oprofile/nmi_int.c
> @@ -64,7 +64,6 @@ static int profile_exceptions_notify(str
>  	int ret = NOTIFY_DONE;
>  
>  	switch (val) {
> -	case DIE_NMI:
>  	case DIE_NMI_IPI:
>  		if (ctr_running)
>  			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
> --- a/arch/x86/oprofile/nmi_timer_int.c
> +++ b/arch/x86/oprofile/nmi_timer_int.c
> @@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti
>  	int ret = NOTIFY_DONE;
>  
>  	switch (val) {
> -	case DIE_NMI:
> +	case DIE_NMI_IPI:
>  		oprofile_add_sample(args->regs, 0);
>  		ret = NOTIFY_STOP;
>  		break;
> --- a/drivers/char/ipmi/ipmi_watchdog.c
> +++ b/drivers/char/ipmi/ipmi_watchdog.c
> @@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un
>  {
>  	struct die_args *args = data;
>  
> -	if (val != DIE_NMI)
> +	if (val != DIE_NMIUNKNOWN)

As this function handles an nmi and not an unknown nmi, we should keep
DIE_NMI here and better modify priorities in ipmi_nmi_handler.

>  		return NOTIFY_OK;
>  
>  	/* Hack, if it's a memory or I/O error, ignore it. */
> --- a/drivers/watchdog/hpwdt.c
> +++ b/drivers/watchdog/hpwdt.c
> @@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notif
>  	unsigned long rom_pl;
>  	static int die_nmi_called;
>  
> -	if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)
> +	if (ulReason != DIE_NMIUNKNOWN)

Same here, but dropping DIE_NMI_IPI.

-Robert

>  		goto out;
>  
>  	if (!hpwdt_nmi_decoding)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27  0:57 ` [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
@ 2010-09-27 10:09   ` Robert Richter
  2010-09-27 12:47     ` huang ying
  2010-09-27 15:38   ` Don Zickus
  1 sibling, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27 10:09 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 26.09.10 20:57:05, Huang Ying wrote:
> On some platforms, fatal hardware error may be notified via unknown
> NMI.
> 
> For example, on some platform with APEI firmware first mode support,
> firmware generates NMI for fatal error but without error record. The
> unknown NMI should be treated as notification of fatal hardware
> error. The unknown_nmi_for_hwerr is added for these platform, if it is
> not zero, system will treat unknown NMI as notification of fatal
> hardware error.
> 
> These platforms are identified via the presentation of APEI HEST or
> some PCI ID of the host bridge. The PCI ID of host bridge instead of
> DMI ID is used, so that the checking can be done based on the platform
> type instead of motherboard. This should be simpler and sufficient.
> 
> The method to identify the platforms is designed by Andi Kleen.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/include/asm/nmi.h |    1 
>  arch/x86/kernel/Makefile   |    2 +
>  arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++

Instead of creating this file the code should be implemented in

 arch/x86/kernel/cpu/intel.c

Similar AMD NB code is implemented in amd.c and k8.c.

>  arch/x86/kernel/traps.c    |   10 ++++++++
>  drivers/acpi/apei/hest.c   |    8 ++++++
>  5 files changed, 76 insertions(+)
>  create mode 100644 arch/x86/kernel/hwerr.c
> 
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -44,6 +44,7 @@ struct ctl_table;
>  extern int proc_nmi_enabled(struct ctl_table *, int ,
>  			void __user *, size_t *, loff_t *);
>  extern int unknown_nmi_panic;
> +extern int unknown_nmi_for_hwerr;
>  
>  void arch_trigger_all_cpu_backtrace(void);
>  #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -118,6 +118,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
>  
>  obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
>  
> +obj-y					+= hwerr.o
> +
>  ###
>  # 64 bit specific files
>  ifeq ($(CONFIG_X86_64),y)
> --- /dev/null
> +++ b/arch/x86/kernel/hwerr.c
> @@ -0,0 +1,55 @@
> +/*
> + * Hardware error architecture dependent processing
> + *
> + * Copyright 2010 Intel Corp.
> + *   Author: Huang Ying <ying.huang@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/init.h>
> +#include <linux/nmi.h>
> +
> +/*
> + * On some platform, hardware errors may be notified via unknown
> + * NMI. These platform is identified via the PCI ID of host bridge.
> + *
> + * The PCI ID of host bridge instead of DMI ID is used, so that the
> + * checking can be done based on the platform instead of motherboard.
> + * This should be simpler and sufficient.
> + */
> +static const
> +struct pci_device_id unknown_nmi_for_hwerr_platform[] __initdata = {
> +	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
> +	{ 0, }
> +};
> +
> +int __init check_unknown_nmi_for_hwerr(void)
> +{
> +	struct pci_dev *dev = NULL;
> +
> +	for_each_pci_dev(dev) {
> +		if (pci_match_id(unknown_nmi_for_hwerr_platform, dev)) {
> +			pr_info(
> +"Host bridge is identified, will treat unknown NMI as hardware error!\n");
> +			unknown_nmi_for_hwerr = 1;
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +late_initcall(check_unknown_nmi_for_hwerr);

Maybe you can use early pci functions like read_pci_config() to avoid
late init.

> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
>  
>  static int ignore_nmis;
>  
> +int unknown_nmi_for_hwerr;

If it is an nmi for hwerr, it is no longer an unknown nmi. So we
should drop 'unknow' in the naming.

> +
>  /*
>   * Prevent NMI reason port (0x61) being accessed simultaneously, can
>   * only be used in NMI handler.
> @@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
>  static notrace __kprobes void
>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>  {
> +	/*
> +	 * On some platforms, hardware errors may be notified via
> +	 * unknown NMI
> +	 */
> +	if (unknown_nmi_for_hwerr)
> +		panic("NMI for hardware error without error record: "
> +		      "Not continuing");
> +

Instead of checking this flag you should implement and register an nmi
handler for this case.

>  #ifdef CONFIG_MCA
>  	/*
>  	 * Might actually be able to figure out what the guilty party
> --- a/drivers/acpi/apei/hest.c
> +++ b/drivers/acpi/apei/hest.c
> @@ -35,6 +35,7 @@
>  #include <linux/highmem.h>
>  #include <linux/io.h>
>  #include <linux/platform_device.h>
> +#include <linux/nmi.h>
>  #include <acpi/apei.h>
>  
>  #include "apei-internal.h"
> @@ -222,6 +223,13 @@ static int __init hest_init(void)
>  	if (rc)
>  		goto err;
>  
> +	/*
> +	 * System has proper HEST should treat unknown NMI as fatal
> +	 * hardware error notification
> +	 */
> +	pr_info("HEST is valid, will treat unknown NMI as hardware error!\n");
> +	unknown_nmi_for_hwerr = 1;

Same here, instead register the nmi handler.

-Robert

> +
>  	rc = hest_ghes_dev_register(ghes_count);
>  	if (rc)
>  		goto err;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27  0:57 ` [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic Huang Ying
@ 2010-09-27 10:44   ` Robert Richter
  2010-09-27 12:56     ` huang ying
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27 10:44 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 26.09.10 20:57:06, Huang Ying wrote:
> do_nmi_callback related logic is removed, because it is useless, just
> adds code complexity.
> 
> unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged.

"unknown_nmi_panic" and "panic_on_unrecovered_nmi" are different. See
below and also Documentation/sysctl/kernel.txt.

> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/include/asm/nmi.h    |   10 +++++++++-
>  arch/x86/kernel/apic/hw_nmi.c |    1 -
>  arch/x86/kernel/apic/nmi.c    |   29 +----------------------------
>  arch/x86/kernel/traps.c       |   16 ++++++++++------
>  4 files changed, 20 insertions(+), 36 deletions(-)
> 
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void
>  extern void stop_apic_nmi_watchdog(void *);
>  extern void disable_timer_nmi_watchdog(void);
>  extern void enable_timer_nmi_watchdog(void);
> -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
>  extern void cpu_nmi_set_wd_enabled(void);
>  
> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> +extern int nmi_watchdog_tick(struct pt_regs *regs);
> +#else
> +static inline int nmi_watchdog_tick(struct pt_regs *regs)
> +{
> +	return 0;
> +}
> +#endif
> +
>  extern atomic_t nmi_active;
>  extern unsigned int nmi_watchdog;
>  #define NMI_NONE	0
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; }
>  #endif
>  atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
>  EXPORT_SYMBOL(nmi_active);
> -int unknown_nmi_panic;
>  void cpu_nmi_set_wd_enabled(void) { return; }
>  void stop_apic_nmi_watchdog(void *unused) { return; }
>  void setup_apic_nmi_watchdog(void *unused) { return; }
> --- a/arch/x86/kernel/apic/nmi.c
> +++ b/arch/x86/kernel/apic/nmi.c
> @@ -37,7 +37,6 @@
>  
>  #include <asm/mach_traps.h>
>  
> -int unknown_nmi_panic;
>  int nmi_watchdog_enabled;
>  
>  /* For reliability, we're prepared to waste bits here. */
> @@ -389,7 +388,7 @@ void touch_nmi_watchdog(void)
>  EXPORT_SYMBOL(touch_nmi_watchdog);
>  
>  notrace __kprobes int
> -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
> +nmi_watchdog_tick(struct pt_regs *regs)
>  {
>  	/*
>  	 * Since current_thread_info()-> is always on the stack, and we
> @@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog(
>  	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
>  }
>  
> -static int __init setup_unknown_nmi_panic(char *str)
> -{
> -	unknown_nmi_panic = 1;
> -	return 1;
> -}
> -__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
> -
> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> -{
> -	unsigned char reason = get_nmi_reason();
> -	char buf[64];
> -
> -	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> -	die_nmi(buf, regs, 1); /* Always panic here */
> -	return 0;

You are dropping this code that is different to panic().

> -}
> -
>  /*
>   * proc handler for /proc/sys/kernel/nmi
>   */
> @@ -540,15 +522,6 @@ int proc_nmi_enabled(struct ctl_table *t
>  
>  #endif /* CONFIG_SYSCTL */
>  
> -int do_nmi_callback(struct pt_regs *regs, int cpu)
> -{
> -#ifdef CONFIG_SYSCTL
> -	if (unknown_nmi_panic)
> -		return unknown_nmi_panic_callback(regs, cpu);
> -#endif
> -	return 0;
> -}
> -
>  void arch_trigger_all_cpu_backtrace(void)
>  {
>  	int i;
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
>  static int ignore_nmis;
>  
>  int unknown_nmi_for_hwerr;
> +int unknown_nmi_panic;
>  
>  /*
>   * Prevent NMI reason port (0x61) being accessed simultaneously, can
> @@ -308,6 +309,13 @@ gp_in_kernel:
>  	die("general protection fault", regs, error_code);
>  }
>  
> +static int __init setup_unknown_nmi_panic(char *str)
> +{
> +	unknown_nmi_panic = 1;

We should register a handler here for this case instead of using
global variables.

> +	return 1;
> +}
> +__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
> +
>  static notrace __kprobes void
>  pci_serr_error(unsigned char reason, struct pt_regs *regs)
>  {
> @@ -385,7 +393,7 @@ unknown_nmi_error(unsigned char reason,
>  			reason, smp_processor_id());
>  
>  	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
> -	if (panic_on_unrecovered_nmi)
> +	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
>  		panic("NMI: Not continuing");

There is different behavior, we should rather keep this code and add a
handler in the unknown_nmi_panic case with:

		die_nmi(...)
		...

-Robert

>  
>  	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
> @@ -432,12 +440,8 @@ static notrace __kprobes void default_do
>  	}
>  	raw_spin_unlock(&nmi_reason_lock);
>  
> -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> -	if (nmi_watchdog_tick(regs, reason))
> -		return;
> -	if (do_nmi_callback(regs, smp_processor_id()))
> +	if (nmi_watchdog_tick(regs))
>  		return;
> -#endif
>  
>  	if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
>  	    == NOTIFY_STOP)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
                   ` (5 preceding siblings ...)
  2010-09-27  0:57 ` [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic Huang Ying
@ 2010-09-27 10:50 ` Robert Richter
  2010-09-27 15:29   ` Don Zickus
  6 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27 10:50 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 26.09.10 20:57:00, Huang Ying wrote:
> Replace the NMI related magic numbers with symbol constants.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>

Reviewed-by: Robert Richter <robert.richter@amd.com>

See my comments to patches 3, 4, 6, 7.

-Robert 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27  9:41   ` Robert Richter
@ 2010-09-27 12:39     ` huang ying
  2010-09-27 13:25       ` Robert Richter
  0 siblings, 1 reply; 64+ messages in thread
From: huang ying @ 2010-09-27 12:39 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

Hi, Robert,

On Mon, Sep 27, 2010 at 5:41 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 26.09.10 20:57:03, Huang Ying wrote:
>> The original NMI handler is quite outdated in many aspects. This patch
>> try to fix it.
>>
>> The order to process the NMI sources are changed as follow:
>>
>> notify_die(DIE_NMI_IPI);
>> notify_die(DIE_NMI);
>> /* process io port 0x61 */
>> nmi_watchdog_touch();
>> notify_die(DIE_NMIUNKNOWN);
>> unknown_nmi();
>>
>> DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
>> event, oprofile, crash IPI, etc. While DIE_NMI is used to process
>> non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
>> Interface) GHES (Generic Hardware Error Source), etc. Non-CPU-specific
>> NMI sources can be processed on any CPU,
>>
>> DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
>> trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
>> NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
>> possible that APEI GHES is processed on CPU 1, while unknown NMI is
>> gotten on CPU 0.
>
> I think macro names DIE_NMI_IPI and DIE_NMI should be swapped as
> e.g. the perf nmi is actually local and non-IPI.

DIE_NMI_IPI may be not a good name for perf, but DIE_NMI is a even
worse name for perf! DIE_NMI is originally used for IOCHK and PCI SERR
NMI.

> We might consider to rework the IPI thing completly, but may be in a
> follow-on patch.
>
>>
>> In this new order of processing, performance sensitive NMI sources
>> such as oprofile or perf event will have better performance because
>> the time consuming IO port reading is done after them.
>>
>> Only one NMI is eaten for each NMI handler call, even for PCI SERR and
>> IOCHK NMIs. Because one NMI should be raised for each of them, eating
>> too many NMI will cause unnecessary unknown NMI.
>>
>> The die value used in NMI sources are fixed accordingly.
>>
>> The NMI handler in the patch is designed by Andi Kleen.
>>
>>
>> v2:
>>
>> - Split process NMI reason (0x61) on non-BSP into another patch
>>
>> Signed-off-by: Huang Ying <ying.huang@intel.com>
>> ---
>>  arch/x86/kernel/cpu/perf_event.c  |    1
>>  arch/x86/kernel/traps.c           |   80 +++++++++++++++++++-------------------
>>  arch/x86/oprofile/nmi_int.c       |    1
>>  arch/x86/oprofile/nmi_timer_int.c |    2
>>  drivers/char/ipmi/ipmi_watchdog.c |    2
>>  drivers/watchdog/hpwdt.c          |    2
>>  6 files changed, 43 insertions(+), 45 deletions(-)
>>
>> --- a/arch/x86/kernel/cpu/perf_event.c
>> +++ b/arch/x86/kernel/cpu/perf_event.c
>> @@ -1247,7 +1247,6 @@ perf_event_nmi_handler(struct notifier_b
>>               return NOTIFY_DONE;
>>
>>       switch (cmd) {
>> -     case DIE_NMI:
>>       case DIE_NMI_IPI:
>
> See my comment above. Same is true for oprofile and some other
> handlers below. It isn't an IPI and should be case DIE_NMI: instead.
>
>>               break;
>>       case DIE_NMIUNKNOWN:
>> --- a/arch/x86/kernel/traps.c
>> +++ b/arch/x86/kernel/traps.c
>> @@ -354,9 +354,6 @@ io_check_error(unsigned char reason, str
>>  static notrace __kprobes void
>>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>>  {
>> -     if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
>> -                     NOTIFY_STOP)
>> -             return;
>>  #ifdef CONFIG_MCA
>>       /*
>>        * Might actually be able to figure out what the guilty party
>> @@ -385,51 +382,54 @@ static notrace __kprobes void default_do
>>
>>       cpu = smp_processor_id();
>
> This should go to if (!cpu) and maybe we drop variable cpu completly.

The variable cpu is dropped in 5/7.

>>
>> -     /* Only the BSP gets external NMIs from the system. */
>> -     if (!cpu)
>> -             reason = get_nmi_reason();
>> +     /*
>> +      * CPU-specific NMI must be processed before non-CPU-specific
>> +      * NMI, otherwise we may lose it, because the CPU-specific
>> +      * NMI can not be detected/processed on other CPUs.
>> +      */
>>
>> -     if (!(reason & NMI_REASON_MASK)) {
>> -             if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
>> -                                                             == NOTIFY_STOP)
>> -                     return;
>> +     /*
>> +      * CPU-specific NMI: send to specific CPU or NMI sources must
>> +      * be processed on specific CPU
>> +      */
>> +     if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
>> +         == NOTIFY_STOP)
>> +             return;
>>
>> -#ifdef CONFIG_X86_LOCAL_APIC
>
> Are you sure we may drop this option?

Yes. DIE_NMI is used for non-CPU-specific NMI sources now.

>> -             if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
>> -                                                     == NOTIFY_STOP)
>> -                     return;
>> +     /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
>> +     if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
>> +             return;
>
> As said, IPI and non-IPI are mixed up.

They are processed one after the other.

>>
>> -#ifndef CONFIG_LOCKUP_DETECTOR
>> -             /*
>> -              * Ok, so this is none of the documented NMI sources,
>> -              * so it must be the NMI watchdog.
>> -              */
>> -             if (nmi_watchdog_tick(regs, reason))
>> -                     return;
>> -             if (!do_nmi_callback(regs, cpu))
>> -#endif /* !CONFIG_LOCKUP_DETECTOR */
>> -                     unknown_nmi_error(reason, regs);
>> -#else
>> -             unknown_nmi_error(reason, regs);
>> +     if (!cpu) {
>> +             reason = get_nmi_reason();
>> +             if (reason & NMI_REASON_MASK) {
>> +                     if (reason & NMI_REASON_SERR)
>> +                             pci_serr_error(reason, regs);
>> +                     else if (reason & NMI_REASON_IOCHK)
>> +                             io_check_error(reason, regs);
>> +#ifdef CONFIG_X86_32
>> +                     /*
>> +                      * Reassert NMI in case it became active
>> +                      * meanwhile as it's edge-triggered:
>> +                      */
>> +                     reassert_nmi();
>>  #endif
>> +                     return;
>> +             }
>> +     }
>>
>> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
>> +     if (nmi_watchdog_tick(regs, reason))
>>               return;
>> -     }
>> -     if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
>> +     if (do_nmi_callback(regs, smp_processor_id()))
>>               return;
>> -
>> -     /* AK: following checks seem to be broken on modern chipsets. FIXME */
>> -     if (reason & NMI_REASON_SERR)
>> -             pci_serr_error(reason, regs);
>> -     if (reason & NMI_REASON_IOCHK)
>> -             io_check_error(reason, regs);
>> -#ifdef CONFIG_X86_32
>> -     /*
>> -      * Reassert NMI in case it became active meanwhile
>> -      * as it's edge-triggered:
>> -      */
>> -     reassert_nmi();
>>  #endif
>> +
>> +     if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
>> +         == NOTIFY_STOP)
>> +             return;
>> +
>> +     unknown_nmi_error(reason, regs);
>>  }
>>
>>  dotraplinkage notrace __kprobes void
>> --- a/arch/x86/oprofile/nmi_int.c
>> +++ b/arch/x86/oprofile/nmi_int.c
>> @@ -64,7 +64,6 @@ static int profile_exceptions_notify(str
>>       int ret = NOTIFY_DONE;
>>
>>       switch (val) {
>> -     case DIE_NMI:
>>       case DIE_NMI_IPI:
>>               if (ctr_running)
>>                       model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
>> --- a/arch/x86/oprofile/nmi_timer_int.c
>> +++ b/arch/x86/oprofile/nmi_timer_int.c
>> @@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti
>>       int ret = NOTIFY_DONE;
>>
>>       switch (val) {
>> -     case DIE_NMI:
>> +     case DIE_NMI_IPI:
>>               oprofile_add_sample(args->regs, 0);
>>               ret = NOTIFY_STOP;
>>               break;
>> --- a/drivers/char/ipmi/ipmi_watchdog.c
>> +++ b/drivers/char/ipmi/ipmi_watchdog.c
>> @@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un
>>  {
>>       struct die_args *args = data;
>>
>> -     if (val != DIE_NMI)
>> +     if (val != DIE_NMIUNKNOWN)

All watchdogs use DIE_NMIUNKNOWN in this patch. Because they should be
processed after CPU specific and non-CPU-specific NMIs. Or we define a
special DIE_NMI_XX for it? like DIE_NMI_WATCHDOG?

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27 10:09   ` Robert Richter
@ 2010-09-27 12:47     ` huang ying
  2010-09-27 13:38       ` Robert Richter
  0 siblings, 1 reply; 64+ messages in thread
From: huang ying @ 2010-09-27 12:47 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 6:09 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 26.09.10 20:57:05, Huang Ying wrote:
>> On some platforms, fatal hardware error may be notified via unknown
>> NMI.
>>
>> For example, on some platform with APEI firmware first mode support,
>> firmware generates NMI for fatal error but without error record. The
>> unknown NMI should be treated as notification of fatal hardware
>> error. The unknown_nmi_for_hwerr is added for these platform, if it is
>> not zero, system will treat unknown NMI as notification of fatal
>> hardware error.
>>
>> These platforms are identified via the presentation of APEI HEST or
>> some PCI ID of the host bridge. The PCI ID of host bridge instead of
>> DMI ID is used, so that the checking can be done based on the platform
>> type instead of motherboard. This should be simpler and sufficient.
>>
>> The method to identify the platforms is designed by Andi Kleen.
>>
>> Signed-off-by: Huang Ying <ying.huang@intel.com>
>> ---
>>  arch/x86/include/asm/nmi.h |    1
>>  arch/x86/kernel/Makefile   |    2 +
>>  arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
>
> Instead of creating this file the code should be implemented in
>
>  arch/x86/kernel/cpu/intel.c
>
> Similar AMD NB code is implemented in amd.c and k8.c.

Why? This file is not vendor specific.

>>  arch/x86/kernel/traps.c    |   10 ++++++++
>>  drivers/acpi/apei/hest.c   |    8 ++++++
>>  5 files changed, 76 insertions(+)
>>  create mode 100644 arch/x86/kernel/hwerr.c
>>
>> --- a/arch/x86/include/asm/nmi.h
>> +++ b/arch/x86/include/asm/nmi.h
>> @@ -44,6 +44,7 @@ struct ctl_table;
>>  extern int proc_nmi_enabled(struct ctl_table *, int ,
>>                       void __user *, size_t *, loff_t *);
>>  extern int unknown_nmi_panic;
>> +extern int unknown_nmi_for_hwerr;
>>
>>  void arch_trigger_all_cpu_backtrace(void);
>>  #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
>> --- a/arch/x86/kernel/Makefile
>> +++ b/arch/x86/kernel/Makefile
>> @@ -118,6 +118,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
>>
>>  obj-$(CONFIG_SWIOTLB)                        += pci-swiotlb.o
>>
>> +obj-y                                        += hwerr.o
>> +
>>  ###
>>  # 64 bit specific files
>>  ifeq ($(CONFIG_X86_64),y)
>> --- /dev/null
>> +++ b/arch/x86/kernel/hwerr.c
>> @@ -0,0 +1,55 @@
>> +/*
>> + * Hardware error architecture dependent processing
>> + *
>> + * Copyright 2010 Intel Corp.
>> + *   Author: Huang Ying <ying.huang@intel.com>
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License version
>> + * 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/pci.h>
>> +#include <linux/init.h>
>> +#include <linux/nmi.h>
>> +
>> +/*
>> + * On some platform, hardware errors may be notified via unknown
>> + * NMI. These platform is identified via the PCI ID of host bridge.
>> + *
>> + * The PCI ID of host bridge instead of DMI ID is used, so that the
>> + * checking can be done based on the platform instead of motherboard.
>> + * This should be simpler and sufficient.
>> + */
>> +static const
>> +struct pci_device_id unknown_nmi_for_hwerr_platform[] __initdata = {
>> +     { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
>> +     { 0, }
>> +};
>> +
>> +int __init check_unknown_nmi_for_hwerr(void)
>> +{
>> +     struct pci_dev *dev = NULL;
>> +
>> +     for_each_pci_dev(dev) {
>> +             if (pci_match_id(unknown_nmi_for_hwerr_platform, dev)) {
>> +                     pr_info(
>> +"Host bridge is identified, will treat unknown NMI as hardware error!\n");
>> +                     unknown_nmi_for_hwerr = 1;
>> +                     break;
>> +             }
>> +     }
>> +
>> +     return 0;
>> +}
>> +late_initcall(check_unknown_nmi_for_hwerr);
>
> Maybe you can use early pci functions like read_pci_config() to avoid
> late init.

I don't think late init is a big issue. Hardware error is rare after all.

>> --- a/arch/x86/kernel/traps.c
>> +++ b/arch/x86/kernel/traps.c
>> @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
>>
>>  static int ignore_nmis;
>>
>> +int unknown_nmi_for_hwerr;
>
> If it is an nmi for hwerr, it is no longer an unknown nmi. So we
> should drop 'unknow' in the naming.

I think unkown NMI is the one we can not identify the source.
Something like anonymous.

>> +
>>  /*
>>   * Prevent NMI reason port (0x61) being accessed simultaneously, can
>>   * only be used in NMI handler.
>> @@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
>>  static notrace __kprobes void
>>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>>  {
>> +     /*
>> +      * On some platforms, hardware errors may be notified via
>> +      * unknown NMI
>> +      */
>> +     if (unknown_nmi_for_hwerr)
>> +             panic("NMI for hardware error without error record: "
>> +                   "Not continuing");
>> +
>
> Instead of checking this flag you should implement and register an nmi
> handler for this case.

I think explicit function calls have better readability than notifier chains.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27 10:44   ` Robert Richter
@ 2010-09-27 12:56     ` huang ying
  2010-09-27 13:43       ` Robert Richter
  0 siblings, 1 reply; 64+ messages in thread
From: huang ying @ 2010-09-27 12:56 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 6:44 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 26.09.10 20:57:06, Huang Ying wrote:
>> do_nmi_callback related logic is removed, because it is useless, just
>> adds code complexity.
>>
>> unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged.
>
> "unknown_nmi_panic" and "panic_on_unrecovered_nmi" are different. See
> below and also Documentation/sysctl/kernel.txt.
>
>>
>> Signed-off-by: Huang Ying <ying.huang@intel.com>
>> ---
>>  arch/x86/include/asm/nmi.h    |   10 +++++++++-
>>  arch/x86/kernel/apic/hw_nmi.c |    1 -
>>  arch/x86/kernel/apic/nmi.c    |   29 +----------------------------
>>  arch/x86/kernel/traps.c       |   16 ++++++++++------
>>  4 files changed, 20 insertions(+), 36 deletions(-)
>>
>> --- a/arch/x86/include/asm/nmi.h
>> +++ b/arch/x86/include/asm/nmi.h
>> @@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void
>>  extern void stop_apic_nmi_watchdog(void *);
>>  extern void disable_timer_nmi_watchdog(void);
>>  extern void enable_timer_nmi_watchdog(void);
>> -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
>>  extern void cpu_nmi_set_wd_enabled(void);
>>
>> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
>> +extern int nmi_watchdog_tick(struct pt_regs *regs);
>> +#else
>> +static inline int nmi_watchdog_tick(struct pt_regs *regs)
>> +{
>> +     return 0;
>> +}
>> +#endif
>> +
>>  extern atomic_t nmi_active;
>>  extern unsigned int nmi_watchdog;
>>  #define NMI_NONE     0
>> --- a/arch/x86/kernel/apic/hw_nmi.c
>> +++ b/arch/x86/kernel/apic/hw_nmi.c
>> @@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; }
>>  #endif
>>  atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
>>  EXPORT_SYMBOL(nmi_active);
>> -int unknown_nmi_panic;
>>  void cpu_nmi_set_wd_enabled(void) { return; }
>>  void stop_apic_nmi_watchdog(void *unused) { return; }
>>  void setup_apic_nmi_watchdog(void *unused) { return; }
>> --- a/arch/x86/kernel/apic/nmi.c
>> +++ b/arch/x86/kernel/apic/nmi.c
>> @@ -37,7 +37,6 @@
>>
>>  #include <asm/mach_traps.h>
>>
>> -int unknown_nmi_panic;
>>  int nmi_watchdog_enabled;
>>
>>  /* For reliability, we're prepared to waste bits here. */
>> @@ -389,7 +388,7 @@ void touch_nmi_watchdog(void)
>>  EXPORT_SYMBOL(touch_nmi_watchdog);
>>
>>  notrace __kprobes int
>> -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
>> +nmi_watchdog_tick(struct pt_regs *regs)
>>  {
>>       /*
>>        * Since current_thread_info()-> is always on the stack, and we
>> @@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog(
>>       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
>>  }
>>
>> -static int __init setup_unknown_nmi_panic(char *str)
>> -{
>> -     unknown_nmi_panic = 1;
>> -     return 1;
>> -}
>> -__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
>> -
>> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
>> -{
>> -     unsigned char reason = get_nmi_reason();
>> -     char buf[64];
>> -
>> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
>> -     die_nmi(buf, regs, 1); /* Always panic here */
>> -     return 0;
>
> You are dropping this code that is different to panic().

What is the difference? Is it relevant?

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27 12:39     ` huang ying
@ 2010-09-27 13:25       ` Robert Richter
  2010-09-27 15:29         ` Don Zickus
  2010-09-28  1:03         ` Huang Ying
  0 siblings, 2 replies; 64+ messages in thread
From: Robert Richter @ 2010-09-27 13:25 UTC (permalink / raw)
  To: huang ying
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 08:39:24, huang ying wrote:

Looking at all you comments below I would vote for the following:

We implement all handlers using DIE_NMI and set its priority
accordingly in struct notifier_block when registering the the nmi
handler. We define NMI priorities as macros such as
NMI_PRIORITY_LOCAL, NMI_PRIORITY_WATCHDOG, NMI_PRIORITY_IO, etc. and
require all handlers to set the priority. register_die_notifier() with
(!nb->priority) should return -EINVAL. DIE_NMI_UNKNOWN should only be
used if there is a handler for the case when all others fail such as
implemented in the perf nmi handler or when reporting an unknown nmi.

This will avoid all the confusion below and also makes the code much
cleaner.

-Robert

> Hi, Robert,
> 
> On Mon, Sep 27, 2010 at 5:41 PM, Robert Richter <robert.richter@amd.com> wrote:
> > On 26.09.10 20:57:03, Huang Ying wrote:
> >> The original NMI handler is quite outdated in many aspects. This patch
> >> try to fix it.
> >>
> >> The order to process the NMI sources are changed as follow:
> >>
> >> notify_die(DIE_NMI_IPI);
> >> notify_die(DIE_NMI);
> >> /* process io port 0x61 */
> >> nmi_watchdog_touch();
> >> notify_die(DIE_NMIUNKNOWN);
> >> unknown_nmi();
> >>
> >> DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
> >> event, oprofile, crash IPI, etc. While DIE_NMI is used to process
> >> non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
> >> Interface) GHES (Generic Hardware Error Source), etc. Non-CPU-specific
> >> NMI sources can be processed on any CPU,
> >>
> >> DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
> >> trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
> >> NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
> >> possible that APEI GHES is processed on CPU 1, while unknown NMI is
> >> gotten on CPU 0.
> >
> > I think macro names DIE_NMI_IPI and DIE_NMI should be swapped as
> > e.g. the perf nmi is actually local and non-IPI.
> 
> DIE_NMI_IPI may be not a good name for perf, but DIE_NMI is a even
> worse name for perf! DIE_NMI is originally used for IOCHK and PCI SERR
> NMI.
> 
> > We might consider to rework the IPI thing completly, but may be in a
> > follow-on patch.
> >
> >>
> >> In this new order of processing, performance sensitive NMI sources
> >> such as oprofile or perf event will have better performance because
> >> the time consuming IO port reading is done after them.
> >>
> >> Only one NMI is eaten for each NMI handler call, even for PCI SERR and
> >> IOCHK NMIs. Because one NMI should be raised for each of them, eating
> >> too many NMI will cause unnecessary unknown NMI.
> >>
> >> The die value used in NMI sources are fixed accordingly.
> >>
> >> The NMI handler in the patch is designed by Andi Kleen.
> >>
> >>
> >> v2:
> >>
> >> - Split process NMI reason (0x61) on non-BSP into another patch
> >>
> >> Signed-off-by: Huang Ying <ying.huang@intel.com>
> >> ---
> >>  arch/x86/kernel/cpu/perf_event.c  |    1
> >>  arch/x86/kernel/traps.c           |   80 +++++++++++++++++++-------------------
> >>  arch/x86/oprofile/nmi_int.c       |    1
> >>  arch/x86/oprofile/nmi_timer_int.c |    2
> >>  drivers/char/ipmi/ipmi_watchdog.c |    2
> >>  drivers/watchdog/hpwdt.c          |    2
> >>  6 files changed, 43 insertions(+), 45 deletions(-)
> >>
> >> --- a/arch/x86/kernel/cpu/perf_event.c
> >> +++ b/arch/x86/kernel/cpu/perf_event.c
> >> @@ -1247,7 +1247,6 @@ perf_event_nmi_handler(struct notifier_b
> >>               return NOTIFY_DONE;
> >>
> >>       switch (cmd) {
> >> -     case DIE_NMI:
> >>       case DIE_NMI_IPI:
> >
> > See my comment above. Same is true for oprofile and some other
> > handlers below. It isn't an IPI and should be case DIE_NMI: instead.
> >
> >>               break;
> >>       case DIE_NMIUNKNOWN:
> >> --- a/arch/x86/kernel/traps.c
> >> +++ b/arch/x86/kernel/traps.c
> >> @@ -354,9 +354,6 @@ io_check_error(unsigned char reason, str
> >>  static notrace __kprobes void
> >>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
> >>  {
> >> -     if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> >> -                     NOTIFY_STOP)
> >> -             return;
> >>  #ifdef CONFIG_MCA
> >>       /*
> >>        * Might actually be able to figure out what the guilty party
> >> @@ -385,51 +382,54 @@ static notrace __kprobes void default_do
> >>
> >>       cpu = smp_processor_id();
> >
> > This should go to if (!cpu) and maybe we drop variable cpu completly.
> 
> The variable cpu is dropped in 5/7.
> 
> >>
> >> -     /* Only the BSP gets external NMIs from the system. */
> >> -     if (!cpu)
> >> -             reason = get_nmi_reason();
> >> +     /*
> >> +      * CPU-specific NMI must be processed before non-CPU-specific
> >> +      * NMI, otherwise we may lose it, because the CPU-specific
> >> +      * NMI can not be detected/processed on other CPUs.
> >> +      */
> >>
> >> -     if (!(reason & NMI_REASON_MASK)) {
> >> -             if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
> >> -                                                             == NOTIFY_STOP)
> >> -                     return;
> >> +     /*
> >> +      * CPU-specific NMI: send to specific CPU or NMI sources must
> >> +      * be processed on specific CPU
> >> +      */
> >> +     if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
> >> +         == NOTIFY_STOP)
> >> +             return;
> >>
> >> -#ifdef CONFIG_X86_LOCAL_APIC
> >
> > Are you sure we may drop this option?
> 
> Yes. DIE_NMI is used for non-CPU-specific NMI sources now.
> 
> >> -             if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
> >> -                                                     == NOTIFY_STOP)
> >> -                     return;
> >> +     /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
> >> +     if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> >> +             return;
> >
> > As said, IPI and non-IPI are mixed up.
> 
> They are processed one after the other.
> 
> >>
> >> -#ifndef CONFIG_LOCKUP_DETECTOR
> >> -             /*
> >> -              * Ok, so this is none of the documented NMI sources,
> >> -              * so it must be the NMI watchdog.
> >> -              */
> >> -             if (nmi_watchdog_tick(regs, reason))
> >> -                     return;
> >> -             if (!do_nmi_callback(regs, cpu))
> >> -#endif /* !CONFIG_LOCKUP_DETECTOR */
> >> -                     unknown_nmi_error(reason, regs);
> >> -#else
> >> -             unknown_nmi_error(reason, regs);
> >> +     if (!cpu) {
> >> +             reason = get_nmi_reason();
> >> +             if (reason & NMI_REASON_MASK) {
> >> +                     if (reason & NMI_REASON_SERR)
> >> +                             pci_serr_error(reason, regs);
> >> +                     else if (reason & NMI_REASON_IOCHK)
> >> +                             io_check_error(reason, regs);
> >> +#ifdef CONFIG_X86_32
> >> +                     /*
> >> +                      * Reassert NMI in case it became active
> >> +                      * meanwhile as it's edge-triggered:
> >> +                      */
> >> +                     reassert_nmi();
> >>  #endif
> >> +                     return;
> >> +             }
> >> +     }
> >>
> >> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> >> +     if (nmi_watchdog_tick(regs, reason))
> >>               return;
> >> -     }
> >> -     if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
> >> +     if (do_nmi_callback(regs, smp_processor_id()))
> >>               return;
> >> -
> >> -     /* AK: following checks seem to be broken on modern chipsets. FIXME */
> >> -     if (reason & NMI_REASON_SERR)
> >> -             pci_serr_error(reason, regs);
> >> -     if (reason & NMI_REASON_IOCHK)
> >> -             io_check_error(reason, regs);
> >> -#ifdef CONFIG_X86_32
> >> -     /*
> >> -      * Reassert NMI in case it became active meanwhile
> >> -      * as it's edge-triggered:
> >> -      */
> >> -     reassert_nmi();
> >>  #endif
> >> +
> >> +     if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
> >> +         == NOTIFY_STOP)
> >> +             return;
> >> +
> >> +     unknown_nmi_error(reason, regs);
> >>  }
> >>
> >>  dotraplinkage notrace __kprobes void
> >> --- a/arch/x86/oprofile/nmi_int.c
> >> +++ b/arch/x86/oprofile/nmi_int.c
> >> @@ -64,7 +64,6 @@ static int profile_exceptions_notify(str
> >>       int ret = NOTIFY_DONE;
> >>
> >>       switch (val) {
> >> -     case DIE_NMI:
> >>       case DIE_NMI_IPI:
> >>               if (ctr_running)
> >>                       model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
> >> --- a/arch/x86/oprofile/nmi_timer_int.c
> >> +++ b/arch/x86/oprofile/nmi_timer_int.c
> >> @@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti
> >>       int ret = NOTIFY_DONE;
> >>
> >>       switch (val) {
> >> -     case DIE_NMI:
> >> +     case DIE_NMI_IPI:
> >>               oprofile_add_sample(args->regs, 0);
> >>               ret = NOTIFY_STOP;
> >>               break;
> >> --- a/drivers/char/ipmi/ipmi_watchdog.c
> >> +++ b/drivers/char/ipmi/ipmi_watchdog.c
> >> @@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un
> >>  {
> >>       struct die_args *args = data;
> >>
> >> -     if (val != DIE_NMI)
> >> +     if (val != DIE_NMIUNKNOWN)
> 
> All watchdogs use DIE_NMIUNKNOWN in this patch. Because they should be
> processed after CPU specific and non-CPU-specific NMIs. Or we define a
> special DIE_NMI_XX for it? like DIE_NMI_WATCHDOG?
> 
> Best Regards,
> Huang Ying
> 

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27 12:47     ` huang ying
@ 2010-09-27 13:38       ` Robert Richter
  2010-09-27 15:20         ` Don Zickus
  2010-09-28  1:19         ` Huang Ying
  0 siblings, 2 replies; 64+ messages in thread
From: Robert Richter @ 2010-09-27 13:38 UTC (permalink / raw)
  To: huang ying
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 08:47:53, huang ying wrote:

> >>  arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
> >
> > Instead of creating this file the code should be implemented in
> >
> >  arch/x86/kernel/cpu/intel.c
> >
> > Similar AMD NB code is implemented in amd.c and k8.c.
> 
> Why? This file is not vendor specific.

No, it only implements an Intel specific PCI device, nothing else.

> >> +late_initcall(check_unknown_nmi_for_hwerr);
> >
> > Maybe you can use early pci functions like read_pci_config() to avoid
> > late init.
> 
> I don't think late init is a big issue. Hardware error is rare after all.

Just want to let you know this as an option.

> >> --- a/arch/x86/kernel/traps.c
> >> +++ b/arch/x86/kernel/traps.c
> >> @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
> >>
> >>  static int ignore_nmis;
> >>
> >> +int unknown_nmi_for_hwerr;
> >
> > If it is an nmi for hwerr, it is no longer an unknown nmi. So we
> > should drop 'unknow' in the naming.
> 
> I think unkown NMI is the one we can not identify the source.
> Something like anonymous.
> 
> >> +
> >>  /*
> >>   * Prevent NMI reason port (0x61) being accessed simultaneously, can
> >>   * only be used in NMI handler.
> >> @@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
> >>  static notrace __kprobes void
> >>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
> >>  {
> >> +     /*
> >> +      * On some platforms, hardware errors may be notified via
> >> +      * unknown NMI
> >> +      */
> >> +     if (unknown_nmi_for_hwerr)
> >> +             panic("NMI for hardware error without error record: "
> >> +                   "Not continuing");
> >> +
> >
> > Instead of checking this flag you should implement and register an nmi
> > handler for this case.
> 
> I think explicit function calls have better readability than notifier chains.

What is different to unknown_nmi() then?

So no, in your case you want to catch unknown nmis for a certain
hardware and then throw a panic. This should be clearly implemented in
a separate handler for this piece of hardware.

We want to cleanup this code and throw out all hardware specific
snippets, and not introduce new special cases here.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27 12:56     ` huang ying
@ 2010-09-27 13:43       ` Robert Richter
  2010-09-27 15:16         ` Don Zickus
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27 13:43 UTC (permalink / raw)
  To: huang ying
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 08:56:44, huang ying wrote:

> >> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> >> -{
> >> -     unsigned char reason = get_nmi_reason();
> >> -     char buf[64];
> >> -
> >> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> >> -     die_nmi(buf, regs, 1); /* Always panic here */
> >> -     return 0;
> >
> > You are dropping this code that is different to panic().
> 
> What is the difference? Is it relevant?

I think yes, since the code behaves different. Otherwise we could
remove die_nmi() completly and replace it by panic(). But both are
different implementions. Maybe we can merge the code, but I didn't
look at it closly.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27 13:43       ` Robert Richter
@ 2010-09-27 15:16         ` Don Zickus
  2010-09-27 16:58           ` Robert Richter
  2010-09-28  0:28           ` Huang Ying
  0 siblings, 2 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-27 15:16 UTC (permalink / raw)
  To: Robert Richter
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 03:43:41PM +0200, Robert Richter wrote:
> On 27.09.10 08:56:44, huang ying wrote:
> 
> > >> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> > >> -{
> > >> -     unsigned char reason = get_nmi_reason();
> > >> -     char buf[64];
> > >> -
> > >> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> > >> -     die_nmi(buf, regs, 1); /* Always panic here */
> > >> -     return 0;
> > >
> > > You are dropping this code that is different to panic().
> > 
> > What is the difference? Is it relevant?
> 
> I think yes, since the code behaves different. Otherwise we could
> remove die_nmi() completly and replace it by panic(). But both are
> different implementions. Maybe we can merge the code, but I didn't
> look at it closly.

Actually die_nmi is a wrapper around panic with two important pieces.
One, it dumps some registers and two it does another notifier call to
DIE_NMIWATCHDOG (which correlates to another discussion in this patch
series).

So if we do any consolidation between panic and die_nmi, it should be
convert to die_nmi.  But then I wonder if that breaks the original
semantics of 'panic_on_unrecovered_nmi'.  I don't think so though.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27 13:38       ` Robert Richter
@ 2010-09-27 15:20         ` Don Zickus
  2010-09-28  0:36           ` Huang Ying
  2010-09-28  1:19         ` Huang Ying
  1 sibling, 1 reply; 64+ messages in thread
From: Don Zickus @ 2010-09-27 15:20 UTC (permalink / raw)
  To: Robert Richter
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 03:38:16PM +0200, Robert Richter wrote:
> On 27.09.10 08:47:53, huang ying wrote:
> 
> > I think explicit function calls have better readability than notifier chains.
> 
> What is different to unknown_nmi() then?
> 
> So no, in your case you want to catch unknown nmis for a certain
> hardware and then throw a panic. This should be clearly implemented in
> a separate handler for this piece of hardware.
> 
> We want to cleanup this code and throw out all hardware specific
> snippets, and not introduce new special cases here.

I tend to agree with Robert here.  I don't know if there were any 'rules'
to which handlers get directly called versus ones that go through the
die_chain, so I was originally going to let it go.  But if they aren't
any, it does look cleaner to have everything in die_chains.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27 13:25       ` Robert Richter
@ 2010-09-27 15:29         ` Don Zickus
  2010-09-27 17:40           ` Robert Richter
  2010-09-28  1:03         ` Huang Ying
  1 sibling, 1 reply; 64+ messages in thread
From: Don Zickus @ 2010-09-27 15:29 UTC (permalink / raw)
  To: Robert Richter
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 03:25:38PM +0200, Robert Richter wrote:
> On 27.09.10 08:39:24, huang ying wrote:
> 
> Looking at all you comments below I would vote for the following:
> 
> We implement all handlers using DIE_NMI and set its priority
> accordingly in struct notifier_block when registering the the nmi
> handler. We define NMI priorities as macros such as
> NMI_PRIORITY_LOCAL, NMI_PRIORITY_WATCHDOG, NMI_PRIORITY_IO, etc. and
> require all handlers to set the priority. register_die_notifier() with
> (!nb->priority) should return -EINVAL. DIE_NMI_UNKNOWN should only be
> used if there is a handler for the case when all others fail such as
> implemented in the perf nmi handler or when reporting an unknown nmi.
> 
> This will avoid all the confusion below and also makes the code much
> cleaner.

This could be interesting and certainly simplify things (processing the
die_chain once, instead of 4 times I think).  But I would probably
recommend we do this as another patch on top of Huang's to layer the
changes in a way that we can easily bisect where things went wrong if
NMIs start mis-behaving.

I don't think any of the handlers really used the priority, except for
perf, which tried to be on the bottom of the list.  So assigning
priorities like this may work.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants
  2010-09-27 10:50 ` [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Robert Richter
@ 2010-09-27 15:29   ` Don Zickus
  0 siblings, 0 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-27 15:29 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 12:50:15PM +0200, Robert Richter wrote:
> On 26.09.10 20:57:00, Huang Ying wrote:
> > Replace the NMI related magic numbers with symbol constants.
> > 
> > Signed-off-by: Huang Ying <ying.huang@intel.com>
> 
> Reviewed-by: Robert Richter <robert.richter@amd.com>
> 
> See my comments to patches 3, 4, 6, 7.

Thanks!

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27  9:00       ` Robert Richter
@ 2010-09-27 15:33         ` Don Zickus
  2010-09-27 16:45           ` Robert Richter
  2010-09-28  1:22           ` Huang Ying
  0 siblings, 2 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-27 15:33 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 11:00:56AM +0200, Robert Richter wrote:
> On 27.09.10 04:39:20, Huang Ying wrote:
> > > I already commented on this, patch #1 and #3 are basically the same in
> > > most parts which should be merged. What remains then in this patch is
> > > the modified printk() and the comment. Both could be added to #1 too
> > > which is then some sort of code cleanup patch.
> > 
> > Don thinks it is Ok to keep 2 patches.
> 
> I don't like reviewing new changes which are thrown away with the next
> patch. I review things twice and it is much harder to see what really
> changed then. Also we should have a clean history.

I didn't care either way.  But if it makes it easier to review, it's nice
to keep reviewers happy too. :-)

Hunag, I think there is going to be a V3 of this series, could you just
combine these patches then?

> 
> And with git it is fairly easy to join patches.
> 
> > 
> > > >  #define NMI_REASON_CLEAR_IOCHK	0x08
> > > >  #define NMI_REASON_CLEAR_MASK	0x0f
> > > >  
> > > > --- a/arch/x86/kernel/traps.c
> > > > +++ b/arch/x86/kernel/traps.c
> > > > @@ -301,15 +301,14 @@ gp_in_kernel:
> > > >  }
> > > >  
> > > >  static notrace __kprobes void
> > > > -mem_parity_error(unsigned char reason, struct pt_regs *regs)
> > > > +pci_serr_error(unsigned char reason, struct pt_regs *regs)
> > > >  {
> > > > -	printk(KERN_EMERG
> > > > -		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> > > > -			reason, smp_processor_id());
> > > > -
> > > > -	printk(KERN_EMERG
> > > > -		"You have some hardware problem, likely on the PCI bus.\n");
> > > > +	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");
> > > 
> > > You should keep reporting the cpu id to identify the affected node and
> > > also the reason.
> > 
> > Ok. I will add CPU ID in message. Because we know the reason, I don't
> > think we need the reason in message.
> 
> You only know that bit 7 is set, not the rest. As this is an error
> message we should provide as much information as possible.

Well, what other info do we know besides that bit being set?  (I wish we
had more, but I don't think we do)

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27  0:57 ` [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
  2010-09-27 10:09   ` Robert Richter
@ 2010-09-27 15:38   ` Don Zickus
  2010-09-28  1:54     ` Huang Ying
  1 sibling, 1 reply; 64+ messages in thread
From: Don Zickus @ 2010-09-27 15:38 UTC (permalink / raw)
  To: Huang Ying; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

Hi Huang,

On Mon, Sep 27, 2010 at 08:57:05AM +0800, Huang Ying wrote:
> @@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
>  static notrace __kprobes void
>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
>  {
> +	/*
> +	 * On some platforms, hardware errors may be notified via
> +	 * unknown NMI
> +	 */
> +	if (unknown_nmi_for_hwerr)
> +		panic("NMI for hardware error without error record: "
> +		      "Not continuing");

Can you modify this to let users know to check the BIOS/BMC for further
messages?  Otherwise people just file bugzillas asking what the problem
is, and the first thing we have to respond with is check the BIOS/BMC.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27 15:33         ` Don Zickus
@ 2010-09-27 16:45           ` Robert Richter
  2010-09-27 17:50             ` Don Zickus
  2010-09-28  1:33             ` Huang Ying
  2010-09-28  1:22           ` Huang Ying
  1 sibling, 2 replies; 64+ messages in thread
From: Robert Richter @ 2010-09-27 16:45 UTC (permalink / raw)
  To: Don Zickus
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 27.09.10 11:33:15, Don Zickus wrote:
> On Mon, Sep 27, 2010 at 11:00:56AM +0200, Robert Richter wrote:
> > On 27.09.10 04:39:20, Huang Ying wrote:
> > > > I already commented on this, patch #1 and #3 are basically the same in
> > > > most parts which should be merged. What remains then in this patch is
> > > > the modified printk() and the comment. Both could be added to #1 too
> > > > which is then some sort of code cleanup patch.
> > > 
> > > Don thinks it is Ok to keep 2 patches.
> > 
> > I don't like reviewing new changes which are thrown away with the next
> > patch. I review things twice and it is much harder to see what really
> > changed then. Also we should have a clean history.
> 
> I didn't care either way.  But if it makes it easier to review, it's nice
> to keep reviewers happy too. :-)

Yes, thanks, this makes me happy. :)

> 
> Hunag, I think there is going to be a V3 of this series, could you just
> combine these patches then?
> 
> > 
> > And with git it is fairly easy to join patches.
> > 
> > > 
> > > > >  #define NMI_REASON_CLEAR_IOCHK	0x08
> > > > >  #define NMI_REASON_CLEAR_MASK	0x0f
> > > > >  
> > > > > --- a/arch/x86/kernel/traps.c
> > > > > +++ b/arch/x86/kernel/traps.c
> > > > > @@ -301,15 +301,14 @@ gp_in_kernel:
> > > > >  }
> > > > >  
> > > > >  static notrace __kprobes void
> > > > > -mem_parity_error(unsigned char reason, struct pt_regs *regs)
> > > > > +pci_serr_error(unsigned char reason, struct pt_regs *regs)
> > > > >  {
> > > > > -	printk(KERN_EMERG
> > > > > -		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> > > > > -			reason, smp_processor_id());
> > > > > -
> > > > > -	printk(KERN_EMERG
> > > > > -		"You have some hardware problem, likely on the PCI bus.\n");
> > > > > +	printk(KERN_EMERG "NMI: PCI system error (SERR).\n");
> > > > 
> > > > You should keep reporting the cpu id to identify the affected node and
> > > > also the reason.
> > > 
> > > Ok. I will add CPU ID in message. Because we know the reason, I don't
> > > think we need the reason in message.
> > 
> > You only know that bit 7 is set, not the rest. As this is an error
> > message we should provide as much information as possible.
> 
> Well, what other info do we know besides that bit being set?  (I wish we
> had more, but I don't think we do)

We should keep printing the reason byte as it did before.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27 15:16         ` Don Zickus
@ 2010-09-27 16:58           ` Robert Richter
  2010-09-28  1:41             ` Huang Ying
  2010-09-28  0:28           ` Huang Ying
  1 sibling, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27 16:58 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 11:16:07, Don Zickus wrote:
> On Mon, Sep 27, 2010 at 03:43:41PM +0200, Robert Richter wrote:
> > On 27.09.10 08:56:44, huang ying wrote:
> > 
> > > >> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> > > >> -{
> > > >> -     unsigned char reason = get_nmi_reason();
> > > >> -     char buf[64];
> > > >> -
> > > >> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> > > >> -     die_nmi(buf, regs, 1); /* Always panic here */
> > > >> -     return 0;
> > > >
> > > > You are dropping this code that is different to panic().
> > > 
> > > What is the difference? Is it relevant?
> > 
> > I think yes, since the code behaves different. Otherwise we could
> > remove die_nmi() completly and replace it by panic(). But both are
> > different implementions. Maybe we can merge the code, but I didn't
> > look at it closly.
> 
> Actually die_nmi is a wrapper around panic with two important pieces.
> One, it dumps some registers and two it does another notifier call to
> DIE_NMIWATCHDOG (which correlates to another discussion in this patch
> series).
> 
> So if we do any consolidation between panic and die_nmi, it should be
> convert to die_nmi.  But then I wonder if that breaks the original
> semantics of 'panic_on_unrecovered_nmi'.  I don't think so though.

I agree, panic_on_unrecovered_nmi and unknown_nmi_panic almost do the
same thing, though die_nmi() is specifically for nmi handlers. In the
end we can consolidate both. We should then modify kernel.txt and
route unknown_nmi_panic to panic_on_unrecovered_nmi or vice versa.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27 15:29         ` Don Zickus
@ 2010-09-27 17:40           ` Robert Richter
  2010-09-27 19:14             ` Don Zickus
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-27 17:40 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 11:29:16, Don Zickus wrote:
> On Mon, Sep 27, 2010 at 03:25:38PM +0200, Robert Richter wrote:
> > On 27.09.10 08:39:24, huang ying wrote:
> > 
> > Looking at all you comments below I would vote for the following:
> > 
> > We implement all handlers using DIE_NMI and set its priority
> > accordingly in struct notifier_block when registering the the nmi
> > handler. We define NMI priorities as macros such as
> > NMI_PRIORITY_LOCAL, NMI_PRIORITY_WATCHDOG, NMI_PRIORITY_IO, etc. and
> > require all handlers to set the priority. register_die_notifier() with
> > (!nb->priority) should return -EINVAL. DIE_NMI_UNKNOWN should only be
> > used if there is a handler for the case when all others fail such as
> > implemented in the perf nmi handler or when reporting an unknown nmi.
> > 
> > This will avoid all the confusion below and also makes the code much
> > cleaner.
> 
> This could be interesting and certainly simplify things (processing the
> die_chain once, instead of 4 times I think).  But I would probably
> recommend we do this as another patch on top of Huang's to layer the
> changes in a way that we can easily bisect where things went wrong if
> NMIs start mis-behaving.
> 
> I don't think any of the handlers really used the priority, except for
> perf, which tried to be on the bottom of the list.  So assigning
> priorities like this may work.


Yes, we should do this in small steps, but also avoid to introduce new
intermediate code. So maybe we keep most changes of this patch, but
also remove DIE_NMI_IPI at all and add priorities to the affected
handlers? Later we patch the rest.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27 16:45           ` Robert Richter
@ 2010-09-27 17:50             ` Don Zickus
  2010-09-28  1:33             ` Huang Ying
  1 sibling, 0 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-27 17:50 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 06:45:02PM +0200, Robert Richter wrote:
> On 27.09.10 11:33:15, Don Zickus wrote:
> > Well, what other info do we know besides that bit being set?  (I wish we
> > had more, but I don't think we do)
> 
> We should keep printing the reason byte as it did before.

oh right.  Fair enough.  Though in my experience, it always confuses
people.  They think they are getting different failures (0xa2 vs 0xb2 for
example).

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27 17:40           ` Robert Richter
@ 2010-09-27 19:14             ` Don Zickus
  2010-09-27 22:35               ` Robert Richter
  0 siblings, 1 reply; 64+ messages in thread
From: Don Zickus @ 2010-09-27 19:14 UTC (permalink / raw)
  To: Robert Richter
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, Sep 27, 2010 at 07:40:57PM +0200, Robert Richter wrote:
> On 27.09.10 11:29:16, Don Zickus wrote:
> > On Mon, Sep 27, 2010 at 03:25:38PM +0200, Robert Richter wrote:
> > > On 27.09.10 08:39:24, huang ying wrote:
> > > 
> > > Looking at all you comments below I would vote for the following:
> > > 
> > > We implement all handlers using DIE_NMI and set its priority
> > > accordingly in struct notifier_block when registering the the nmi
> > > handler. We define NMI priorities as macros such as
> > > NMI_PRIORITY_LOCAL, NMI_PRIORITY_WATCHDOG, NMI_PRIORITY_IO, etc. and
> > > require all handlers to set the priority. register_die_notifier() with
> > > (!nb->priority) should return -EINVAL. DIE_NMI_UNKNOWN should only be
> > > used if there is a handler for the case when all others fail such as
> > > implemented in the perf nmi handler or when reporting an unknown nmi.
> > > 
> > > This will avoid all the confusion below and also makes the code much
> > > cleaner.
> > 
> > This could be interesting and certainly simplify things (processing the
> > die_chain once, instead of 4 times I think).  But I would probably
> > recommend we do this as another patch on top of Huang's to layer the
> > changes in a way that we can easily bisect where things went wrong if
> > NMIs start mis-behaving.
> > 
> > I don't think any of the handlers really used the priority, except for
> > perf, which tried to be on the bottom of the list.  So assigning
> > priorities like this may work.
> 
> 
> Yes, we should do this in small steps, but also avoid to introduce new
> intermediate code. So maybe we keep most changes of this patch, but
> also remove DIE_NMI_IPI at all and add priorities to the affected
> handlers? Later we patch the rest.


Actually, looking through the handlers, by introducing priorities means
that people have to register multiple handlers to deal with the different
priorities.

For example, perf would need two handlers, one for DIE_NMI and one for
DIE_NMIUNKOWN.  I am not sure that is the way to go.

I would be inclined to leave the patch as is until we can come up with a
better way to handle the priorities.

Though I agree that DIE_NMI_IPI isn't a great name (doesn't it all go
through the local apic?), it isn't being introduced with this patch.

Thoughts?

Cheers,
Don


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27 19:14             ` Don Zickus
@ 2010-09-27 22:35               ` Robert Richter
  0 siblings, 0 replies; 64+ messages in thread
From: Robert Richter @ 2010-09-27 22:35 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 15:14:33, Don Zickus wrote:
> Actually, looking through the handlers, by introducing priorities means
> that people have to register multiple handlers to deal with the different
> priorities.
> 
> For example, perf would need two handlers, one for DIE_NMI and one for
> DIE_NMIUNKOWN.  I am not sure that is the way to go.

Ok, this could be a problem for handlers dealing with both DIE_NMI and
DIE_NMI_UNKNOW. But without priorities as it is implemented now, the
handlers are called in registration order or even without any order.
So I don't think we need this fine grained priorities for different
cases. If we have priorities, the order would be always the same for
all chains, which should be fine for most cases.

> I would be inclined to leave the patch as is until we can come up with a
> better way to handle the priorities.
> 
> Though I agree that DIE_NMI_IPI isn't a great name (doesn't it all go
> through the local apic?), it isn't being introduced with this patch.
> 
> Thoughts?

At least we should give the above a try. If it turns out it is not as
easy as I think, the still can fall back. But if we could finally get
rid of DIE_NMI_IPI, this would eas much.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27 15:16         ` Don Zickus
  2010-09-27 16:58           ` Robert Richter
@ 2010-09-28  0:28           ` Huang Ying
  2010-09-28 15:19             ` Don Zickus
  1 sibling, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-28  0:28 UTC (permalink / raw)
  To: Don Zickus
  Cc: Robert Richter, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

Hi, Don,

On Mon, 2010-09-27 at 23:16 +0800, Don Zickus wrote:
> On Mon, Sep 27, 2010 at 03:43:41PM +0200, Robert Richter wrote:
> > On 27.09.10 08:56:44, huang ying wrote:
> > 
> > > >> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> > > >> -{
> > > >> -     unsigned char reason = get_nmi_reason();
> > > >> -     char buf[64];
> > > >> -
> > > >> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> > > >> -     die_nmi(buf, regs, 1); /* Always panic here */
> > > >> -     return 0;
> > > >
> > > > You are dropping this code that is different to panic().
> > > 
> > > What is the difference? Is it relevant?
> > 
> > I think yes, since the code behaves different. Otherwise we could
> > remove die_nmi() completly and replace it by panic(). But both are
> > different implementions. Maybe we can merge the code, but I didn't
> > look at it closly.
> 
> Actually die_nmi is a wrapper around panic with two important pieces.
> One, it dumps some registers and two it does another notifier call to
> DIE_NMIWATCHDOG (which correlates to another discussion in this patch
> series).
> 
> So if we do any consolidation between panic and die_nmi, it should be
> convert to die_nmi.  But then I wonder if that breaks the original
> semantics of 'panic_on_unrecovered_nmi'.  I don't think so though.

Please take a look at the original code:


                if (nmi_watchdog_tick(regs, reason))
                        return;
                if (!do_nmi_callback(regs, cpu))
#endif /* !CONFIG_LOCKUP_DETECTOR */
                        unknown_nmi_error(reason, regs);
#else
                unknown_nmi_error(reason, regs);
#endif

If NMI comes from watchdog, nmi_watchdog_tick() will return 1. So
do_nmi_callback() is NOT for watchdog NMI, but for unknown NMI. Why do
we call DIE_NMIWATCHDOG for unknown NMI (NOT watchdog NMI)? die_nmi is
for watchdog, not unknown NMI.

So another issue is registers dumping. If it is necessary, we can dump
registers in unknown_nmi_error(). Although I think unknown NMI comes
from hardware instead of software, so it does not help much to dump
registers.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27 15:20         ` Don Zickus
@ 2010-09-28  0:36           ` Huang Ying
  2010-09-28 15:32             ` Don Zickus
  0 siblings, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-28  0:36 UTC (permalink / raw)
  To: Don Zickus
  Cc: Robert Richter, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, 2010-09-27 at 23:20 +0800, Don Zickus wrote:
> On Mon, Sep 27, 2010 at 03:38:16PM +0200, Robert Richter wrote:
> > On 27.09.10 08:47:53, huang ying wrote:
> > 
> > > I think explicit function calls have better readability than notifier chains.
> > 
> > What is different to unknown_nmi() then?
> > 
> > So no, in your case you want to catch unknown nmis for a certain
> > hardware and then throw a panic. This should be clearly implemented in
> > a separate handler for this piece of hardware.
> > 
> > We want to cleanup this code and throw out all hardware specific
> > snippets, and not introduce new special cases here.
> 
> I tend to agree with Robert here.  I don't know if there were any 'rules'
> to which handlers get directly called versus ones that go through the
> die_chain, so I was originally going to let it go.  But if they aren't
> any, it does look cleaner to have everything in die_chains.

Personally, I think directly call has better readability than
notifier_chain in general. Notifier_chain is for:

- Call functions in module.
- Need to enable/disable (via register/unregister) at run time.
- Call functions from low layer to high layer.

Otherwise, notifier_chain should be avoid if possible. So I think it is
better to keep direct call as much as possible.

Best Regards,
Huang Ying


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-27 13:25       ` Robert Richter
  2010-09-27 15:29         ` Don Zickus
@ 2010-09-28  1:03         ` Huang Ying
  2010-09-28 14:59           ` Robert Richter
  1 sibling, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-28  1:03 UTC (permalink / raw)
  To: Robert Richter
  Cc: huang ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, 2010-09-27 at 21:25 +0800, Robert Richter wrote:
> On 27.09.10 08:39:24, huang ying wrote:
> 
> Looking at all you comments below I would vote for the following:
> 
> We implement all handlers using DIE_NMI and set its priority
> accordingly in struct notifier_block when registering the the nmi
> handler. We define NMI priorities as macros such as
> NMI_PRIORITY_LOCAL, NMI_PRIORITY_WATCHDOG, NMI_PRIORITY_IO, etc. and
> require all handlers to set the priority. register_die_notifier() with
> (!nb->priority) should return -EINVAL. DIE_NMI_UNKNOWN should only be
> used if there is a handler for the case when all others fail such as
> implemented in the perf nmi handler or when reporting an unknown nmi.
> 
> This will avoid all the confusion below and also makes the code much
> cleaner.

Use priority to enforce the order has some issues except what Don
pointed out (two registers for two call in chain):

- Almost all direct call in default_do_nmi() must be turned into
notifier_block. I know this is what you want. But I am not a big fan of
notifier chain :)

- This makes order of notifier call more implicitly. And I think the
order is important for NMI handler to work properly.

- In your scheme, both die_val (DIE_NMI or DIE_NMI_UNKNOWN) and priority
are used to determine the order of call. This makes code more complex
and no additional benefit.

So I think it is better to use different die_val to determine the order,
and insert some direct call between them.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27 13:38       ` Robert Richter
  2010-09-27 15:20         ` Don Zickus
@ 2010-09-28  1:19         ` Huang Ying
  2010-09-28 15:27           ` Robert Richter
  1 sibling, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-28  1:19 UTC (permalink / raw)
  To: Robert Richter
  Cc: huang ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Mon, 2010-09-27 at 21:38 +0800, Robert Richter wrote:
> On 27.09.10 08:47:53, huang ying wrote:
> 
> > >>  arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
> > >
> > > Instead of creating this file the code should be implemented in
> > >
> > >  arch/x86/kernel/cpu/intel.c
> > >
> > > Similar AMD NB code is implemented in amd.c and k8.c.
> > 
> > Why? This file is not vendor specific.
> 
> No, it only implements an Intel specific PCI device, nothing else.

You can add AMD specific PCI device here too. We will add more device ID
in the future.

> > >> +late_initcall(check_unknown_nmi_for_hwerr);
> > >
> > > Maybe you can use early pci functions like read_pci_config() to avoid
> > > late init.
> > 
> > I don't think late init is a big issue. Hardware error is rare after all.
> 
> Just want to let you know this as an option.
> 
> > >> --- a/arch/x86/kernel/traps.c
> > >> +++ b/arch/x86/kernel/traps.c
> > >> @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
> > >>
> > >>  static int ignore_nmis;
> > >>
> > >> +int unknown_nmi_for_hwerr;
> > >
> > > If it is an nmi for hwerr, it is no longer an unknown nmi. So we
> > > should drop 'unknow' in the naming.
> > 
> > I think unkown NMI is the one we can not identify the source.
> > Something like anonymous.
> > 
> > >> +
> > >>  /*
> > >>   * Prevent NMI reason port (0x61) being accessed simultaneously, can
> > >>   * only be used in NMI handler.
> > >> @@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
> > >>  static notrace __kprobes void
> > >>  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
> > >>  {
> > >> +     /*
> > >> +      * On some platforms, hardware errors may be notified via
> > >> +      * unknown NMI
> > >> +      */
> > >> +     if (unknown_nmi_for_hwerr)
> > >> +             panic("NMI for hardware error without error record: "
> > >> +                   "Not continuing");
> > >> +
> > >
> > > Instead of checking this flag you should implement and register an nmi
> > > handler for this case.
> > 
> > I think explicit function calls have better readability than notifier chains.
> 
> What is different to unknown_nmi() then?
> 
> So no, in your case you want to catch unknown nmis for a certain
> hardware and then throw a panic.

No. We do NOT catch unknown NMIs for a certain hardware here. We put the
code here because we think it is general instead of hardware specific.

It should be a general rule to treat unknown NMI as hardware error. But
to avoid to confuse some users have broken hardware (which will generate
unknown NMI not for hardware error), we use a white list (machines with
HEST or workable chipset via PCI ID).

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27 15:33         ` Don Zickus
  2010-09-27 16:45           ` Robert Richter
@ 2010-09-28  1:22           ` Huang Ying
  1 sibling, 0 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-28  1:22 UTC (permalink / raw)
  To: Don Zickus
  Cc: Robert Richter, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, 2010-09-27 at 23:33 +0800, Don Zickus wrote:
> On Mon, Sep 27, 2010 at 11:00:56AM +0200, Robert Richter wrote:
> > On 27.09.10 04:39:20, Huang Ying wrote:
> > > > I already commented on this, patch #1 and #3 are basically the same in
> > > > most parts which should be merged. What remains then in this patch is
> > > > the modified printk() and the comment. Both could be added to #1 too
> > > > which is then some sort of code cleanup patch.
> > > 
> > > Don thinks it is Ok to keep 2 patches.
> > 
> > I don't like reviewing new changes which are thrown away with the next
> > patch. I review things twice and it is much harder to see what really
> > changed then. Also we should have a clean history.
> 
> I didn't care either way.  But if it makes it easier to review, it's nice
> to keep reviewers happy too. :-)
> 
> Hunag, I think there is going to be a V3 of this series, could you just
> combine these patches then?

Ok. I don't care much to this too in fact.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-27 16:45           ` Robert Richter
  2010-09-27 17:50             ` Don Zickus
@ 2010-09-28  1:33             ` Huang Ying
  2010-09-28 14:29               ` Robert Richter
  2010-09-28 15:38               ` Don Zickus
  1 sibling, 2 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-28  1:33 UTC (permalink / raw)
  To: Robert Richter
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Tue, 2010-09-28 at 00:45 +0800, Robert Richter wrote:
> > > > Ok. I will add CPU ID in message. Because we know the reason, I don't
> > > > think we need the reason in message.
> > > 
> > > You only know that bit 7 is set, not the rest. As this is an error
> > > message we should provide as much information as possible.
> > 
> > Well, what other info do we know besides that bit being set?  (I wish we
> > had more, but I don't think we do)
> 
> We should keep printing the reason byte as it did before.

The reason is printed before because mem_parity_error is treated as
something like unknown reason. And iochk_error is treated as known
reason and will not print the reason byte. Please the check the original
code.

But now we treat pci_serr_error (renamed from mem_parity_error) as known
reason. So it is not necessary to print the reason byte. I suggest to
print the reason byte only if (!(reason & 0xc0) && reason), where the
reason is really unknown.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-27 16:58           ` Robert Richter
@ 2010-09-28  1:41             ` Huang Ying
  2010-09-28 15:16               ` Robert Richter
  2010-09-28 15:21               ` Don Zickus
  0 siblings, 2 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-28  1:41 UTC (permalink / raw)
  To: Robert Richter
  Cc: Don Zickus, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, 2010-09-28 at 00:58 +0800, Robert Richter wrote:
> On 27.09.10 11:16:07, Don Zickus wrote:
> > On Mon, Sep 27, 2010 at 03:43:41PM +0200, Robert Richter wrote:
> > > On 27.09.10 08:56:44, huang ying wrote:
> > > 
> > > > >> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> > > > >> -{
> > > > >> -     unsigned char reason = get_nmi_reason();
> > > > >> -     char buf[64];
> > > > >> -
> > > > >> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> > > > >> -     die_nmi(buf, regs, 1); /* Always panic here */
> > > > >> -     return 0;
> > > > >
> > > > > You are dropping this code that is different to panic().
> > > > 
> > > > What is the difference? Is it relevant?
> > > 
> > > I think yes, since the code behaves different. Otherwise we could
> > > remove die_nmi() completly and replace it by panic(). But both are
> > > different implementions. Maybe we can merge the code, but I didn't
> > > look at it closly.
> > 
> > Actually die_nmi is a wrapper around panic with two important pieces.
> > One, it dumps some registers and two it does another notifier call to
> > DIE_NMIWATCHDOG (which correlates to another discussion in this patch
> > series).
> > 
> > So if we do any consolidation between panic and die_nmi, it should be
> > convert to die_nmi.  But then I wonder if that breaks the original
> > semantics of 'panic_on_unrecovered_nmi'.  I don't think so though.
> 
> I agree, panic_on_unrecovered_nmi and unknown_nmi_panic almost do the
> same thing, though die_nmi() is specifically for nmi handlers. In the
> end we can consolidate both. We should then modify kernel.txt and
> route unknown_nmi_panic to panic_on_unrecovered_nmi or vice versa.

unknown_nmi_panic will cause panic for unknown NMI (can not identify the
NMI sources).

panic_on_unrecovered_nmi should panic for unrecovered hardware errors,
for known and unknown NMI. For example, panic_on_unrecovered_nmi will
cause panic in mem_parity_error too, which can be considered known NMI.

Is it reasonable?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-27 15:38   ` Don Zickus
@ 2010-09-28  1:54     ` Huang Ying
  0 siblings, 0 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-28  1:54 UTC (permalink / raw)
  To: Don Zickus; +Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Mon, 2010-09-27 at 23:38 +0800, Don Zickus wrote:
> Hi Huang,
> 
> On Mon, Sep 27, 2010 at 08:57:05AM +0800, Huang Ying wrote:
> > @@ -360,6 +362,14 @@ io_check_error(unsigned char reason, str
> >  static notrace __kprobes void
> >  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
> >  {
> > +	/*
> > +	 * On some platforms, hardware errors may be notified via
> > +	 * unknown NMI
> > +	 */
> > +	if (unknown_nmi_for_hwerr)
> > +		panic("NMI for hardware error without error record: "
> > +		      "Not continuing");
> 
> Can you modify this to let users know to check the BIOS/BMC for further
> messages?  Otherwise people just file bugzillas asking what the problem
> is, and the first thing we have to respond with is check the BIOS/BMC.

Sorry, forget about that. Will change it in next version.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-28  1:33             ` Huang Ying
@ 2010-09-28 14:29               ` Robert Richter
  2010-09-29  7:56                 ` huang ying
  2010-09-28 15:38               ` Don Zickus
  1 sibling, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-28 14:29 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On 27.09.10 21:33:37, Huang Ying wrote:

> I suggest to
> print the reason byte only if (!(reason & 0xc0) && reason), where the
> reason is really unknown.

Why make it this complex for an error message?

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-28  1:03         ` Huang Ying
@ 2010-09-28 14:59           ` Robert Richter
  2010-09-29  7:54             ` huang ying
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-28 14:59 UTC (permalink / raw)
  To: Huang Ying
  Cc: huang ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 21:03:52, Huang Ying wrote:
> Use priority to enforce the order has some issues except what Don
> pointed out (two registers for two call in chain):
> 
> - Almost all direct call in default_do_nmi() must be turned into
> notifier_block. I know this is what you want. But I am not a big fan of
> notifier chain :)
> 
> - This makes order of notifier call more implicitly. And I think the
> order is important for NMI handler to work properly.
> 
> - In your scheme, both die_val (DIE_NMI or DIE_NMI_UNKNOWN) and priority
> are used to determine the order of call. This makes code more complex
> and no additional benefit.
> 
> So I think it is better to use different die_val to determine the order,
> and insert some direct call between them.

Huang,

registering a notifier is the only clean way to add an NMI handler.
Modifying the NMI control flow in traps.c with global flags is not the
right approach. Currently, execution order depends on die_val, this is
not obvious when reading the code and we all were wondering in the
beginning why there was DIE_NMI, DIE_NMI_IPI and DIE_NMI_UNKNOWN, etc.

So, better we say instead, this is the notifier function with that
priority, add it to the call chain. With priorities we are able to
clearly specify the execution order even more fine grained than
now. And, code in traps.c will become fairly easy. Also, we don't need
two notifiers, because there is no need for different priorities
then. Give me one use case, I don't know of any.

And the setup of a notifier is not that much overhead or complex, sure
we need to change the code, but actually this the work to be done to
clean it up.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-28  1:41             ` Huang Ying
@ 2010-09-28 15:16               ` Robert Richter
  2010-09-28 15:21               ` Don Zickus
  1 sibling, 0 replies; 64+ messages in thread
From: Robert Richter @ 2010-09-28 15:16 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 21:41:43, Huang Ying wrote:
> unknown_nmi_panic will cause panic for unknown NMI (can not identify the
> NMI sources).
> 
> panic_on_unrecovered_nmi should panic for unrecovered hardware errors,
> for known and unknown NMI. For example, panic_on_unrecovered_nmi will
> cause panic in mem_parity_error too, which can be considered known NMI.
> 
> Is it reasonable?

Yes, this makes it clear. Thanks,

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-28  0:28           ` Huang Ying
@ 2010-09-28 15:19             ` Don Zickus
  2010-09-29  6:55               ` huang ying
  0 siblings, 1 reply; 64+ messages in thread
From: Don Zickus @ 2010-09-28 15:19 UTC (permalink / raw)
  To: Huang Ying
  Cc: Robert Richter, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 08:28:25AM +0800, Huang Ying wrote:
> Hi, Don,
> 
> On Mon, 2010-09-27 at 23:16 +0800, Don Zickus wrote:
> > On Mon, Sep 27, 2010 at 03:43:41PM +0200, Robert Richter wrote:
> > > On 27.09.10 08:56:44, huang ying wrote:
> > > 
> > > > >> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> > > > >> -{
> > > > >> -     unsigned char reason = get_nmi_reason();
> > > > >> -     char buf[64];
> > > > >> -
> > > > >> -     sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> > > > >> -     die_nmi(buf, regs, 1); /* Always panic here */
> > > > >> -     return 0;
> > > > >
> > > > > You are dropping this code that is different to panic().
> > > > 
> > > > What is the difference? Is it relevant?
> > > 
> > > I think yes, since the code behaves different. Otherwise we could
> > > remove die_nmi() completly and replace it by panic(). But both are
> > > different implementions. Maybe we can merge the code, but I didn't
> > > look at it closly.
> > 
> > Actually die_nmi is a wrapper around panic with two important pieces.
> > One, it dumps some registers and two it does another notifier call to
> > DIE_NMIWATCHDOG (which correlates to another discussion in this patch
> > series).
> > 
> > So if we do any consolidation between panic and die_nmi, it should be
> > convert to die_nmi.  But then I wonder if that breaks the original
> > semantics of 'panic_on_unrecovered_nmi'.  I don't think so though.
> 
> Please take a look at the original code:
> 
> 
>                 if (nmi_watchdog_tick(regs, reason))
>                         return;
>                 if (!do_nmi_callback(regs, cpu))
> #endif /* !CONFIG_LOCKUP_DETECTOR */
>                         unknown_nmi_error(reason, regs);
> #else
>                 unknown_nmi_error(reason, regs);
> #endif
> 
> If NMI comes from watchdog, nmi_watchdog_tick() will return 1. So
> do_nmi_callback() is NOT for watchdog NMI, but for unknown NMI. Why do
> we call DIE_NMIWATCHDOG for unknown NMI (NOT watchdog NMI)? die_nmi is
> for watchdog, not unknown NMI.

I think watchdog is an overloaded term.  I was under the impression that
once the nmi watchdog determined a problem, it called the DIE_NMIWATCHDOG
die chain to see if any other drivers wanted to clean up or do their thing
first before panic'ing (namely drivers in drivers/char/watchdog).

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-28  1:41             ` Huang Ying
  2010-09-28 15:16               ` Robert Richter
@ 2010-09-28 15:21               ` Don Zickus
  1 sibling, 0 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-28 15:21 UTC (permalink / raw)
  To: Huang Ying
  Cc: Robert Richter, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 09:41:43AM +0800, Huang Ying wrote:
> On Tue, 2010-09-28 at 00:58 +0800, Robert Richter wrote:
> > On 27.09.10 11:16:07, Don Zickus wrote:
> > > Actually die_nmi is a wrapper around panic with two important pieces.
> > > One, it dumps some registers and two it does another notifier call to
> > > DIE_NMIWATCHDOG (which correlates to another discussion in this patch
> > > series).
> > > 
> > > So if we do any consolidation between panic and die_nmi, it should be
> > > convert to die_nmi.  But then I wonder if that breaks the original
> > > semantics of 'panic_on_unrecovered_nmi'.  I don't think so though.
> > 
> > I agree, panic_on_unrecovered_nmi and unknown_nmi_panic almost do the
> > same thing, though die_nmi() is specifically for nmi handlers. In the
> > end we can consolidate both. We should then modify kernel.txt and
> > route unknown_nmi_panic to panic_on_unrecovered_nmi or vice versa.
> 
> unknown_nmi_panic will cause panic for unknown NMI (can not identify the
> NMI sources).
> 
> panic_on_unrecovered_nmi should panic for unrecovered hardware errors,
> for known and unknown NMI. For example, panic_on_unrecovered_nmi will
> cause panic in mem_parity_error too, which can be considered known NMI.
> 
> Is it reasonable?

Routing unknown_nmi_panic to panic_on_unrecovered_nmi makes sense to me,
just more difficult to type out. :-)

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-28  1:19         ` Huang Ying
@ 2010-09-28 15:27           ` Robert Richter
  2010-09-29  8:07             ` huang ying
  0 siblings, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-28 15:27 UTC (permalink / raw)
  To: Huang Ying
  Cc: huang ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 27.09.10 21:19:21, Huang Ying wrote:
> On Mon, 2010-09-27 at 21:38 +0800, Robert Richter wrote:
> > On 27.09.10 08:47:53, huang ying wrote:
> > 
> > > >>  arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
> > > >
> > > > Instead of creating this file the code should be implemented in
> > > >
> > > >  arch/x86/kernel/cpu/intel.c
> > > >
> > > > Similar AMD NB code is implemented in amd.c and k8.c.
> > > 
> > > Why? This file is not vendor specific.
> > 
> > No, it only implements an Intel specific PCI device, nothing else.
> 
> You can add AMD specific PCI device here too. We will add more device ID
> in the future.

I think it is not worth to introduce this file. There is no generic
code in and we have over places for vendor specific code.

> No. We do NOT catch unknown NMIs for a certain hardware here. We put the
> code here because we think it is general instead of hardware specific.
> 
> It should be a general rule to treat unknown NMI as hardware error. But
> to avoid to confuse some users have broken hardware (which will generate
> unknown NMI not for hardware error), we use a white list (machines with
> HEST or workable chipset via PCI ID).

Ok, a white list makes sense. This was not obvious in your
implementation.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-28  0:36           ` Huang Ying
@ 2010-09-28 15:32             ` Don Zickus
  2010-09-29  8:17               ` huang ying
  0 siblings, 1 reply; 64+ messages in thread
From: Don Zickus @ 2010-09-28 15:32 UTC (permalink / raw)
  To: Huang Ying
  Cc: Robert Richter, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 08:36:12AM +0800, Huang Ying wrote:
> > I tend to agree with Robert here.  I don't know if there were any 'rules'
> > to which handlers get directly called versus ones that go through the
> > die_chain, so I was originally going to let it go.  But if they aren't
> > any, it does look cleaner to have everything in die_chains.
> 
> Personally, I think directly call has better readability than

I am confused what type of readability you are looking for?  Can we create
a sysfs entry to give you that info?

> notifier_chain in general. Notifier_chain is for:
> 
> - Call functions in module.
> - Need to enable/disable (via register/unregister) at run time.
> - Call functions from low layer to high layer.
> 
> Otherwise, notifier_chain should be avoid if possible. So I think it is
> better to keep direct call as much as possible.

But the problem is you have to export all this platform specific stuff to
traps.c and surround the code with #ifdef's, which start to look ugly.

Is there any reason why traps.c should know about MCA/HEST/<other hardware
errors>?  Shouldn't it be abstracted away?

Honestly, I would be interested in creating a southbridge driver and
moving the port 0x61 code there and keeping the default_do_nmi() function
stupidly simple (just a call to the die_chain and the
unknown_nmi_error()).

Just my two cents.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-28  1:33             ` Huang Ying
  2010-09-28 14:29               ` Robert Richter
@ 2010-09-28 15:38               ` Don Zickus
  1 sibling, 0 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-28 15:38 UTC (permalink / raw)
  To: Huang Ying
  Cc: Robert Richter, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 09:33:37AM +0800, Huang Ying wrote:
> On Tue, 2010-09-28 at 00:45 +0800, Robert Richter wrote:
> > > > > Ok. I will add CPU ID in message. Because we know the reason, I don't
> > > > > think we need the reason in message.
> > > > 
> > > > You only know that bit 7 is set, not the rest. As this is an error
> > > > message we should provide as much information as possible.
> > > 
> > > Well, what other info do we know besides that bit being set?  (I wish we
> > > had more, but I don't think we do)
> > 
> > We should keep printing the reason byte as it did before.
> 
> The reason is printed before because mem_parity_error is treated as
> something like unknown reason. And iochk_error is treated as known
> reason and will not print the reason byte. Please the check the original
> code.
> 
> But now we treat pci_serr_error (renamed from mem_parity_error) as known
> reason. So it is not necessary to print the reason byte. I suggest to
> print the reason byte only if (!(reason & 0xc0) && reason), where the
> reason is really unknown.

So you are just matching the iochk_error.  I understand your reasoning.
Yesterday I thought you changed the unknown_nmi_error code.  I guess I
don't see it as a problem, but I'll let Robert chime in.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-28 15:19             ` Don Zickus
@ 2010-09-29  6:55               ` huang ying
  2010-09-30  4:04                 ` Don Zickus
  0 siblings, 1 reply; 64+ messages in thread
From: huang ying @ 2010-09-29  6:55 UTC (permalink / raw)
  To: Don Zickus
  Cc: Huang Ying, Robert Richter, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

Hi, Don,

On Tue, Sep 28, 2010 at 11:19 PM, Don Zickus <dzickus@redhat.com> wrote:
>> If NMI comes from watchdog, nmi_watchdog_tick() will return 1. So
>> do_nmi_callback() is NOT for watchdog NMI, but for unknown NMI. Why do
>> we call DIE_NMIWATCHDOG for unknown NMI (NOT watchdog NMI)? die_nmi is
>> for watchdog, not unknown NMI.
>
> I think watchdog is an overloaded term.  I was under the impression that
> once the nmi watchdog determined a problem, it called the DIE_NMIWATCHDOG
> die chain to see if any other drivers wanted to clean up or do their thing
> first before panic'ing (namely drivers in drivers/char/watchdog).

Yes. I think so too. And in original code, almost all DIE_NMIxxx is
used in this way:

DIE_NMI is called after read port 0x61, to see if any other driver
wanted to recover the error notified based on reason read from port
0x61.

DIE_NMIWATCHDOG is used to see if any other drivers wanted to clean up
or do their thing before panic

DIE_NMIUNKNOWN is used to see if any other driver wanted to clean up
or debug before default unknown logic (such as panic).

DIE_NMI_IPI is used to see if any driver want to process the NMI (sent
via APIC? Maybe named after that).

So the original implementation of defualt_do_nmi() is:

- determine the reason/source of NMI in default_do_nmi(). Although the
exact reason/source is not determined, such as perf.

- notify_die() for corresponding NMI reason/source, to see if any
driver want to process this instead of the default operation

- If no other driver processed it, call default operation, such as
panic for DIE_NMIUNKNOWN.


The original implementation need to be changed, because it only uses
port 0x61 to determine the reason/source of NMI. We need a order based
scheme to determine the reason/source of NMI. The order is as follow:

CPU-specific (CPU local) NMI
non-CPU-specific (global) NMI
port 0x61
NMI Watchdog

I think we all agree that to use order to determine the reason/source
of NMI. The difference is that I want to keep as many direct calls in
default_do_nmi() as possible, while you guys want to wrap almost all
code in default_do_nmi() into notifier handler and leave only one
notify_die() in defualt_do_nmi(). And I want to use different die_val
(and their calling order in default_do_nmi()) to determine the order
while you guys want to use priority (based on its value) to determine
the order.

On the other hand, I think we should call corresponding DIE_NMIxxx
before the default operations, such as for watchdog, call
DIE_NMIWATCHDOG before go panic, for unknown nmi, call DIE_NMIUNKNOWN
before the default processing (may panic).

I think it is important to distinguish between die chain used to
determine the source/reason of NMI and the die chain used to see if
any other driver wanted to do some processing before the default
operation.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler
  2010-09-28 14:59           ` Robert Richter
@ 2010-09-29  7:54             ` huang ying
  0 siblings, 0 replies; 64+ messages in thread
From: huang ying @ 2010-09-29  7:54 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

Hi, Robert,

On Tue, Sep 28, 2010 at 10:59 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 27.09.10 21:03:52, Huang Ying wrote:
>> Use priority to enforce the order has some issues except what Don
>> pointed out (two registers for two call in chain):
>>
>> - Almost all direct call in default_do_nmi() must be turned into
>> notifier_block. I know this is what you want. But I am not a big fan of
>> notifier chain :)
>>
>> - This makes order of notifier call more implicitly. And I think the
>> order is important for NMI handler to work properly.
>>
>> - In your scheme, both die_val (DIE_NMI or DIE_NMI_UNKNOWN) and priority
>> are used to determine the order of call. This makes code more complex
>> and no additional benefit.
>>
>> So I think it is better to use different die_val to determine the order,
>> and insert some direct call between them.
>
> Huang,
>
> registering a notifier is the only clean way to add an NMI handler.
> Modifying the NMI control flow in traps.c with global flags is not the
> right approach.

If you mean my code in [PATCH 6/7], in that patch, I add another logic
into the general unknown NMI processing: treat all unknown NMI as
hardware error and go panic. The global flag is just used to implement
white list, it is not the point. I am not adding another NMI handler
there.

In general, I think it is better to call the NMI handler in
default_do_nmi() directly if possible. That is clearer, and the code
is much easier to be understood too.

> Currently, execution order depends on die_val, this is
> not obvious when reading the code

I think that is more obvious for reading than priority value. When you
look at the new default_do_nmi(), it is very clear that the order is
as follow:

DIE_NMI_IPI
DIE_NMI
port 0x61
watchdog
DIE_NMIUNKNOWN

Because the corresponding notifiers are called in this order.

With priority value, people may say: Oh, this is data-driven
programming, the execution order is based on data value. That is not
so obvious for me.

> and we all were wondering in the
> beginning why there was DIE_NMI, DIE_NMI_IPI and DIE_NMI_UNKNOWN, etc.

What I thought about this is described in my reply to Don in 3/7 thread.

> So, better we say instead, this is the notifier function with that
> priority, add it to the call chain. With priorities we are able to
> clearly specify the execution order even more fine grained than
> now.

Do we need fine grained order? Do you have some use cases. Fine
grained order is hard to managed too. Especially because some NMI
handler may be hardware independent.

> And, code in traps.c will become fairly easy. Also, we don't need
> two notifiers, because there is no need for different priorities
> then. Give me one use case, I don't know of any.

If my understanding is correct, in your previous mail, you said there
will be two notifier chain, DIE_NMI and DIE_NMI_UNKNOWN, so I said
that.

> And the setup of a notifier is not that much overhead or complex, sure
> we need to change the code, but actually this the work to be done to
> clean it up.

I just want to clean it up in different way as you :)

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error
  2010-09-28 14:29               ` Robert Richter
@ 2010-09-29  7:56                 ` huang ying
  0 siblings, 0 replies; 64+ messages in thread
From: huang ying @ 2010-09-29  7:56 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 10:29 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 27.09.10 21:33:37, Huang Ying wrote:
>
>> I suggest to
>> print the reason byte only if (!(reason & 0xc0) && reason), where the
>> reason is really unknown.
>
> Why make it this complex for an error message?

It's just a "else" branch for original "if (reason & 0xc0)" code, so
it is not too complex.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-28 15:27           ` Robert Richter
@ 2010-09-29  8:07             ` huang ying
  0 siblings, 0 replies; 64+ messages in thread
From: huang ying @ 2010-09-29  8:07 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 11:27 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 27.09.10 21:19:21, Huang Ying wrote:
>> On Mon, 2010-09-27 at 21:38 +0800, Robert Richter wrote:
>> > On 27.09.10 08:47:53, huang ying wrote:
>> >
>> > > >>  arch/x86/kernel/hwerr.c    |   55 +++++++++++++++++++++++++++++++++++++++++++++
>> > > >
>> > > > Instead of creating this file the code should be implemented in
>> > > >
>> > > >  arch/x86/kernel/cpu/intel.c
>> > > >
>> > > > Similar AMD NB code is implemented in amd.c and k8.c.
>> > >
>> > > Why? This file is not vendor specific.
>> >
>> > No, it only implements an Intel specific PCI device, nothing else.
>>
>> You can add AMD specific PCI device here too. We will add more device ID
>> in the future.
>
> I think it is not worth to introduce this file. There is no generic
> code in and we have over places for vendor specific code.

It's not vendor specific code. It is general code. In fact it is a
white list for systems that can treat unknown NMI as hardware error
(no broken hardware to generate unknown NMI). If you can find an
appropriate existing file, I am very glad to put the contents of this
file into it.

>> No. We do NOT catch unknown NMIs for a certain hardware here. We put the
>> code here because we think it is general instead of hardware specific.
>>
>> It should be a general rule to treat unknown NMI as hardware error. But
>> to avoid to confuse some users have broken hardware (which will generate
>> unknown NMI not for hardware error), we use a white list (machines with
>> HEST or workable chipset via PCI ID).
>
> Ok, a white list makes sense. This was not obvious in your
> implementation.

I have some comments in my original code.

+/*
+ * On some platform, hardware errors may be notified via unknown
+ * NMI. These platform is identified via the PCI ID of host bridge.
+ *
+ * The PCI ID of host bridge instead of DMI ID is used, so that the
+ * checking can be done based on the platform instead of motherboard.
+ * This should be simpler and sufficient.
+ */

If you think that is not obvious enough, I will change the comments to
make it more obvious.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-28 15:32             ` Don Zickus
@ 2010-09-29  8:17               ` huang ying
  2010-09-30  4:36                 ` Don Zickus
  0 siblings, 1 reply; 64+ messages in thread
From: huang ying @ 2010-09-29  8:17 UTC (permalink / raw)
  To: Don Zickus
  Cc: Huang Ying, Robert Richter, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Tue, Sep 28, 2010 at 11:32 PM, Don Zickus <dzickus@redhat.com> wrote:
> On Tue, Sep 28, 2010 at 08:36:12AM +0800, Huang Ying wrote:
>> > I tend to agree with Robert here.  I don't know if there were any 'rules'
>> > to which handlers get directly called versus ones that go through the
>> > die_chain, so I was originally going to let it go.  But if they aren't
>> > any, it does look cleaner to have everything in die_chains.
>>
>> Personally, I think directly call has better readability than
>
> I am confused what type of readability you are looking for?  Can we create
> a sysfs entry to give you that info?

Sorry for my poor English. I mean code readability here. That is,
notifier_chain makes code harder to understand than direct call.

>> notifier_chain in general. Notifier_chain is for:
>>
>> - Call functions in module.
>> - Need to enable/disable (via register/unregister) at run time.
>> - Call functions from low layer to high layer.
>>
>> Otherwise, notifier_chain should be avoid if possible. So I think it is
>> better to keep direct call as much as possible.
>
> But the problem is you have to export all this platform specific stuff to
> traps.c and surround the code with #ifdef's, which start to look ugly.

There is no #ifdef in my final default_do_nmi(), so I think the code
can be cleaned up without converting everything into notifier block. I
think the rule can be: architecture specific thing should go direct
call, while device driver should be turned into notifier block.

> Is there any reason why traps.c should know about MCA/HEST/<other hardware
> errors>?  Shouldn't it be abstracted away?

Yes. The device drivers should be abstracted away, leaving
architectural logic, such as port 0x61 as direct call. We need
notifier chain, but I just suggest reduce its usage if possible.

> Honestly, I would be interested in creating a southbridge driver and
> moving the port 0x61 code there and keeping the default_do_nmi() function
> stupidly simple (just a call to the die_chain and the
> unknown_nmi_error()).

I think the southbridge drivers should go notifier block, but the port
0x61 code is architectural and should be kept in default_do_nmi().

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-29  6:55               ` huang ying
@ 2010-09-30  4:04                 ` Don Zickus
  2010-09-30  5:21                   ` Huang Ying
  2010-09-30  8:23                   ` Robert Richter
  0 siblings, 2 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-30  4:04 UTC (permalink / raw)
  To: huang ying
  Cc: Huang Ying, Robert Richter, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Wed, Sep 29, 2010 at 02:55:58PM +0800, huang ying wrote:
> Hi, Don,
> 
> I think we all agree that to use order to determine the reason/source
> of NMI. The difference is that I want to keep as many direct calls in
> default_do_nmi() as possible, while you guys want to wrap almost all
> code in default_do_nmi() into notifier handler and leave only one
> notify_die() in defualt_do_nmi(). And I want to use different die_val
> (and their calling order in default_do_nmi()) to determine the order
> while you guys want to use priority (based on its value) to determine
> the order.

Well, I just wanted to see if we can minimize the number of times we
walked the die_chain.  Priorities was an interesting idea, I am not sure
it works out.  Registering two handlers, seems clunky.  But I am open to
the discussions.

> 
> On the other hand, I think we should call corresponding DIE_NMIxxx
> before the default operations, such as for watchdog, call
> DIE_NMIWATCHDOG before go panic, for unknown nmi, call DIE_NMIUNKNOWN
> before the default processing (may panic).
> 
> I think it is important to distinguish between die chain used to
> determine the source/reason of NMI and the die chain used to see if
> any other driver wanted to do some processing before the default
> operation.

I guess I still prefer to take your patch set with its change and then
layer any new ideas on top.  I have a feeling this discussion could go on
forever regarding how die_chains can work.

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-29  8:17               ` huang ying
@ 2010-09-30  4:36                 ` Don Zickus
  2010-09-30  4:57                   ` Huang Ying
  2010-09-30  8:25                   ` Andi Kleen
  0 siblings, 2 replies; 64+ messages in thread
From: Don Zickus @ 2010-09-30  4:36 UTC (permalink / raw)
  To: huang ying
  Cc: Huang Ying, Robert Richter, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Wed, Sep 29, 2010 at 04:17:30PM +0800, huang ying wrote:
> On Tue, Sep 28, 2010 at 11:32 PM, Don Zickus <dzickus@redhat.com> wrote:
> 
> > But the problem is you have to export all this platform specific stuff to
> > traps.c and surround the code with #ifdef's, which start to look ugly.
> 
> There is no #ifdef in my final default_do_nmi(), so I think the code
> can be cleaned up without converting everything into notifier block. I
> think the rule can be: architecture specific thing should go direct
> call, while device driver should be turned into notifier block.

That sounds like a good rule, but then my definition of architecture
specific is whatever is written in the intel/amd x86_64 architecture
manual (that sits on my desk, dated 2002), which wouldn't include any
of the error handling you propose, nor MCE, nor perf.

I guess I look at all that stuff as cpu features because not all the cpus
on the market have them.  Shouldn't traps.c just contain core architecture
stuff and all those hardware error features could go under
arch/x86/kernel/cpu with the rest of the features, no?

> 
> > Is there any reason why traps.c should know about MCA/HEST/<other hardware
> > errors>?  Shouldn't it be abstracted away?
> 
> Yes. The device drivers should be abstracted away, leaving
> architectural logic, such as port 0x61 as direct call. We need
> notifier chain, but I just suggest reduce its usage if possible.
> 
> > Honestly, I would be interested in creating a southbridge driver and
> > moving the port 0x61 code there and keeping the default_do_nmi() function
> > stupidly simple (just a call to the die_chain and the
> > unknown_nmi_error()).
> 
> I think the southbridge drivers should go notifier block, but the port
> 0x61 code is architectural and should be kept in default_do_nmi().

Is port 0x61 architectural?  I thought it a southbridge thing.  In fact I
thought with modern chipsets you can access the same thing through port
0x70 or 0x71 (I can't seem to figure out which Intel doc I saw that in).
(Not that this conversation has any bearing on your patchset, just an idea
I had).

Cheers,
Don

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-30  4:36                 ` Don Zickus
@ 2010-09-30  4:57                   ` Huang Ying
  2010-09-30  8:38                     ` Robert Richter
  2010-10-01 20:00                     ` Maciej W. Rozycki
  2010-09-30  8:25                   ` Andi Kleen
  1 sibling, 2 replies; 64+ messages in thread
From: Huang Ying @ 2010-09-30  4:57 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Robert Richter, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

Hi, Don,

On Thu, 2010-09-30 at 12:36 +0800, Don Zickus wrote:
> On Wed, Sep 29, 2010 at 04:17:30PM +0800, huang ying wrote:
> > On Tue, Sep 28, 2010 at 11:32 PM, Don Zickus <dzickus@redhat.com> wrote:
> > 
> > > But the problem is you have to export all this platform specific stuff to
> > > traps.c and surround the code with #ifdef's, which start to look ugly.
> > 
> > There is no #ifdef in my final default_do_nmi(), so I think the code
> > can be cleaned up without converting everything into notifier block. I
> > think the rule can be: architecture specific thing should go direct
> > call, while device driver should be turned into notifier block.
> 
> That sounds like a good rule, but then my definition of architecture
> specific is whatever is written in the intel/amd x86_64 architecture
> manual (that sits on my desk, dated 2002), which wouldn't include any
> of the error handling you propose, nor MCE, nor perf.

MCE is at least in new version of Intel SDM vol 3A. But we do not need
to process MCE in NMI handler. Performance monitor counter is in Intel
SDM Vol 3B.

> I guess I look at all that stuff as cpu features because not all the cpus
> on the market have them.  Shouldn't traps.c just contain core architecture
> stuff and all those hardware error features could go under
> arch/x86/kernel/cpu with the rest of the features, no?

Yes. Both MCE and perf are CPU features. I think they can be thought as
optional architectural features. I think it is good to put similar
features into arch/x86/kernel/cpu instead of traps.c. But if necessary,
we can put direct call in traps.c instead of notifier block.

> > > Is there any reason why traps.c should know about MCA/HEST/<other hardware
> > > errors>?  Shouldn't it be abstracted away?
> > 
> > Yes. The device drivers should be abstracted away, leaving
> > architectural logic, such as port 0x61 as direct call. We need
> > notifier chain, but I just suggest reduce its usage if possible.
> > 
> > > Honestly, I would be interested in creating a southbridge driver and
> > > moving the port 0x61 code there and keeping the default_do_nmi() function
> > > stupidly simple (just a call to the die_chain and the
> > > unknown_nmi_error()).
> > 
> > I think the southbridge drivers should go notifier block, but the port
> > 0x61 code is architectural and should be kept in default_do_nmi().
> 
> Is port 0x61 architectural?  I thought it a southbridge thing.  In fact I
> thought with modern chipsets you can access the same thing through port
> 0x70 or 0x71 (I can't seem to figure out which Intel doc I saw that in).
> (Not that this conversation has any bearing on your patchset, just an idea
> I had).

At least until now, I think port 0x61 can be considered architectural
(just like we think PIT is architectural before). Maybe in the future it
will become deprecated and turned into something like a device driver.
But why not wait until it be.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-30  4:04                 ` Don Zickus
@ 2010-09-30  5:21                   ` Huang Ying
  2010-09-30  8:24                     ` Andi Kleen
  2010-09-30  8:23                   ` Robert Richter
  1 sibling, 1 reply; 64+ messages in thread
From: Huang Ying @ 2010-09-30  5:21 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Robert Richter, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Thu, 2010-09-30 at 12:04 +0800, Don Zickus wrote:
> On Wed, Sep 29, 2010 at 02:55:58PM +0800, huang ying wrote:
> > Hi, Don,
> > 
> > I think we all agree that to use order to determine the reason/source
> > of NMI. The difference is that I want to keep as many direct calls in
> > default_do_nmi() as possible, while you guys want to wrap almost all
> > code in default_do_nmi() into notifier handler and leave only one
> > notify_die() in defualt_do_nmi(). And I want to use different die_val
> > (and their calling order in default_do_nmi()) to determine the order
> > while you guys want to use priority (based on its value) to determine
> > the order.
> 
> Well, I just wanted to see if we can minimize the number of times we
> walked the die_chain.  Priorities was an interesting idea, I am not sure
> it works out.  Registering two handlers, seems clunky.  But I am open to
> the discussions.

die_chain is used for multiple purposes now.

Although I don't like the idea. I think it may be possible to use just
one walking of die_chain to determine the source/reason of NMI.

But we need several walking of die_chain after determining the
source/reason and before the default processing. Such as DIE_NMIWATCHDOG
before go panic.

> > On the other hand, I think we should call corresponding DIE_NMIxxx
> > before the default operations, such as for watchdog, call
> > DIE_NMIWATCHDOG before go panic, for unknown nmi, call DIE_NMIUNKNOWN
> > before the default processing (may panic).
> > 
> > I think it is important to distinguish between die chain used to
> > determine the source/reason of NMI and the die chain used to see if
> > any other driver wanted to do some processing before the default
> > operation.
> 
> I guess I still prefer to take your patch set with its change and then
> layer any new ideas on top.  I have a feeling this discussion could go on
> forever regarding how die_chains can work.

Yes. I will send out a new version soon.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-30  4:04                 ` Don Zickus
  2010-09-30  5:21                   ` Huang Ying
@ 2010-09-30  8:23                   ` Robert Richter
  1 sibling, 0 replies; 64+ messages in thread
From: Robert Richter @ 2010-09-30  8:23 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 30.09.10 00:04:32, Don Zickus wrote:
> I guess I still prefer to take your patch set with its change and then
> layer any new ideas on top.  I have a feeling this discussion could go on
> forever regarding how die_chains can work.

I agree here, the patches are the beginning to cleanup the code. But
we should avoid fundamental functional changes which could lead to
unexpected new behavior, esp. we should keep kernel interfaces like
parameters and printouts.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic
  2010-09-30  5:21                   ` Huang Ying
@ 2010-09-30  8:24                     ` Andi Kleen
  0 siblings, 0 replies; 64+ messages in thread
From: Andi Kleen @ 2010-09-30  8:24 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, huang ying, Robert Richter, Ingo Molnar,
	H. Peter Anvin, linux-kernel, Andi Kleen

> die_chain is used for multiple purposes now.

Really should probably split it up. Having a fully separate notifier
for NMIs is very appropiate.

-Andi

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-30  4:36                 ` Don Zickus
  2010-09-30  4:57                   ` Huang Ying
@ 2010-09-30  8:25                   ` Andi Kleen
  1 sibling, 0 replies; 64+ messages in thread
From: Andi Kleen @ 2010-09-30  8:25 UTC (permalink / raw)
  To: Don Zickus
  Cc: huang ying, Huang Ying, Robert Richter, Ingo Molnar,
	H. Peter Anvin, linux-kernel, Andi Kleen

> > I think the southbridge drivers should go notifier block, but the port
> > 0x61 code is architectural and should be kept in default_do_nmi().
> 
> Is port 0x61 architectural?  I thought it a southbridge thing.  In fact I

Technically not part of x86, It is part of the "PC" defacto standard.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-30  4:57                   ` Huang Ying
@ 2010-09-30  8:38                     ` Robert Richter
  2010-09-30  9:36                       ` huang ying
  2010-10-01 20:00                     ` Maciej W. Rozycki
  1 sibling, 1 reply; 64+ messages in thread
From: Robert Richter @ 2010-09-30  8:38 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, huang ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On 30.09.10 00:57:10, Huang Ying wrote:
> Yes. Both MCE and perf are CPU features. I think they can be thought as
> optional architectural features. I think it is good to put similar
> features into arch/x86/kernel/cpu instead of traps.c. But if necessary,
> we can put direct call in traps.c instead of notifier block.

As you see it seems not being obviously, what goes here and what goes
there. The approach is wrong. If we want to handle some hardware
feature, we should simply register a handler for this. Implemetations
for unhandled or unrecovered interrupts should be in traps.c. It's
that simple.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-30  8:38                     ` Robert Richter
@ 2010-09-30  9:36                       ` huang ying
  2010-09-30  9:51                         ` Andi Kleen
  0 siblings, 1 reply; 64+ messages in thread
From: huang ying @ 2010-09-30  9:36 UTC (permalink / raw)
  To: Robert Richter
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel, Andi Kleen

On Thu, Sep 30, 2010 at 4:38 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 30.09.10 00:57:10, Huang Ying wrote:
>> Yes. Both MCE and perf are CPU features. I think they can be thought as
>> optional architectural features. I think it is good to put similar
>> features into arch/x86/kernel/cpu instead of traps.c. But if necessary,
>> we can put direct call in traps.c instead of notifier block.
>
> As you see it seems not being obviously, what goes here and what goes
> there. The approach is wrong. If we want to handle some hardware
> feature, we should simply register a handler for this. Implemetations
> for unhandled or unrecovered interrupts should be in traps.c. It's
> that simple.

That is possible. I just don't like it. We should keep things as
"straightforward" as possible. Direct call is more straightfoward than
indirect call like notifier chain.

Best Regards,
Huang Ying

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-30  9:36                       ` huang ying
@ 2010-09-30  9:51                         ` Andi Kleen
  0 siblings, 0 replies; 64+ messages in thread
From: Andi Kleen @ 2010-09-30  9:51 UTC (permalink / raw)
  To: huang ying
  Cc: Robert Richter, Huang Ying, Don Zickus, Ingo Molnar,
	H. Peter Anvin, linux-kernel, Andi Kleen

> That is possible. I just don't like it. We should keep things as
> "straightforward" as possible. Direct call is more straightfoward than
> indirect call like notifier chain.

Fully agreed.  Indirect calls should be only used when absolutely
needed due to their impact on code maintainability.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI
  2010-09-30  4:57                   ` Huang Ying
  2010-09-30  8:38                     ` Robert Richter
@ 2010-10-01 20:00                     ` Maciej W. Rozycki
  1 sibling, 0 replies; 64+ messages in thread
From: Maciej W. Rozycki @ 2010-10-01 20:00 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, huang ying, Robert Richter, Ingo Molnar,
	H. Peter Anvin, linux-kernel, Andi Kleen

On Thu, 30 Sep 2010, Huang Ying wrote:

> > Is port 0x61 architectural?  I thought it a southbridge thing.  In fact I
> > thought with modern chipsets you can access the same thing through port
> > 0x70 or 0x71 (I can't seem to figure out which Intel doc I saw that in).
> > (Not that this conversation has any bearing on your patchset, just an idea
> > I had).
> 
> At least until now, I think port 0x61 can be considered architectural
> (just like we think PIT is architectural before). Maybe in the future it
> will become deprecated and turned into something like a device driver.
> But why not wait until it be.

 This port-mapped I/O register has been first defined by IBM back in 1981 
with their original PC and its 8255 PPI whose port A (at 0x60) served as a 
keyboard interface chip and the remaining ports B (0x61) and C (0x62) were 
used as GPIO (there was also a control register at 0x63, whose purpose was 
to configure the ports; the PPI was a general-purpose chip).  The 
arrangement remained the same with the PC/XT (with the tape recorder 
interface removed ;) ).

 That functionality was then slightly modified and carried over to their 
PC/AT where the functionality previously provided by 8255's port A and 
some GPIO was replaced with an 8042 microcontroller and the remaining GPIO 
was implemented with some discrete glue logic.  With time, as technology 
advanced, that logic was carried over to integrated ASICs like the Intel 
82357 Integrated System Peripheral (ISP) and eventually, with the advent 
of PCI, to the south bridge, alongside all the legacy PC/AT motherboard 
junk I/O (though a discrete 8042 has remained common; these days hanging 
off the LPC bus).

 I am fairly sure you can consider the NMI status register at 0x61 a 
standard cast in stone and I don't think you'll ever find a workstation 
x86 system without one (it also controls the binary speaker, BTW), though 
obviously its presence may vary across embedded x86 systems.  Things may 
of course eventually change if some company decides (and Intel would 
certainly seem capable enough) to get rid of some legacy junk.  I would 
start with getting rid of the 8259A pair first though.

  Maciej

^ permalink raw reply	[flat|nested] 64+ messages in thread

end of thread, other threads:[~2010-10-01 20:00 UTC | newest]

Thread overview: 64+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-27  0:57 [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Huang Ying
2010-09-27  0:57 ` [PATCH -v2 2/7] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
2010-09-27  0:57 ` [PATCH -v2 3/7] x86, NMI, Rename memory parity error to PCI SERR error Huang Ying
2010-09-27  8:01   ` Robert Richter
2010-09-27  8:39     ` Huang Ying
2010-09-27  9:00       ` Robert Richter
2010-09-27 15:33         ` Don Zickus
2010-09-27 16:45           ` Robert Richter
2010-09-27 17:50             ` Don Zickus
2010-09-28  1:33             ` Huang Ying
2010-09-28 14:29               ` Robert Richter
2010-09-29  7:56                 ` huang ying
2010-09-28 15:38               ` Don Zickus
2010-09-28  1:22           ` Huang Ying
2010-09-27  0:57 ` [PATCH -v2 4/7] x86, NMI, Rewrite NMI handler Huang Ying
2010-09-27  9:41   ` Robert Richter
2010-09-27 12:39     ` huang ying
2010-09-27 13:25       ` Robert Richter
2010-09-27 15:29         ` Don Zickus
2010-09-27 17:40           ` Robert Richter
2010-09-27 19:14             ` Don Zickus
2010-09-27 22:35               ` Robert Richter
2010-09-28  1:03         ` Huang Ying
2010-09-28 14:59           ` Robert Richter
2010-09-29  7:54             ` huang ying
2010-09-27  0:57 ` [PATCH -v2 5/7] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying
2010-09-27  0:57 ` [PATCH -v2 6/7] x86, NMI, Add support to notify hardware error with unknown NMI Huang Ying
2010-09-27 10:09   ` Robert Richter
2010-09-27 12:47     ` huang ying
2010-09-27 13:38       ` Robert Richter
2010-09-27 15:20         ` Don Zickus
2010-09-28  0:36           ` Huang Ying
2010-09-28 15:32             ` Don Zickus
2010-09-29  8:17               ` huang ying
2010-09-30  4:36                 ` Don Zickus
2010-09-30  4:57                   ` Huang Ying
2010-09-30  8:38                     ` Robert Richter
2010-09-30  9:36                       ` huang ying
2010-09-30  9:51                         ` Andi Kleen
2010-10-01 20:00                     ` Maciej W. Rozycki
2010-09-30  8:25                   ` Andi Kleen
2010-09-28  1:19         ` Huang Ying
2010-09-28 15:27           ` Robert Richter
2010-09-29  8:07             ` huang ying
2010-09-27 15:38   ` Don Zickus
2010-09-28  1:54     ` Huang Ying
2010-09-27  0:57 ` [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic Huang Ying
2010-09-27 10:44   ` Robert Richter
2010-09-27 12:56     ` huang ying
2010-09-27 13:43       ` Robert Richter
2010-09-27 15:16         ` Don Zickus
2010-09-27 16:58           ` Robert Richter
2010-09-28  1:41             ` Huang Ying
2010-09-28 15:16               ` Robert Richter
2010-09-28 15:21               ` Don Zickus
2010-09-28  0:28           ` Huang Ying
2010-09-28 15:19             ` Don Zickus
2010-09-29  6:55               ` huang ying
2010-09-30  4:04                 ` Don Zickus
2010-09-30  5:21                   ` Huang Ying
2010-09-30  8:24                     ` Andi Kleen
2010-09-30  8:23                   ` Robert Richter
2010-09-27 10:50 ` [PATCH -v2 1/7] x86, NMI, Add symbol definition for NMI magic constants Robert Richter
2010-09-27 15:29   ` Don Zickus

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).