All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] x86: mce: kexec: switch MCE handler for kexec/kdump
@ 2015-03-03  9:01 Naoya Horiguchi
  2015-03-03  9:01 ` [PATCH v3 2/2] x86: mce: comment about MCE synchronization timeout on definition of tolerant Naoya Horiguchi
                   ` (2 more replies)
  0 siblings, 3 replies; 44+ messages in thread
From: Naoya Horiguchi @ 2015-03-03  9:01 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Prarit Bhargava, Vivek Goyal, linux-kernel, Junichi Nomura, Kiyoshi Ueda

kexec disables (or "shoots down") all CPUs other than the crashing CPU before
entering the 2nd kernel. But the MCE handler is still enabled after that,
so if an MCE happens and is broadcast to all CPUs after the main thread has
started the 2nd kernel (which might not have initialized the MCE device yet,
or might decide not to enable it), the MCE handler runs only on the other CPUs
(not on the main thread), leading to a kernel panic on the MCE synchronization
timeout. The user-visible effect of this bug is kdump failure.

Our standard MCE handler do_machine_check() makes some assumptions about the
system's state, and it's hard to alter it to cover the kexec/kdump context, so
let's add another kdump-specific handler and switch to it.

Note that this problem has existed since the current MCE handler was
implemented in 2.6.32, and recently commit 716079f66eac ("mce: Panic when a
core has reached a timeout") made it more visible by changing the default
behavior of the synchronization timeout from "ignore" to "panic".

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: <stable@vger.kernel.org>        [2.6.32+]
---
ChangeLog v2 -> v3
- go to "switch MCE handler" approach

ChangeLog v1 -> v2
- clear MSR_IA32_MCG_CTL, MSR_IA32_MCx_CTL, and CR4.MCE instead of using
  global flag to ignore MCE events.
- fixed the description of the problem
---
 arch/x86/include/asm/mce.h       |  6 +++++
 arch/x86/kernel/cpu/mcheck/mce.c | 47 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/crash.c          |  3 +++
 3 files changed, 56 insertions(+)

diff --git v3.19.orig/arch/x86/include/asm/mce.h v3.19/arch/x86/include/asm/mce.h
index 51b26e895933..8010d4b77183 100644
--- v3.19.orig/arch/x86/include/asm/mce.h
+++ v3.19/arch/x86/include/asm/mce.h
@@ -114,6 +114,9 @@ struct mca_config {
 	int monarch_timeout;
 	int panic_timeout;
 	u32 rip_msr;
+#ifdef CONFIG_KEXEC
+	int kdump_cpu;
+#endif
 };
 
 extern struct mca_config mca_cfg;
@@ -175,6 +178,9 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif
 
 int mce_available(struct cpuinfo_x86 *c);
+#ifdef CONFIG_KEXEC
+void switch_mce_handler_for_kdump(void);
+#endif
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git v3.19.orig/arch/x86/kernel/cpu/mcheck/mce.c v3.19/arch/x86/kernel/cpu/mcheck/mce.c
index 3112b79ace8e..6e7730a72b79 100644
--- v3.19.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ v3.19/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1219,6 +1219,36 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
+#ifdef CONFIG_KEXEC
+/*
+ * kdump-specific machine check handler
+ *
+ * When kexec/kdump is running, what the MCE handler is expected to do
+ * changes depending on whether the CPU is running the main thread or not.
+ *
+ * The crashing CPU, controlling the whole system exclusively, should try to
+ * get kdump as hard as possible even if an MCE happens concurrently, because
+ * some types of MCEs (for example, uncorrected errors like SRAO and SRAR,)
+ * are not fatal or don't ruin the reliability of the kdump (consider that an
+ * MCE can hit the other CPU, in which case corrupted data is never consumed.)
+ * If an MCE critically breaks the kdump operation, we are unlucky so let's
+ * accept the fate of whatever HW causes, hoping a dying message reaches admins.
+ *
+ * The other CPUs are supposed to be quiet during kexec/kdump, so after the
+ * crashing CPU has shot them down, they should not do anything except clear
+ * MCG_STATUS (without this the system is reset, which is undesirable).
+ * Note that this is also true after the crashing CPU enters the 2nd kernel.
+ */
+static void machine_check_under_kdump(struct pt_regs *regs, long error_code)
+{
+	if (mca_cfg.kdump_cpu == smp_processor_id())
+		pr_emerg("MCE triggered when kdumping. If you are lucky enough, you will have a kdump. Otherwise, this is a dying message.\n");
+
+	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+	sync_core();
+}
+#endif
+
 #ifndef CONFIG_MEMORY_FAILURE
 int memory_failure(unsigned long pfn, int vector, int flags)
 {
@@ -2104,6 +2134,23 @@ static void mce_syscore_shutdown(void)
 	mce_disable_error_reporting();
 }
 
+#ifdef CONFIG_KEXEC
+/*
+ * Called in kdump entering code to switch the MCE handler to a primitive and
+ * kdump-specific one.
+ *
+ * In kexec/kdump context, getting a kdump takes priority over handling MCEs,
+ * because what users are really interested in is finding what caused the
+ * crash, not what caused the kdump to fail. So the kdump-specific MCE handler
+ * does as little as possible in order not to disrupt kdumping.
+ */
+void switch_mce_handler_for_kdump(void)
+{
+	mca_cfg.kdump_cpu = smp_processor_id();
+	machine_check_vector = machine_check_under_kdump;
+}
+#endif
+
 /*
  * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
  * Only one CPU is active at this time, the others get re-added later using
diff --git v3.19.orig/arch/x86/kernel/crash.c v3.19/arch/x86/kernel/crash.c
index 6f3baedcb6f6..273805e772f6 100644
--- v3.19.orig/arch/x86/kernel/crash.c
+++ v3.19/arch/x86/kernel/crash.c
@@ -34,6 +34,7 @@
 #include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
+#include <asm/mce.h>
 
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
@@ -166,6 +167,8 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	/* The kernel is broken so disable interrupts */
 	local_irq_disable();
 
+	switch_mce_handler_for_kdump();
+
 	kdump_nmi_shootdown_cpus();
 
 	/*
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2015-04-28  8:42 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-03  9:01 [PATCH v3 1/2] x86: mce: kexec: switch MCE handler for kexec/kdump Naoya Horiguchi
2015-03-03  9:01 ` [PATCH v3 2/2] x86: mce: comment about MCE synchronization timeout on definition of tolerant Naoya Horiguchi
2015-03-03 18:09 ` [PATCH v3 1/2] x86: mce: kexec: switch MCE handler for kexec/kdump Luck, Tony
2015-03-04  7:41   ` [PATCH v4] " Naoya Horiguchi
2015-03-04 23:12     ` Luck, Tony
2015-03-05  1:24       ` Naoya Horiguchi
2015-03-05  6:45         ` [PATCH v5] " Naoya Horiguchi
2015-03-05  8:57           ` Borislav Petkov
2015-03-05  9:37             ` Naoya Horiguchi
2015-03-06  2:59               ` [PATCH v6] " Naoya Horiguchi
2015-03-06  8:34                 ` Borislav Petkov
2015-03-06  9:09                   ` Naoya Horiguchi
2015-03-06  9:27                     ` Borislav Petkov
2015-03-06  9:32                       ` Naoya Horiguchi
2015-03-06 10:22                         ` [PATCH v7] " Naoya Horiguchi
2015-04-06  7:18                           ` Naoya Horiguchi
2015-04-06 11:59                             ` Borislav Petkov
2015-04-07  8:00                               ` Naoya Horiguchi
2015-04-07  8:02                                 ` [PATCH v8] " Naoya Horiguchi
2015-04-09  6:13                                   ` Borislav Petkov
2015-04-09  6:57                                     ` Naoya Horiguchi
2015-04-09  7:02                                       ` Borislav Petkov
2015-04-09 18:07                                         ` Luck, Tony
2015-04-09  8:00                                     ` Ingo Molnar
2015-04-09  8:21                                       ` Borislav Petkov
2015-04-09  8:59                                         ` Naoya Horiguchi
2015-04-09  9:53                                           ` Borislav Petkov
2015-04-09 18:22                                             ` Luck, Tony
2015-04-09 19:05                                               ` Borislav Petkov
2015-04-10  0:49                                                 ` Naoya Horiguchi
2015-04-10  4:07                                                   ` Naoya Horiguchi
2015-04-10  7:24                                                     ` Borislav Petkov
2015-04-28  8:41                                                   ` Baoquan He
2015-04-09  8:39                                       ` Naoya Horiguchi
2015-04-09  9:13                                         ` Ingo Molnar
2015-04-06 11:56                           ` [PATCH v7] " Borislav Petkov
2015-04-07  7:59                             ` Naoya Horiguchi
2015-03-06  8:28               ` [PATCH v5] " Borislav Petkov
2015-03-06  5:44         ` [PATCH v4] " Naoya Horiguchi
2015-03-05  8:48       ` Borislav Petkov
2015-03-03 18:53 ` [PATCH v3 1/2] " Borislav Petkov
2015-03-04  7:51   ` Naoya Horiguchi
2015-03-04  9:12     ` Borislav Petkov
2015-03-05  1:27       ` Naoya Horiguchi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.