All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code
@ 2009-07-11  7:44 Andi Kleen
  2009-07-11  7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11  7:44 UTC (permalink / raw)
  To: x86, linux-kernel


Some more cleanups, following up the previous mce cleanup series.
This doesn't change any code behaviour, just tidies up the code
a bit.

This applies on top of the previous cleanup series.

-Andi


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h
  2009-07-11  7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
@ 2009-07-11  7:44 ` Andi Kleen
  2009-07-11  7:44 ` [PATCH] [2/3] x86: mce: Improve comments in CMCI code Andi Kleen
  2009-07-11  7:44 ` [PATCH] [3/3] x86: mce: Improve comments in mce.c Andi Kleen
  2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11  7:44 UTC (permalink / raw)
  To: x86, linux-kernel


Move MCE subsystem internal prototypes and externs into mce-internal.h.
This way they don't pollute the global include name space (but
are still global on the linker level).

I didn't move all of them, in particular not prototypes that are
logically not internal (like thermal setup).

No code behaviour changes.

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---
 arch/x86/include/asm/mce.h                |   48 +++++-------------------------
 arch/x86/kernel/cpu/mcheck/mce-inject.c   |    2 +
 arch/x86/kernel/cpu/mcheck/mce-internal.h |   42 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mcheck/mce_amd.c      |    2 +
 arch/x86/kernel/cpu/mcheck/mce_intel.c    |    1 
 5 files changed, 56 insertions(+), 39 deletions(-)

Index: linux/arch/x86/include/asm/mce.h
===================================================================
--- linux.orig/arch/x86/include/asm/mce.h
+++ linux/arch/x86/include/asm/mce.h
@@ -106,9 +106,6 @@ struct mce_log {
 #include <linux/init.h>
 #include <asm/atomic.h>
 
-extern int mce_disabled;
-extern int mce_p5_enabled;
-
 #ifdef CONFIG_X86_MCE
 void mcheck_init(struct cpuinfo_x86 *c);
 #else
@@ -127,7 +124,6 @@ static inline void enable_p5_mce(void) {
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, mce_dev);
 
 /*
  * Maximum banks number.
@@ -136,28 +132,6 @@ DECLARE_PER_CPU(struct sys_device, mce_d
  */
 #define MAX_NR_BANKS 32
 
-#ifdef CONFIG_X86_MCE_INTEL
-extern int mce_cmci_disabled;
-extern int mce_ignore_ce;
-void mce_intel_feature_init(struct cpuinfo_x86 *c);
-void cmci_clear(void);
-void cmci_reenable(void);
-void cmci_rediscover(int dying);
-void cmci_recheck(void);
-#else
-static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
-static inline void cmci_clear(void) {}
-static inline void cmci_reenable(void) {}
-static inline void cmci_rediscover(int dying) {}
-static inline void cmci_recheck(void) {}
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-void mce_amd_feature_init(struct cpuinfo_x86 *c);
-#else
-static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
-#endif
-
 int mce_available(struct cpuinfo_x86 *c);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
@@ -165,22 +139,9 @@ DECLARE_PER_CPU(unsigned, mce_poll_count
 
 extern atomic_t mce_entry;
 
-typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
-DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
-
-enum mcp_flags {
-	MCP_TIMESTAMP = (1 << 0),	/* log time stamp */
-	MCP_UC = (1 << 1),		/* log uncorrected errors */
-	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
-};
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-
 int mce_notify_irq(void);
 void mce_notify_process(void);
 
-DECLARE_PER_CPU(struct mce, injectm);
-extern struct file_operations mce_chrdev_ops;
-
 /*
  * Exception handler
  */
@@ -204,5 +165,14 @@ void intel_init_thermal(struct cpuinfo_x
 
 void mce_log_therm_throt_event(__u64 status);
 
+/*
+ * Intel CMCI
+ */
+#ifdef CONFIG_X86_MCE_INTEL
+void cmci_recheck(void);
+#else
+static inline void cmci_recheck(void) {}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
Index: linux/arch/x86/kernel/cpu/mcheck/mce-inject.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -20,6 +20,8 @@
 #include <linux/smp.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
 {
Index: linux/arch/x86/kernel/cpu/mcheck/mce-internal.h
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ linux/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,4 +1,5 @@
 #include <linux/sysdev.h>
+#include <linux/percpu.h>
 #include <asm/mce.h>
 
 enum severity_level {
@@ -24,6 +25,47 @@ struct mce_bank {
 int mce_severity(struct mce *a, int tolerant, char **msg);
 
 extern int mce_ser;
+extern int mce_ignore_ce;
 
 extern struct mce_bank *mce_banks;
 
+extern int mce_disabled;
+extern int mce_p5_enabled;
+
+DECLARE_PER_CPU(struct sys_device, mce_dev);
+
+/*
+ * MCE corrected error support
+ */
+
+typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
+DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
+
+enum mcp_flags {
+	MCP_TIMESTAMP = (1 << 0),	/* log time stamp */
+	MCP_UC = (1 << 1),		/* log uncorrected errors */
+	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
+};
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+
+#ifdef CONFIG_X86_MCE_INTEL
+extern int mce_cmci_disabled;
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void cmci_clear(void);
+void cmci_reenable(void);
+void cmci_rediscover(int dying);
+#else
+static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void cmci_clear(void) {}
+static inline void cmci_reenable(void) {}
+static inline void cmci_rediscover(int dying) {}
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
+#else
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
+#endif
+
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;
Index: linux/arch/x86/kernel/cpu/mcheck/mce_amd.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -30,6 +30,8 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 
+#include "mce-internal.h"
+
 #define PFX               "mce_threshold: "
 #define VERSION           "version 1.1.1"
 #define NR_BANKS          6
Index: linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -12,6 +12,7 @@
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/mce.h>
+#include "mce-internal.h"
 
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] [2/3] x86: mce: Improve comments in CMCI code
  2009-07-11  7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
  2009-07-11  7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
@ 2009-07-11  7:44 ` Andi Kleen
  2009-07-11  7:44 ` [PATCH] [3/3] x86: mce: Improve comments in mce.c Andi Kleen
  2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11  7:44 UTC (permalink / raw)
  To: x86, linux-kernel


Improve the comments in the CMCI code in mce_intel.c. This documents
some of the design decisions and adds references to the appropriate
manuals.

No code changes.

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---
 arch/x86/kernel/cpu/mcheck/mce_intel.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

Index: linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -19,8 +19,16 @@
  * the CPU to raise an interrupt when a corrected machine check happened.
  * Normally we pick those up using a regular polling timer.
  * Also supports reliable discovery of shared banks.
+ *
+ * For reference see the Intel 64 Software Developer's Manual, Volume 3a,
+ * 15.5.2. This code is a relatively faithful implementation of the
+ * recommendations there.
  */
 
+/*
+ * Ownership of MCE banks per CPU. To avoid duplicated events
+ * for shared banks we assign ownership to specific CPUs.
+ */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
@@ -29,6 +37,10 @@ static DEFINE_PER_CPU(mce_banks_t, mce_b
  */
 static DEFINE_SPINLOCK(cmci_discover_lock);
 
+/*
+ * The CMCI threshold in hardware has some drawbacks. We chose to log
+ * every event and hardcode the threshold to 1.
+ */
 #define CMCI_THRESHOLD 1
 
 static int cmci_supported(int *banks)
@@ -163,7 +175,7 @@ void cmci_clear(void)
 
 /*
  * After a CPU went down cycle through all the others and rediscover
- * Must run in process context.
+ * bank ownership.  Must run in process context.
  */
 void cmci_rediscover(int dying)
 {

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] [3/3] x86: mce: Improve comments in mce.c
  2009-07-11  7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
  2009-07-11  7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
  2009-07-11  7:44 ` [PATCH] [2/3] x86: mce: Improve comments in CMCI code Andi Kleen
@ 2009-07-11  7:44 ` Andi Kleen
  2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11  7:44 UTC (permalink / raw)
  To: x86, linux-kernel


- Add references to documentation
- Add a top level comment giving a quick overview.
- Improve a few other comments.

No code changes

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---
 arch/x86/kernel/cpu/mcheck/mce.c |   49 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 3 deletions(-)

Index: linux/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1,11 +1,31 @@
 /*
- * Machine check handler.
+ * Machine check handler. This handles hardware errors detected by
+ * the CPU.
  *
  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
  * Rest from unknown author(s).
  * 2004 Andi Kleen. Rewrote most of it.
  * Copyright 2008 Intel Corporation
  * Author: Andi Kleen
+ *
+ * This code handles both corrected (by hardware) errors and
+ * uncorrected errors. The corrected errors are only logged and
+ * handled by machine_check_poll() et al. The entry point for
+ * uncorrected errors is do_machine_check() which handles the machine
+ * check exception (int 18) raised by the CPU. Uncorrected errors can
+ * either panic or in some special cases be recovered. The logging of
+ * machine check events is done through a special /dev/mcelog
+ * device. Then there is a lot of support code for setting up machine
+ * checks and configuring them.
+ *
+ * References:
+ * Intel 64 Software developer's manual (SDM)
+ * System Programming Guide Volume 3a
+ * Chapter 15 "Machine-check architecture"
+ * You should read that before changing anything.
+ *
+ * Old, outdated paper, but gives a reasonable overview
+ * http://halobates.de/mce.pdf
  */
 #include <linux/thread_info.h>
 #include <linux/capability.h>
@@ -164,6 +184,11 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
+/*
+ * Panic handling. Print machine checks to the console in case of an
+ * unrecoverable error.
+ */
+
 static void print_mce(struct mce *m)
 {
 	printk(KERN_EMERG
@@ -260,7 +285,9 @@ static void mce_panic(char *msg, struct
 	panic(msg);
 }
 
-/* Support code for software error injection */
+/*
+ * Support code for software error injection
+ */
 
 static int msr_to_offset(u32 msr)
 {
@@ -409,6 +436,11 @@ asmlinkage void smp_mce_self_interrupt(s
 }
 #endif
 
+/*
+ * Schedule further processing of a machine check event after
+ * the exception handler ran. Has to be careful about context because
+ * MCEs run lockless independent from any normal kernel locks.
+ */
 static void mce_report_event(struct pt_regs *regs)
 {
 	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
@@ -454,6 +486,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count)
  * Poll for corrected events or events that happened before reset.
  * Those are just logged through /dev/mcelog.
  *
+ * Either called regularly from a timer, or by special corrected
+ * error interrupts.
+ *
  * This is executed in standard interrupt context.
  *
  * Note: spec recommends to panic for fatal unsignalled
@@ -547,6 +582,10 @@ static int mce_no_way_out(struct mce *m,
 }
 
 /*
+ * Support for synchronizing machine checks over all CPUs.
+ */
+
+/*
  * Variable to establish order between CPUs while scanning.
  * Each CPU spins initially until executing is equal its number.
  */
@@ -1221,7 +1260,11 @@ static void mce_init(void)
 	}
 }
 
-/* Add per CPU specific workarounds here */
+/*
+ * This function contains workarounds for various machine check
+ * related CPU quirks. Primarily it disables broken machine check
+ * events.
+ */
 static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
 	/* This should be disabled by the BIOS, but isn't always */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-07-11  7:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-11  7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
2009-07-11  7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
2009-07-11  7:44 ` [PATCH] [2/3] x86: mce: Improve comments in CMCI code Andi Kleen
2009-07-11  7:44 ` [PATCH] [3/3] x86: mce: Improve comments in mce.c Andi Kleen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.