linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/8] x86/RAS: Some accumulated stuff
@ 2018-02-21 10:18 Borislav Petkov
  2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
                   ` (7 more replies)
  0 siblings, 8 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Borislav Petkov <bp@suse.de>

Hi,

The first 3 are cleanups; the 4th makes the MCA code collect error info
without looking at the Valid bits because there might be cases where
they're not set.

The last 4 fix the IPI-with-IRQs-off issue which was reported recently.

Thx.

Borislav Petkov (4):
  x86/MCE: Put private structures and definitions into the internal
    header
  x86/MCE: Convert mca_config bools to a bitfield
  x86/mce: Issue the mcelog --ascii message on !AMD
  x86/MCE/AMD: Collect error info even if valid bits are not set

Yazen Ghannam (4):
  x86/MCE/AMD: Pass the bank number to smca_get_bank_type()
  x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
  x86/MCE/AMD: Get address from already initialized block
  x86/MCE/AMD: Carve out SMCA get_block_address() code

 arch/x86/include/asm/mce.h                | 53 +--------------------
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 55 ++++++++++++++++++++++
 arch/x86/kernel/cpu/mcheck/mce.c          | 34 ++++++++++----
 arch/x86/kernel/cpu/mcheck/mce_amd.c      | 78 ++++++++++++++++++++-----------
 drivers/edac/mce_amd.c                    | 11 +++--
 5 files changed, 140 insertions(+), 91 deletions(-)

-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 11:39   ` Ingo Molnar
  2018-02-21 17:58   ` [tip:ras/core] x86/mce: " tip-bot for Borislav Petkov
  2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Borislav Petkov <bp@suse.de>

... because they don't need to be exported outside of MCE.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/mce.h                | 52 -------------------------------
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 52 +++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5ba658..c3fb9a792e13 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
 	struct mce entry[MCE_LOG_LEN];
 };
 
-struct mca_config {
-	bool dont_log_ce;
-	bool cmci_disabled;
-	bool lmce_disabled;
-	bool ignore_ce;
-	bool disabled;
-	bool ser;
-	bool recovery;
-	bool bios_cmci_threshold;
-	u8 banks;
-	s8 bootlog;
-	int tolerant;
-	int monarch_timeout;
-	int panic_timeout;
-	u32 rip_msr;
-};
-
-struct mce_vendor_flags {
-	/*
-	 * Indicates that overflow conditions are not fatal, when set.
-	 */
-	__u64 overflow_recov	: 1,
-
-	/*
-	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
-	 * Recovery. It indicates support for data poisoning in HW and deferred
-	 * error interrupts.
-	 */
-	      succor		: 1,
-
-	/*
-	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
-	 * the register space for each MCA bank and also increases number of
-	 * banks. Also, to accommodate the new banks and registers, the MCA
-	 * register space is moved to a new MSR range.
-	 */
-	      smca		: 1,
-
-	      __reserved_0	: 61;
-};
-
-struct mca_msr_regs {
-	u32 (*ctl)	(int bank);
-	u32 (*status)	(int bank);
-	u32 (*addr)	(int bank);
-	u32 (*misc)	(int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
 enum mce_notifier_prios {
 	MCE_PRIO_FIRST		= INT_MAX,
 	MCE_PRIO_SRAO		= INT_MAX - 1,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..986c8dd2d320 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,6 +113,58 @@ static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
 static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 #endif
 
+struct mca_config {
+	bool dont_log_ce;
+	bool cmci_disabled;
+	bool lmce_disabled;
+	bool ignore_ce;
+	bool disabled;
+	bool ser;
+	bool recovery;
+	bool bios_cmci_threshold;
+	u8 banks;
+	s8 bootlog;
+	int tolerant;
+	int monarch_timeout;
+	int panic_timeout;
+	u32 rip_msr;
+};
+
 extern struct mca_config mca_cfg;
 
+struct mce_vendor_flags {
+	/*
+	 * Indicates that overflow conditions are not fatal, when set.
+	 */
+	__u64 overflow_recov	: 1,
+
+	/*
+	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+	 * Recovery. It indicates support for data poisoning in HW and deferred
+	 * error interrupts.
+	 */
+	      succor		: 1,
+
+	/*
+	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+	 * the register space for each MCA bank and also increases number of
+	 * banks. Also, to accommodate the new banks and registers, the MCA
+	 * register space is moved to a new MSR range.
+	 */
+	      smca		: 1,
+
+	      __reserved_0	: 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+	u32 (*ctl)	(int bank);
+	u32 (*status)	(int bank);
+	u32 (*addr)	(int bank);
+	u32 (*misc)	(int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
 #endif /* __X86_MCE_INTERNAL_H__ */
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
  2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 17:58   ` [tip:ras/core] x86/mce: Convert 'struct mca_config' " tip-bot for Borislav Petkov
  2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Borislav Petkov <bp@suse.de>

Convert several of the bool members to single-bit bitfield members to
save space when future flags are added.

No functionality change.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 13 ++++++++-----
 arch/x86/kernel/cpu/mcheck/mce.c          | 16 ++++++++--------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 986c8dd2d320..ceb67cd5918f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -116,12 +116,15 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 struct mca_config {
 	bool dont_log_ce;
 	bool cmci_disabled;
-	bool lmce_disabled;
 	bool ignore_ce;
-	bool disabled;
-	bool ser;
-	bool recovery;
-	bool bios_cmci_threshold;
+
+	__u64 lmce_disabled		: 1,
+	      disabled			: 1,
+	      ser			: 1,
+	      recovery			: 1,
+	      bios_cmci_threshold	: 1,
+	      __reserved		: 59;
+
 	u8 banks;
 	s8 bootlog;
 	int tolerant;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 75f405ac085c..9c8cb58c77f8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1506,7 +1506,7 @@ static int __mcheck_cpu_cap_init(void)
 		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
 
 	if (cap & MCG_SER_P)
-		mca_cfg.ser = true;
+		mca_cfg.ser = 1;
 
 	return 0;
 }
@@ -1814,12 +1814,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 
 	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
-		mca_cfg.disabled = true;
+		mca_cfg.disabled = 1;
 		return;
 	}
 
 	if (mce_gen_pool_init()) {
-		mca_cfg.disabled = true;
+		mca_cfg.disabled = 1;
 		pr_emerg("Couldn't allocate MCE records pool!\n");
 		return;
 	}
@@ -1897,11 +1897,11 @@ static int __init mcheck_enable(char *str)
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
-		cfg->disabled = true;
+		cfg->disabled = 1;
 	else if (!strcmp(str, "no_cmci"))
 		cfg->cmci_disabled = true;
 	else if (!strcmp(str, "no_lmce"))
-		cfg->lmce_disabled = true;
+		cfg->lmce_disabled = 1;
 	else if (!strcmp(str, "dont_log_ce"))
 		cfg->dont_log_ce = true;
 	else if (!strcmp(str, "ignore_ce"))
@@ -1909,9 +1909,9 @@ static int __init mcheck_enable(char *str)
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		cfg->bootlog = (str[0] == 'b');
 	else if (!strcmp(str, "bios_cmci_threshold"))
-		cfg->bios_cmci_threshold = true;
+		cfg->bios_cmci_threshold = 1;
 	else if (!strcmp(str, "recovery"))
-		cfg->recovery = true;
+		cfg->recovery = 1;
 	else if (isdigit(str[0])) {
 		if (get_option(&str, &cfg->tolerant) == 2)
 			get_option(&str, &(cfg->monarch_timeout));
@@ -2376,7 +2376,7 @@ device_initcall_sync(mcheck_init_device);
  */
 static int __init mcheck_disable(char *str)
 {
-	mca_cfg.disabled = true;
+	mca_cfg.disabled = 1;
 	return 1;
 }
 __setup("nomce", mcheck_disable);
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
  2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
  2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 17:59   ` [tip:ras/core] x86/mce: Issue the 'mcelog --ascii' message only " tip-bot for Borislav Petkov
  2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Borislav Petkov <bp@suse.de>

mcelog cannot decode AMD MCEs.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9c8cb58c77f8..b16b184d90c5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -264,7 +264,9 @@ static void __print_mce(struct mce *m)
 static void print_mce(struct mce *m)
 {
 	__print_mce(m);
-	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
+
+	if (m->cpuvendor != X86_VENDOR_AMD)
+		pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
                   ` (2 preceding siblings ...)
  2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 17:59   ` [tip:ras/core] x86/mce/AMD: " tip-bot for Borislav Petkov
  2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Borislav Petkov <bp@suse.de>

The MCA banks log error info into MCA_ADDR, MCA_MISC0, and MCA_SYND even
if the corresponding valid bits are not set:

"Error handlers should save the values in MCA_ADDR, MCA_MISC0,
and MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."

Do so by setting those bits so that code down the MCE processing path
doesn't need to be changed.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b16b184d90c5..dd713335f5d9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -437,6 +437,20 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 		if (mca_cfg.rip_msr)
 			m->ip = mce_rdmsrl(mca_cfg.rip_msr);
 	}
+
+	/*
+	 * Error handlers should save the values in MCA_ADDR, MCA_MISC0, and
+	 * MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
+	 * MCA_STATUS[SyndV] are zero.
+	 */
+	if (m->cpuvendor == X86_VENDOR_AMD) {
+		u64 status = MCI_STATUS_ADDRV | MCI_STATUS_MISCV;
+
+		if (mce_flags.smca)
+			status |= MCI_STATUS_SYNDV;
+
+		m->status |= status;
+	}
 }
 
 int mce_available(struct cpuinfo_x86 *c)
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type()
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
                   ` (3 preceding siblings ...)
  2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 18:00   ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
  2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Yazen Ghannam <yazen.ghannam@amd.com>

Pass the bank number to smca_get_bank_type() since that's all we need.

Also, we should compare the bank number to MAX_NR_BANKS (size of the
smca_banks array) not the number of bank types. Bank types are reused
for multiple banks, so the number of types can be different from the
number of banks in a system and thus we could return an invalid bank
type.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org> # 4.14.x: 11cf887728a3 x86/MCE/AMD: Define a function to get SMCA bank type
Cc: <stable@vger.kernel.org> # 4.14.x: c6708d50f166 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
Cc: <stable@vger.kernel.org> # 4.14.x
Link: https://lkml.kernel.org/r/20180215210943.11530-1-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad242324..7fbb19cb1859 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
 }
 EXPORT_SYMBOL_GPL(smca_get_long_name);
 
-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
 {
 	struct smca_bank *b;
 
-	if (m->bank >= N_SMCA_BANK_TYPES)
+	if (bank >= MAX_NR_BANKS)
 		return N_SMCA_BANK_TYPES;
 
-	b = &smca_banks[m->bank];
+	b = &smca_banks[bank];
 	if (!b->hwid)
 		return N_SMCA_BANK_TYPES;
 
@@ -760,7 +760,7 @@ bool amd_mce_is_memory_error(struct mce *m)
 	u8 xec = (m->status >> 16) & 0x1f;
 
 	if (mce_flags.smca)
-		return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+		return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
 
 	return m->bank == 4 && xec == 0x8;
 }
@@ -1063,7 +1063,7 @@ static struct kobj_type threshold_ktype = {
 
 static const char *get_name(unsigned int bank, struct threshold_block *b)
 {
-	unsigned int bank_type;
+	enum smca_bank_types bank_type;
 
 	if (!mce_flags.smca) {
 		if (b && bank == 4)
@@ -1072,11 +1072,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
 		return th_names[bank];
 	}
 
-	if (!smca_banks[bank].hwid)
+	bank_type = smca_get_bank_type(bank);
+	if (bank_type >= N_SMCA_BANK_TYPES)
 		return NULL;
 
-	bank_type = smca_banks[bank].hwid->bank_type;
-
 	if (b && bank_type == SMCA_UMC) {
 		if (b->block < ARRAY_SIZE(smca_umc_block_names))
 			return smca_umc_block_names[b->block];
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
                   ` (4 preceding siblings ...)
  2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 18:00   ` [tip:ras/core] x86/mce/AMD, " tip-bot for Yazen Ghannam
  2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
  2018-02-21 10:19 ` [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code Borislav Petkov
  7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Yazen Ghannam <yazen.ghannam@amd.com>

Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
bank 4 in the smca_banks array. This means that when we check if a bank
is initialized, like during boot or resume, we will see that bank 4 is
not initialized and try to initialize it.

This will cause a call trace, when resuming from suspend, due to
rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue
an IPI but we're running with interrupts disabled. This triggers:

  WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0
  ...

Reserved banks will be read-as-zero, so their MCA_IPID register will be
zero. So, like the smca_banks array, the threshold_banks array will not
have an entry for a reserved bank since all its MCA_MISC* registers will
be zero.

Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.

Use the "Reserved" type when checking if a bank is reserved. It's
possible that other bank numbers may be reserved on future systems.

Don't try to find the block address on reserved banks.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org> # 4.14.x
Link: https://lkml.kernel.org/r/20180215210943.11530-2-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/mce.h           |  1 +
 arch/x86/kernel/cpu/mcheck/mce_amd.c |  7 +++++++
 drivers/edac/mce_amd.c               | 11 +++++++----
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c3fb9a792e13..8c7b3e5a2d01 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -294,6 +294,7 @@ enum smca_bank_types {
 	SMCA_IF,	/* Instruction Fetch */
 	SMCA_L2_CACHE,	/* L2 Cache */
 	SMCA_DE,	/* Decoder Unit */
+	SMCA_RESERVED,	/* Reserved */
 	SMCA_EX,	/* Execution Unit */
 	SMCA_FP,	/* Floating Point */
 	SMCA_L3_CACHE,	/* L3 Cache */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7fbb19cb1859..d8ba9d0c3f01 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
 	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
 	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
 	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
+	[SMCA_RESERVED]	= { "reserved",		"Reserved" },
 	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
 	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
 	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
 static struct smca_hwid smca_hwid_mcatypes[] = {
 	/* { bank_type, hwid_mcatype, xec_bitmap } */
 
+	/* Reserved type */
+	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
 	/* ZN Core (HWID=0xB0) MCA types */
 	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
 	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
 	u32 addr = 0, offset = 0;
 
 	if (mce_flags.smca) {
+		if (smca_get_bank_type(bank) == SMCA_RESERVED)
+			return addr;
+
 		if (!block) {
 			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
 		} else {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671c7a38..2ab4d61ee47e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m)
 static void decode_smca_error(struct mce *m)
 {
 	struct smca_hwid *hwid;
-	unsigned int bank_type;
+	enum smca_bank_types bank_type;
 	const char *ip_name;
 	u8 xec = XEC(m->status, xec_mask);
 
 	if (m->bank >= ARRAY_SIZE(smca_banks))
 		return;
 
-	if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
-		pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
-
 	hwid = smca_banks[m->bank].hwid;
 	if (!hwid)
 		return;
 
 	bank_type = hwid->bank_type;
+
+	if (bank_type == SMCA_RESERVED) {
+		pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
+		return;
+	}
+
 	ip_name = smca_get_long_name(bank_type);
 
 	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
                   ` (5 preceding siblings ...)
  2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
  2018-02-21 18:01   ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
  2018-02-21 10:19 ` [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code Borislav Petkov
  7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Yazen Ghannam <yazen.ghannam@amd.com>

The block address is saved after the block is initialized when
threshold_init_device() is called.

Use the saved block address, if available, rather than trying to
rediscover it.

This will avoid a call trace, when resuming from suspend, due to the
rdmsr_safe_on_cpu() call in get_block_address(). The rdmsr_safe_on_cpu()
call issues an IPI but we're running with interrupts disabled. This
triggers:

    WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org> # 4.14.x
Link: https://lkml.kernel.org/r/20180215210943.11530-3-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index d8ba9d0c3f01..12bc2863a4d6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
 {
 	u32 addr = 0, offset = 0;
 
+	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+		return addr;
+
+	/* Get address from already initialized block. */
+	if (per_cpu(threshold_banks, cpu)) {
+		struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
+
+		if (bankp && bankp->blocks) {
+			struct threshold_block *blockp = &bankp->blocks[block];
+
+			if (blockp)
+				return blockp->address;
+		}
+	}
+
 	if (mce_flags.smca) {
 		if (smca_get_bank_type(bank) == SMCA_RESERVED)
 			return addr;
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code
  2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
                   ` (6 preceding siblings ...)
  2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
@ 2018-02-21 10:19 ` Borislav Petkov
  7 siblings, 0 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:19 UTC (permalink / raw)
  To: X86 ML; +Cc: Tony Luck, LKML

From: Yazen Ghannam <yazen.ghannam@amd.com>

Carve out the SMCA code in get_block_address() into a separate helper
function.

No functional change.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20180215210943.11530-4-Yazen.Ghannam@amd.com
[ Save an indentation level. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 57 ++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 12bc2863a4d6..f7666eef4a87 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -431,6 +431,35 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
+static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
+				  unsigned int block)
+{
+	u32 low, high;
+	u32 addr = 0;
+
+	if (smca_get_bank_type(bank) == SMCA_RESERVED)
+		return addr;
+
+	if (!block)
+		return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+	/*
+	 * For SMCA enabled processors, BLKPTR field of the first MISC register
+	 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+	 */
+	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+		return addr;
+
+	if (!(low & MCI_CONFIG_MCAX))
+		return addr;
+
+	if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+	    (low & MASK_BLKPTR_LO))
+		return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+
+	return addr;
+}
+
 static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
 			     unsigned int bank, unsigned int block)
 {
@@ -451,32 +480,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
 		}
 	}
 
-	if (mce_flags.smca) {
-		if (smca_get_bank_type(bank) == SMCA_RESERVED)
-			return addr;
-
-		if (!block) {
-			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
-		} else {
-			/*
-			 * For SMCA enabled processors, BLKPTR field of the
-			 * first MISC register (MCx_MISC0) indicates presence of
-			 * additional MISC register set (MISC1-4).
-			 */
-			u32 low, high;
-
-			if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-				return addr;
-
-			if (!(low & MCI_CONFIG_MCAX))
-				return addr;
-
-			if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
-			    (low & MASK_BLKPTR_LO))
-				addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
-		}
-		return addr;
-	}
+	if (mce_flags.smca)
+		return smca_get_block_address(cpu, bank, block);
 
 	/* Fall back to method we used for older processors: */
 	switch (block) {
-- 
2.13.0

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header
  2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
@ 2018-02-21 11:39   ` Ingo Molnar
  2018-02-21 13:28     ` Borislav Petkov
  2018-02-21 17:58   ` [tip:ras/core] x86/mce: " tip-bot for Borislav Petkov
  1 sibling, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2018-02-21 11:39 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: X86 ML, Tony Luck, LKML


* Borislav Petkov <bp@alien8.de> wrote:

> From: Borislav Petkov <bp@suse.de>
> 
> ... because they don't need to be exported outside of MCE.
> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> ---
>  arch/x86/include/asm/mce.h                | 52 -------------------------------
>  arch/x86/kernel/cpu/mcheck/mce-internal.h | 52 +++++++++++++++++++++++++++++++
>  2 files changed, 52 insertions(+), 52 deletions(-)

Hm, this doesn't apply to latest upstream anymore:

 Hunk #1 FAILED at 113.
 1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kernel/cpu/mcheck/mce-internal.h.rej

mind forward porting it?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header
  2018-02-21 11:39   ` Ingo Molnar
@ 2018-02-21 13:28     ` Borislav Petkov
  0 siblings, 0 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 13:28 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: X86 ML, Tony Luck, LKML

On Wed, Feb 21, 2018 at 12:39:05PM +0100, Ingo Molnar wrote:
> Hm, this doesn't apply to latest upstream anymore:
> 
>  Hunk #1 FAILED at 113.
>  1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kernel/cpu/mcheck/mce-internal.h.rej
> 
> mind forward porting it?

Ah, you took Tony's patch in the meantime. Ok, here's a new version.
The others I'm not sending again as they should apply ok with a small
offset.

Thx.

---
From: Borislav Petkov <bp@suse.de>
Date: Mon, 20 Nov 2017 17:18:25 +0100
Subject: [PATCH] x86/MCE: Put private structures and definitions into the internal header

... because they don't need to be exported outside of MCE.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/mce.h                | 52 ----------------------------
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 56 +++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5ba658..c3fb9a792e13 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
 	struct mce entry[MCE_LOG_LEN];
 };
 
-struct mca_config {
-	bool dont_log_ce;
-	bool cmci_disabled;
-	bool lmce_disabled;
-	bool ignore_ce;
-	bool disabled;
-	bool ser;
-	bool recovery;
-	bool bios_cmci_threshold;
-	u8 banks;
-	s8 bootlog;
-	int tolerant;
-	int monarch_timeout;
-	int panic_timeout;
-	u32 rip_msr;
-};
-
-struct mce_vendor_flags {
-	/*
-	 * Indicates that overflow conditions are not fatal, when set.
-	 */
-	__u64 overflow_recov	: 1,
-
-	/*
-	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
-	 * Recovery. It indicates support for data poisoning in HW and deferred
-	 * error interrupts.
-	 */
-	      succor		: 1,
-
-	/*
-	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
-	 * the register space for each MCA bank and also increases number of
-	 * banks. Also, to accommodate the new banks and registers, the MCA
-	 * register space is moved to a new MSR range.
-	 */
-	      smca		: 1,
-
-	      __reserved_0	: 61;
-};
-
-struct mca_msr_regs {
-	u32 (*ctl)	(int bank);
-	u32 (*status)	(int bank);
-	u32 (*addr)	(int bank);
-	u32 (*misc)	(int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
 enum mce_notifier_prios {
 	MCE_PRIO_FIRST		= INT_MAX,
 	MCE_PRIO_SRAO		= INT_MAX - 1,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index e956eb267061..ed3c5aee22cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
 static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 #endif
 
-extern struct mca_config mca_cfg;
-
 #ifndef CONFIG_X86_64
 /*
  * On 32-bit systems it would be difficult to safely unmap a poison page
@@ -130,4 +128,58 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
 #define mce_unmap_kpfn mce_unmap_kpfn
 #endif
 
+struct mca_config {
+	bool dont_log_ce;
+	bool cmci_disabled;
+	bool lmce_disabled;
+	bool ignore_ce;
+	bool disabled;
+	bool ser;
+	bool recovery;
+	bool bios_cmci_threshold;
+	u8 banks;
+	s8 bootlog;
+	int tolerant;
+	int monarch_timeout;
+	int panic_timeout;
+	u32 rip_msr;
+};
+
+extern struct mca_config mca_cfg;
+
+struct mce_vendor_flags {
+	/*
+	 * Indicates that overflow conditions are not fatal, when set.
+	 */
+	__u64 overflow_recov	: 1,
+
+	/*
+	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+	 * Recovery. It indicates support for data poisoning in HW and deferred
+	 * error interrupts.
+	 */
+	      succor		: 1,
+
+	/*
+	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+	 * the register space for each MCA bank and also increases number of
+	 * banks. Also, to accommodate the new banks and registers, the MCA
+	 * register space is moved to a new MSR range.
+	 */
+	      smca		: 1,
+
+	      __reserved_0	: 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+	u32 (*ctl)	(int bank);
+	u32 (*status)	(int bank);
+	u32 (*addr)	(int bank);
+	u32 (*misc)	(int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
 #endif /* __X86_MCE_INTERNAL_H__ */
-- 
2.13.0

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce: Put private structures and definitions into the internal header
  2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
  2018-02-21 11:39   ` Ingo Molnar
@ 2018-02-21 17:58   ` tip-bot for Borislav Petkov
  1 sibling, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, linux-edac, torvalds, tony.luck, peterz, bp, hpa,
	bp, tglx, mingo

Commit-ID:  a189c03235639a31343215f82b83b49985c55336
Gitweb:     https://git.kernel.org/tip/a189c03235639a31343215f82b83b49985c55336
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:53 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:53 +0100

x86/mce: Put private structures and definitions into the internal header

... because they don't need to be exported outside of MCE.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-2-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mce.h                | 52 ----------------------------
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 56 +++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5..c3fb9a7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
 	struct mce entry[MCE_LOG_LEN];
 };
 
-struct mca_config {
-	bool dont_log_ce;
-	bool cmci_disabled;
-	bool lmce_disabled;
-	bool ignore_ce;
-	bool disabled;
-	bool ser;
-	bool recovery;
-	bool bios_cmci_threshold;
-	u8 banks;
-	s8 bootlog;
-	int tolerant;
-	int monarch_timeout;
-	int panic_timeout;
-	u32 rip_msr;
-};
-
-struct mce_vendor_flags {
-	/*
-	 * Indicates that overflow conditions are not fatal, when set.
-	 */
-	__u64 overflow_recov	: 1,
-
-	/*
-	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
-	 * Recovery. It indicates support for data poisoning in HW and deferred
-	 * error interrupts.
-	 */
-	      succor		: 1,
-
-	/*
-	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
-	 * the register space for each MCA bank and also increases number of
-	 * banks. Also, to accommodate the new banks and registers, the MCA
-	 * register space is moved to a new MSR range.
-	 */
-	      smca		: 1,
-
-	      __reserved_0	: 61;
-};
-
-struct mca_msr_regs {
-	u32 (*ctl)	(int bank);
-	u32 (*status)	(int bank);
-	u32 (*addr)	(int bank);
-	u32 (*misc)	(int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
 enum mce_notifier_prios {
 	MCE_PRIO_FIRST		= INT_MAX,
 	MCE_PRIO_SRAO		= INT_MAX - 1,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index e956eb2..ed3c5ae 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
 static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 #endif
 
-extern struct mca_config mca_cfg;
-
 #ifndef CONFIG_X86_64
 /*
  * On 32-bit systems it would be difficult to safely unmap a poison page
@@ -130,4 +128,58 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
 #define mce_unmap_kpfn mce_unmap_kpfn
 #endif
 
+struct mca_config {
+	bool dont_log_ce;
+	bool cmci_disabled;
+	bool lmce_disabled;
+	bool ignore_ce;
+	bool disabled;
+	bool ser;
+	bool recovery;
+	bool bios_cmci_threshold;
+	u8 banks;
+	s8 bootlog;
+	int tolerant;
+	int monarch_timeout;
+	int panic_timeout;
+	u32 rip_msr;
+};
+
+extern struct mca_config mca_cfg;
+
+struct mce_vendor_flags {
+	/*
+	 * Indicates that overflow conditions are not fatal, when set.
+	 */
+	__u64 overflow_recov	: 1,
+
+	/*
+	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+	 * Recovery. It indicates support for data poisoning in HW and deferred
+	 * error interrupts.
+	 */
+	      succor		: 1,
+
+	/*
+	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+	 * the register space for each MCA bank and also increases number of
+	 * banks. Also, to accommodate the new banks and registers, the MCA
+	 * register space is moved to a new MSR range.
+	 */
+	      smca		: 1,
+
+	      __reserved_0	: 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+	u32 (*ctl)	(int bank);
+	u32 (*status)	(int bank);
+	u32 (*addr)	(int bank);
+	u32 (*misc)	(int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
 #endif /* __X86_MCE_INTERNAL_H__ */

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce: Convert 'struct mca_config' bools to a bitfield
  2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
@ 2018-02-21 17:58   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, peterz, hpa, linux-kernel, torvalds, mingo, tglx, bp,
	linux-edac, tony.luck

Commit-ID:  09933946643bcc8e0a9bd4ede192ed854e22db8f
Gitweb:     https://git.kernel.org/tip/09933946643bcc8e0a9bd4ede192ed854e22db8f
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:54 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:53 +0100

x86/mce: Convert 'struct mca_config' bools to a bitfield

... to save space when future flags are added.

No functionality change.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-3-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 13 ++++++++-----
 arch/x86/kernel/cpu/mcheck/mce.c          | 16 ++++++++--------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed3c5ae..374d1aa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -131,12 +131,15 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
 struct mca_config {
 	bool dont_log_ce;
 	bool cmci_disabled;
-	bool lmce_disabled;
 	bool ignore_ce;
-	bool disabled;
-	bool ser;
-	bool recovery;
-	bool bios_cmci_threshold;
+
+	__u64 lmce_disabled		: 1,
+	      disabled			: 1,
+	      ser			: 1,
+	      recovery			: 1,
+	      bios_cmci_threshold	: 1,
+	      __reserved		: 59;
+
 	u8 banks;
 	s8 bootlog;
 	int tolerant;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ff94d1..db5b1e4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1511,7 +1511,7 @@ static int __mcheck_cpu_cap_init(void)
 		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
 
 	if (cap & MCG_SER_P)
-		mca_cfg.ser = true;
+		mca_cfg.ser = 1;
 
 	return 0;
 }
@@ -1819,12 +1819,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 
 	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
-		mca_cfg.disabled = true;
+		mca_cfg.disabled = 1;
 		return;
 	}
 
 	if (mce_gen_pool_init()) {
-		mca_cfg.disabled = true;
+		mca_cfg.disabled = 1;
 		pr_emerg("Couldn't allocate MCE records pool!\n");
 		return;
 	}
@@ -1902,11 +1902,11 @@ static int __init mcheck_enable(char *str)
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
-		cfg->disabled = true;
+		cfg->disabled = 1;
 	else if (!strcmp(str, "no_cmci"))
 		cfg->cmci_disabled = true;
 	else if (!strcmp(str, "no_lmce"))
-		cfg->lmce_disabled = true;
+		cfg->lmce_disabled = 1;
 	else if (!strcmp(str, "dont_log_ce"))
 		cfg->dont_log_ce = true;
 	else if (!strcmp(str, "ignore_ce"))
@@ -1914,9 +1914,9 @@ static int __init mcheck_enable(char *str)
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		cfg->bootlog = (str[0] == 'b');
 	else if (!strcmp(str, "bios_cmci_threshold"))
-		cfg->bios_cmci_threshold = true;
+		cfg->bios_cmci_threshold = 1;
 	else if (!strcmp(str, "recovery"))
-		cfg->recovery = true;
+		cfg->recovery = 1;
 	else if (isdigit(str[0])) {
 		if (get_option(&str, &cfg->tolerant) == 2)
 			get_option(&str, &(cfg->monarch_timeout));
@@ -2381,7 +2381,7 @@ device_initcall_sync(mcheck_init_device);
  */
 static int __init mcheck_disable(char *str)
 {
-	mca_cfg.disabled = true;
+	mca_cfg.disabled = 1;
 	return 1;
 }
 __setup("nomce", mcheck_disable);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce: Issue the 'mcelog --ascii' message only on !AMD
  2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
@ 2018-02-21 17:59   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:59 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tony.luck, bp, hpa, bp, tglx, peterz, torvalds, linux-kernel,
	linux-edac, mingo

Commit-ID:  b2fbf6f282147b42d669a4bd4a7b1de2e2d6a792
Gitweb:     https://git.kernel.org/tip/b2fbf6f282147b42d669a4bd4a7b1de2e2d6a792
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:55 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:53 +0100

x86/mce: Issue the 'mcelog --ascii' message only on !AMD

mcelog cannot decode AMD MCEs.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-4-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index db5b1e4..d7dff23 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -268,7 +268,9 @@ static void __print_mce(struct mce *m)
 static void print_mce(struct mce *m)
 {
 	__print_mce(m);
-	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
+
+	if (m->cpuvendor != X86_VENDOR_AMD)
+		pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce/AMD: Collect error info even if valid bits are not set
  2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
@ 2018-02-21 17:59   ` tip-bot for Borislav Petkov
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:59 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, bp, hpa, peterz, tglx, mingo, torvalds, linux-edac,
	tony.luck, linux-kernel

Commit-ID:  4b1e84276a6172980c5bf39aa091ba13e90d6dad
Gitweb:     https://git.kernel.org/tip/4b1e84276a6172980c5bf39aa091ba13e90d6dad
Author:     Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:56 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:54 +0100

x86/mce/AMD: Collect error info even if valid bits are not set

The MCA banks log error info into MCA_ADDR, MCA_MISC0, and MCA_SYND even
if the corresponding valid bits are not set:

"Error handlers should save the values in MCA_ADDR, MCA_MISC0,
and MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."

Do so by setting those bits so that code down the MCE processing path
doesn't need to be changed.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-5-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d7dff23..3c9a25b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -441,6 +441,20 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 		if (mca_cfg.rip_msr)
 			m->ip = mce_rdmsrl(mca_cfg.rip_msr);
 	}
+
+	/*
+	 * Error handlers should save the values in MCA_ADDR, MCA_MISC0, and
+	 * MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
+	 * MCA_STATUS[SyndV] are zero.
+	 */
+	if (m->cpuvendor == X86_VENDOR_AMD) {
+		u64 status = MCI_STATUS_ADDRV | MCI_STATUS_MISCV;
+
+		if (mce_flags.smca)
+			status |= MCI_STATUS_SYNDV;
+
+		m->status |= status;
+	}
 }
 
 int mce_available(struct cpuinfo_x86 *c)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce/AMD: Pass the bank number to smca_get_bank_type()
  2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
@ 2018-02-21 18:00   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Yazen Ghannam @ 2018-02-21 18:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, linux-edac, bp, linux-kernel, tony.luck, mingo, peterz,
	yazen.ghannam, bp, tglx, torvalds

Commit-ID:  e5d6a126d4c473499f354254a15ca0c2d8c84ca3
Gitweb:     https://git.kernel.org/tip/e5d6a126d4c473499f354254a15ca0c2d8c84ca3
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Wed, 21 Feb 2018 11:18:57 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:54 +0100

x86/mce/AMD: Pass the bank number to smca_get_bank_type()

Pass the bank number to smca_get_bank_type() since that's all we need.

Also, we should compare the bank number to MAX_NR_BANKS (size of the
smca_banks array) not the number of bank types. Bank types are reused
for multiple banks, so the number of types can be different from the
number of banks in a system and thus we could return an invalid bank
type.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.14.x
Cc: <stable@vger.kernel.org> # 4.14.x: 11cf887728a3 x86/MCE/AMD: Define a function to get SMCA bank type
Cc: <stable@vger.kernel.org> # 4.14.x: c6708d50f166 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-6-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad2..7fbb19c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
 }
 EXPORT_SYMBOL_GPL(smca_get_long_name);
 
-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
 {
 	struct smca_bank *b;
 
-	if (m->bank >= N_SMCA_BANK_TYPES)
+	if (bank >= MAX_NR_BANKS)
 		return N_SMCA_BANK_TYPES;
 
-	b = &smca_banks[m->bank];
+	b = &smca_banks[bank];
 	if (!b->hwid)
 		return N_SMCA_BANK_TYPES;
 
@@ -760,7 +760,7 @@ bool amd_mce_is_memory_error(struct mce *m)
 	u8 xec = (m->status >> 16) & 0x1f;
 
 	if (mce_flags.smca)
-		return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+		return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
 
 	return m->bank == 4 && xec == 0x8;
 }
@@ -1063,7 +1063,7 @@ static struct kobj_type threshold_ktype = {
 
 static const char *get_name(unsigned int bank, struct threshold_block *b)
 {
-	unsigned int bank_type;
+	enum smca_bank_types bank_type;
 
 	if (!mce_flags.smca) {
 		if (b && bank == 4)
@@ -1072,11 +1072,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
 		return th_names[bank];
 	}
 
-	if (!smca_banks[bank].hwid)
+	bank_type = smca_get_bank_type(bank);
+	if (bank_type >= N_SMCA_BANK_TYPES)
 		return NULL;
 
-	bank_type = smca_banks[bank].hwid->bank_type;
-
 	if (b && bank_type == SMCA_UMC) {
 		if (b->block < ARRAY_SIZE(smca_umc_block_names))
 			return smca_umc_block_names[b->block];

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
  2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
@ 2018-02-21 18:00   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Yazen Ghannam @ 2018-02-21 18:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, mingo, yazen.ghannam, peterz, bp, bp, linux-kernel,
	tony.luck, linux-edac, torvalds, hpa

Commit-ID:  68627a697c195937672ce07683094c72b1174786
Gitweb:     https://git.kernel.org/tip/68627a697c195937672ce07683094c72b1174786
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Wed, 21 Feb 2018 11:18:58 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:54 +0100

x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type

Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
bank 4 in the smca_banks array. This means that when we check if a bank
is initialized, like during boot or resume, we will see that bank 4 is
not initialized and try to initialize it.

This will cause a call trace, when resuming from suspend, due to
rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue
an IPI but we're running with interrupts disabled. This triggers:

  WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0
  ...

Reserved banks will be read-as-zero, so their MCA_IPID register will be
zero. So, like the smca_banks array, the threshold_banks array will not
have an entry for a reserved bank since all its MCA_MISC* registers will
be zero.

Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.

Use the "Reserved" type when checking if a bank is reserved. It's
possible that other bank numbers may be reserved on future systems.

Don't try to find the block address on reserved banks.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.14.x
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-7-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mce.h           |  1 +
 arch/x86/kernel/cpu/mcheck/mce_amd.c |  7 +++++++
 drivers/edac/mce_amd.c               | 11 +++++++----
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c3fb9a7..8c7b3e5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -294,6 +294,7 @@ enum smca_bank_types {
 	SMCA_IF,	/* Instruction Fetch */
 	SMCA_L2_CACHE,	/* L2 Cache */
 	SMCA_DE,	/* Decoder Unit */
+	SMCA_RESERVED,	/* Reserved */
 	SMCA_EX,	/* Execution Unit */
 	SMCA_FP,	/* Floating Point */
 	SMCA_L3_CACHE,	/* L3 Cache */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7fbb19c..d8ba9d0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
 	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
 	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
 	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
+	[SMCA_RESERVED]	= { "reserved",		"Reserved" },
 	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
 	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
 	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
 static struct smca_hwid smca_hwid_mcatypes[] = {
 	/* { bank_type, hwid_mcatype, xec_bitmap } */
 
+	/* Reserved type */
+	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
 	/* ZN Core (HWID=0xB0) MCA types */
 	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
 	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
 	u32 addr = 0, offset = 0;
 
 	if (mce_flags.smca) {
+		if (smca_get_bank_type(bank) == SMCA_RESERVED)
+			return addr;
+
 		if (!block) {
 			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
 		} else {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671..2ab4d61 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m)
 static void decode_smca_error(struct mce *m)
 {
 	struct smca_hwid *hwid;
-	unsigned int bank_type;
+	enum smca_bank_types bank_type;
 	const char *ip_name;
 	u8 xec = XEC(m->status, xec_mask);
 
 	if (m->bank >= ARRAY_SIZE(smca_banks))
 		return;
 
-	if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
-		pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
-
 	hwid = smca_banks[m->bank].hwid;
 	if (!hwid)
 		return;
 
 	bank_type = hwid->bank_type;
+
+	if (bank_type == SMCA_RESERVED) {
+		pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
+		return;
+	}
+
 	ip_name = smca_get_long_name(bank_type);
 
 	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [tip:ras/core] x86/mce/AMD: Get address from already initialized block
  2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
@ 2018-02-21 18:01   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Yazen Ghannam @ 2018-02-21 18:01 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, linux-edac, peterz, mingo, tony.luck, hpa, tglx, bp,
	yazen.ghannam, torvalds, linux-kernel

Commit-ID:  27bd59502702fe51d9eb00450a75b727ec6bfcb4
Gitweb:     https://git.kernel.org/tip/27bd59502702fe51d9eb00450a75b727ec6bfcb4
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Wed, 21 Feb 2018 11:18:59 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:55 +0100

x86/mce/AMD: Get address from already initialized block

The block address is saved after the block is initialized when
threshold_init_device() is called.

Use the saved block address, if available, rather than trying to
rediscover it.

This will avoid a call trace, when resuming from suspend, due to the
rdmsr_safe_on_cpu() call in get_block_address(). The rdmsr_safe_on_cpu()
call issues an IPI but we're running with interrupts disabled. This
triggers:

    WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.14.x
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-8-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index d8ba9d0..12bc286 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
 {
 	u32 addr = 0, offset = 0;
 
+	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+		return addr;
+
+	/* Get address from already initialized block. */
+	if (per_cpu(threshold_banks, cpu)) {
+		struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
+
+		if (bankp && bankp->blocks) {
+			struct threshold_block *blockp = &bankp->blocks[block];
+
+			if (blockp)
+				return blockp->address;
+		}
+	}
+
 	if (mce_flags.smca) {
 		if (smca_get_bank_type(bank) == SMCA_RESERVED)
 			return addr;

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-02-21 18:02 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
2018-02-21 11:39   ` Ingo Molnar
2018-02-21 13:28     ` Borislav Petkov
2018-02-21 17:58   ` [tip:ras/core] x86/mce: " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
2018-02-21 17:58   ` [tip:ras/core] x86/mce: Convert 'struct mca_config' " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
2018-02-21 17:59   ` [tip:ras/core] x86/mce: Issue the 'mcelog --ascii' message only " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
2018-02-21 17:59   ` [tip:ras/core] x86/mce/AMD: " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
2018-02-21 18:00   ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
2018-02-21 18:00   ` [tip:ras/core] x86/mce/AMD, " tip-bot for Yazen Ghannam
2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
2018-02-21 18:01   ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
2018-02-21 10:19 ` [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code Borislav Petkov

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).