* [PATCH 0/8] x86/RAS: Some accumulated stuff
@ 2018-02-21 10:18 Borislav Petkov
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
` (7 more replies)
0 siblings, 8 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Borislav Petkov <bp@suse.de>
Hi,
The first 3 are cleanups; the 4th makes the MCA code collect error info without
looking at the Valid bits because there might be cases where they're not
set.
The last 4 fix the IPI-with-IRQs-off issue which was reported recently.
Thx.
Borislav Petkov (4):
x86/MCE: Put private structures and definitions into the internal
header
x86/MCE: Convert mca_config bools to a bitfield
x86/mce: Issue the mcelog --ascii message on !AMD
x86/MCE/AMD: Collect error info even if valid bits are not set
Yazen Ghannam (4):
x86/MCE/AMD: Pass the bank number to smca_get_bank_type()
x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
x86/MCE/AMD: Get address from already initialized block
x86/MCE/AMD: Carve out SMCA get_block_address() code
arch/x86/include/asm/mce.h | 53 +--------------------
arch/x86/kernel/cpu/mcheck/mce-internal.h | 55 ++++++++++++++++++++++
arch/x86/kernel/cpu/mcheck/mce.c | 34 ++++++++++----
arch/x86/kernel/cpu/mcheck/mce_amd.c | 78 ++++++++++++++++++++-----------
drivers/edac/mce_amd.c | 11 +++--
5 files changed, 140 insertions(+), 91 deletions(-)
--
2.13.0
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 11:39 ` Ingo Molnar
2018-02-21 17:58 ` [tip:ras/core] x86/mce: " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
` (6 subsequent siblings)
7 siblings, 2 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Borislav Petkov <bp@suse.de>
... because they don't need to be exported outside of MCE.
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/include/asm/mce.h | 52 -------------------------------
arch/x86/kernel/cpu/mcheck/mce-internal.h | 52 +++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+), 52 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5ba658..c3fb9a792e13 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
struct mce entry[MCE_LOG_LEN];
};
-struct mca_config {
- bool dont_log_ce;
- bool cmci_disabled;
- bool lmce_disabled;
- bool ignore_ce;
- bool disabled;
- bool ser;
- bool recovery;
- bool bios_cmci_threshold;
- u8 banks;
- s8 bootlog;
- int tolerant;
- int monarch_timeout;
- int panic_timeout;
- u32 rip_msr;
-};
-
-struct mce_vendor_flags {
- /*
- * Indicates that overflow conditions are not fatal, when set.
- */
- __u64 overflow_recov : 1,
-
- /*
- * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
- * Recovery. It indicates support for data poisoning in HW and deferred
- * error interrupts.
- */
- succor : 1,
-
- /*
- * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
- * the register space for each MCA bank and also increases number of
- * banks. Also, to accommodate the new banks and registers, the MCA
- * register space is moved to a new MSR range.
- */
- smca : 1,
-
- __reserved_0 : 61;
-};
-
-struct mca_msr_regs {
- u32 (*ctl) (int bank);
- u32 (*status) (int bank);
- u32 (*addr) (int bank);
- u32 (*misc) (int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX,
MCE_PRIO_SRAO = INT_MAX - 1,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..986c8dd2d320 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,6 +113,58 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
+struct mca_config {
+ bool dont_log_ce;
+ bool cmci_disabled;
+ bool lmce_disabled;
+ bool ignore_ce;
+ bool disabled;
+ bool ser;
+ bool recovery;
+ bool bios_cmci_threshold;
+ u8 banks;
+ s8 bootlog;
+ int tolerant;
+ int monarch_timeout;
+ int panic_timeout;
+ u32 rip_msr;
+};
+
extern struct mca_config mca_cfg;
+struct mce_vendor_flags {
+ /*
+ * Indicates that overflow conditions are not fatal, when set.
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+ * Recovery. It indicates support for data poisoning in HW and deferred
+ * error interrupts.
+ */
+ succor : 1,
+
+ /*
+ * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+ * the register space for each MCA bank and also increases number of
+ * banks. Also, to accommodate the new banks and registers, the MCA
+ * register space is moved to a new MSR range.
+ */
+ smca : 1,
+
+ __reserved_0 : 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+ u32 (*ctl) (int bank);
+ u32 (*status) (int bank);
+ u32 (*addr) (int bank);
+ u32 (*misc) (int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
#endif /* __X86_MCE_INTERNAL_H__ */
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 17:58 ` [tip:ras/core] x86/mce: Convert 'struct mca_config' " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
` (5 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Borislav Petkov <bp@suse.de>
... to save space when future flags are added.
No functionality change.
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce-internal.h | 13 ++++++++-----
arch/x86/kernel/cpu/mcheck/mce.c | 16 ++++++++--------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 986c8dd2d320..ceb67cd5918f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -116,12 +116,15 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
struct mca_config {
bool dont_log_ce;
bool cmci_disabled;
- bool lmce_disabled;
bool ignore_ce;
- bool disabled;
- bool ser;
- bool recovery;
- bool bios_cmci_threshold;
+
+ __u64 lmce_disabled : 1,
+ disabled : 1,
+ ser : 1,
+ recovery : 1,
+ bios_cmci_threshold : 1,
+ __reserved : 59;
+
u8 banks;
s8 bootlog;
int tolerant;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 75f405ac085c..9c8cb58c77f8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1506,7 +1506,7 @@ static int __mcheck_cpu_cap_init(void)
mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
- mca_cfg.ser = true;
+ mca_cfg.ser = 1;
return 0;
}
@@ -1814,12 +1814,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
return;
}
if (mce_gen_pool_init()) {
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
return;
}
@@ -1897,11 +1897,11 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
- cfg->disabled = true;
+ cfg->disabled = 1;
else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true;
else if (!strcmp(str, "no_lmce"))
- cfg->lmce_disabled = true;
+ cfg->lmce_disabled = 1;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
@@ -1909,9 +1909,9 @@ static int __init mcheck_enable(char *str)
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
- cfg->bios_cmci_threshold = true;
+ cfg->bios_cmci_threshold = 1;
else if (!strcmp(str, "recovery"))
- cfg->recovery = true;
+ cfg->recovery = 1;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
@@ -2376,7 +2376,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
return 1;
}
__setup("nomce", mcheck_disable);
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 17:59 ` [tip:ras/core] x86/mce: Issue the 'mcelog --ascii' message only " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
` (4 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Borislav Petkov <bp@suse.de>
mcelog cannot decode AMD MCEs.
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9c8cb58c77f8..b16b184d90c5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -264,7 +264,9 @@ static void __print_mce(struct mce *m)
static void print_mce(struct mce *m)
{
__print_mce(m);
- pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
+
+ if (m->cpuvendor != X86_VENDOR_AMD)
+ pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
` (2 preceding siblings ...)
2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 17:59 ` [tip:ras/core] x86/mce/AMD: " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
` (3 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Borislav Petkov <bp@suse.de>
The MCA banks log error info into MCA_ADDR, MCA_MISC0, and MCA_SYND even
if the corresponding valid bits are not set:
"Error handlers should save the values in MCA_ADDR, MCA_MISC0,
and MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."
Do so by setting those bits so that code down the MCE processing path
doesn't need to be changed.
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b16b184d90c5..dd713335f5d9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -437,6 +437,20 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
if (mca_cfg.rip_msr)
m->ip = mce_rdmsrl(mca_cfg.rip_msr);
}
+
+ /*
+ * Error handlers should save the values in MCA_ADDR, MCA_MISC0, and
+ * MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
+ * MCA_STATUS[SyndV] are zero.
+ */
+ if (m->cpuvendor == X86_VENDOR_AMD) {
+ u64 status = MCI_STATUS_ADDRV | MCI_STATUS_MISCV;
+
+ if (mce_flags.smca)
+ status |= MCI_STATUS_SYNDV;
+
+ m->status |= status;
+ }
}
int mce_available(struct cpuinfo_x86 *c)
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type()
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
` (3 preceding siblings ...)
2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 18:00 ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
` (2 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Yazen Ghannam <yazen.ghannam@amd.com>
Pass the bank number to smca_get_bank_type() since that's all we need.
Also, we should compare the bank number to MAX_NR_BANKS (size of the
smca_banks array), not the number of bank types. Bank types are reused
for multiple banks, so the number of types can be different from the
number of banks in a system and thus we could return an invalid bank
type.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org> # 4.14.x: 11cf887728a3 x86/MCE/AMD: Define a function to get SMCA bank type
Cc: <stable@vger.kernel.org> # 4.14.x: c6708d50f166 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
Cc: <stable@vger.kernel.org> # 4.14.x
Link: https://lkml.kernel.org/r/20180215210943.11530-1-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad242324..7fbb19cb1859 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
struct smca_bank *b;
- if (m->bank >= N_SMCA_BANK_TYPES)
+ if (bank >= MAX_NR_BANKS)
return N_SMCA_BANK_TYPES;
- b = &smca_banks[m->bank];
+ b = &smca_banks[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;
@@ -760,7 +760,7 @@ bool amd_mce_is_memory_error(struct mce *m)
u8 xec = (m->status >> 16) & 0x1f;
if (mce_flags.smca)
- return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+ return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
@@ -1063,7 +1063,7 @@ static struct kobj_type threshold_ktype = {
static const char *get_name(unsigned int bank, struct threshold_block *b)
{
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
if (!mce_flags.smca) {
if (b && bank == 4)
@@ -1072,11 +1072,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}
- if (!smca_banks[bank].hwid)
+ bank_type = smca_get_bank_type(bank);
+ if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
- bank_type = smca_banks[bank].hwid->bank_type;
-
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
` (4 preceding siblings ...)
2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 18:00 ` [tip:ras/core] x86/mce/AMD, " tip-bot for Yazen Ghannam
2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
2018-02-21 10:19 ` [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code Borislav Petkov
7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Yazen Ghannam <yazen.ghannam@amd.com>
Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
bank 4 in the smca_banks array. This means that when we check if a bank
is initialized, like during boot or resume, we will see that bank 4 is
not initialized and try to initialize it.
This will cause a call trace, when resuming from suspend, due to
rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue
an IPI but we're running with interrupts disabled. This triggers:
WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0
...
Reserved banks will be read-as-zero, so their MCA_IPID register will be
zero. So, like the smca_banks array, the threshold_banks array will not
have an entry for a reserved bank since all its MCA_MISC* registers will
be zero.
Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.
Use the "Reserved" type when checking if a bank is reserved. It's
possible that other bank numbers may be reserved on future systems.
Don't try to find the block address on reserved banks.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org> # 4.14.x
Link: https://lkml.kernel.org/r/20180215210943.11530-2-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce_amd.c | 7 +++++++
drivers/edac/mce_amd.c | 11 +++++++----
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c3fb9a792e13..8c7b3e5a2d01 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -294,6 +294,7 @@ enum smca_bank_types {
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
+ SMCA_RESERVED, /* Reserved */
SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7fbb19cb1859..d8ba9d0c3f01 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
+ /* Reserved type */
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
u32 addr = 0, offset = 0;
if (mce_flags.smca) {
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
if (!block) {
addr = MSR_AMD64_SMCA_MCx_MISC(bank);
} else {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671c7a38..2ab4d61ee47e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m)
static void decode_smca_error(struct mce *m)
{
struct smca_hwid *hwid;
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
const char *ip_name;
u8 xec = XEC(m->status, xec_mask);
if (m->bank >= ARRAY_SIZE(smca_banks))
return;
- if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
- pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
-
hwid = smca_banks[m->bank].hwid;
if (!hwid)
return;
bank_type = hwid->bank_type;
+
+ if (bank_type == SMCA_RESERVED) {
+ pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
+ return;
+ }
+
ip_name = smca_get_long_name(bank_type);
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
` (5 preceding siblings ...)
2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
@ 2018-02-21 10:18 ` Borislav Petkov
2018-02-21 18:01 ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
2018-02-21 10:19 ` [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code Borislav Petkov
7 siblings, 1 reply; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:18 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Yazen Ghannam <yazen.ghannam@amd.com>
The block address is saved after the block is initialized when
threshold_init_device() is called.
Use the saved block address, if available, rather than trying to
rediscover it.
This will avoid a call trace, when resuming from suspend, due to the
rdmsr_safe_on_cpu() call in get_block_address(). The rdmsr_safe_on_cpu()
call issues an IPI but we're running with interrupts disabled. This
triggers:
WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Cc: <stable@vger.kernel.org> # 4.14.x
Link: https://lkml.kernel.org/r/20180215210943.11530-3-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index d8ba9d0c3f01..12bc2863a4d6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
{
u32 addr = 0, offset = 0;
+ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ return addr;
+
+ /* Get address from already initialized block. */
+ if (per_cpu(threshold_banks, cpu)) {
+ struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
+
+ if (bankp && bankp->blocks) {
+ struct threshold_block *blockp = &bankp->blocks[block];
+
+ if (blockp)
+ return blockp->address;
+ }
+ }
+
if (mce_flags.smca) {
if (smca_get_bank_type(bank) == SMCA_RESERVED)
return addr;
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
` (6 preceding siblings ...)
2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
@ 2018-02-21 10:19 ` Borislav Petkov
7 siblings, 0 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 10:19 UTC (permalink / raw)
To: X86 ML; +Cc: Tony Luck, LKML
From: Yazen Ghannam <yazen.ghannam@amd.com>
Carve out the SMCA code in get_block_address() into a separate helper
function.
No functional change.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20180215210943.11530-4-Yazen.Ghannam@amd.com
[ Save an indentation level. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 57 ++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 26 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 12bc2863a4d6..f7666eef4a87 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -431,6 +431,35 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
+static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
+ unsigned int block)
+{
+ u32 low, high;
+ u32 addr = 0;
+
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ /*
+ * For SMCA enabled processors, BLKPTR field of the first MISC register
+ * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+ */
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+
+ return addr;
+}
+
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
@@ -451,32 +480,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
}
}
- if (mce_flags.smca) {
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
- if (!block) {
- addr = MSR_AMD64_SMCA_MCx_MISC(bank);
- } else {
- /*
- * For SMCA enabled processors, BLKPTR field of the
- * first MISC register (MCx_MISC0) indicates presence of
- * additional MISC register set (MISC1-4).
- */
- u32 low, high;
-
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
-
- if (!(low & MCI_CONFIG_MCAX))
- return addr;
-
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
- }
- return addr;
- }
+ if (mce_flags.smca)
+ return smca_get_block_address(cpu, bank, block);
/* Fall back to method we used for older processors: */
switch (block) {
--
2.13.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
@ 2018-02-21 11:39 ` Ingo Molnar
2018-02-21 13:28 ` Borislav Petkov
2018-02-21 17:58 ` [tip:ras/core] x86/mce: " tip-bot for Borislav Petkov
1 sibling, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2018-02-21 11:39 UTC (permalink / raw)
To: Borislav Petkov; +Cc: X86 ML, Tony Luck, LKML
* Borislav Petkov <bp@alien8.de> wrote:
> From: Borislav Petkov <bp@suse.de>
>
> ... because they don't need to be exported outside of MCE.
>
> Signed-off-by: Borislav Petkov <bp@suse.de>
> ---
> arch/x86/include/asm/mce.h | 52 -------------------------------
> arch/x86/kernel/cpu/mcheck/mce-internal.h | 52 +++++++++++++++++++++++++++++++
> 2 files changed, 52 insertions(+), 52 deletions(-)
Hm, this doesn't apply to latest upstream anymore:
Hunk #1 FAILED at 113.
1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kernel/cpu/mcheck/mce-internal.h.rej
mind forward porting it?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header
2018-02-21 11:39 ` Ingo Molnar
@ 2018-02-21 13:28 ` Borislav Petkov
0 siblings, 0 replies; 18+ messages in thread
From: Borislav Petkov @ 2018-02-21 13:28 UTC (permalink / raw)
To: Ingo Molnar; +Cc: X86 ML, Tony Luck, LKML
On Wed, Feb 21, 2018 at 12:39:05PM +0100, Ingo Molnar wrote:
> Hm, this doesn't apply to latest upstream anymore:
>
> Hunk #1 FAILED at 113.
> 1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kernel/cpu/mcheck/mce-internal.h.rej
>
> mind forward porting it?
Ah, you took Tony's patch in the meantime. Ok, here's a new version.
The others I'm not sending again as they should apply ok with a small
offset.
Thx.
---
From: Borislav Petkov <bp@suse.de>
Date: Mon, 20 Nov 2017 17:18:25 +0100
Subject: [PATCH] x86/MCE: Put private structures and definitions into the internal header
... because they don't need to be exported outside of MCE.
Signed-off-by: Borislav Petkov <bp@suse.de>
---
arch/x86/include/asm/mce.h | 52 ----------------------------
arch/x86/kernel/cpu/mcheck/mce-internal.h | 56 +++++++++++++++++++++++++++++--
2 files changed, 54 insertions(+), 54 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5ba658..c3fb9a792e13 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
struct mce entry[MCE_LOG_LEN];
};
-struct mca_config {
- bool dont_log_ce;
- bool cmci_disabled;
- bool lmce_disabled;
- bool ignore_ce;
- bool disabled;
- bool ser;
- bool recovery;
- bool bios_cmci_threshold;
- u8 banks;
- s8 bootlog;
- int tolerant;
- int monarch_timeout;
- int panic_timeout;
- u32 rip_msr;
-};
-
-struct mce_vendor_flags {
- /*
- * Indicates that overflow conditions are not fatal, when set.
- */
- __u64 overflow_recov : 1,
-
- /*
- * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
- * Recovery. It indicates support for data poisoning in HW and deferred
- * error interrupts.
- */
- succor : 1,
-
- /*
- * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
- * the register space for each MCA bank and also increases number of
- * banks. Also, to accommodate the new banks and registers, the MCA
- * register space is moved to a new MSR range.
- */
- smca : 1,
-
- __reserved_0 : 61;
-};
-
-struct mca_msr_regs {
- u32 (*ctl) (int bank);
- u32 (*status) (int bank);
- u32 (*addr) (int bank);
- u32 (*misc) (int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX,
MCE_PRIO_SRAO = INT_MAX - 1,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index e956eb267061..ed3c5aee22cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
-extern struct mca_config mca_cfg;
-
#ifndef CONFIG_X86_64
/*
* On 32-bit systems it would be difficult to safely unmap a poison page
@@ -130,4 +128,58 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
#define mce_unmap_kpfn mce_unmap_kpfn
#endif
+struct mca_config {
+ bool dont_log_ce;
+ bool cmci_disabled;
+ bool lmce_disabled;
+ bool ignore_ce;
+ bool disabled;
+ bool ser;
+ bool recovery;
+ bool bios_cmci_threshold;
+ u8 banks;
+ s8 bootlog;
+ int tolerant;
+ int monarch_timeout;
+ int panic_timeout;
+ u32 rip_msr;
+};
+
+extern struct mca_config mca_cfg;
+
+struct mce_vendor_flags {
+ /*
+ * Indicates that overflow conditions are not fatal, when set.
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+ * Recovery. It indicates support for data poisoning in HW and deferred
+ * error interrupts.
+ */
+ succor : 1,
+
+ /*
+ * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+ * the register space for each MCA bank and also increases number of
+ * banks. Also, to accommodate the new banks and registers, the MCA
+ * register space is moved to a new MSR range.
+ */
+ smca : 1,
+
+ __reserved_0 : 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+ u32 (*ctl) (int bank);
+ u32 (*status) (int bank);
+ u32 (*addr) (int bank);
+ u32 (*misc) (int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
#endif /* __X86_MCE_INTERNAL_H__ */
--
2.13.0
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce: Put private structures and definitions into the internal header
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
2018-02-21 11:39 ` Ingo Molnar
@ 2018-02-21 17:58 ` tip-bot for Borislav Petkov
1 sibling, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:58 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, linux-edac, torvalds, tony.luck, peterz, bp, hpa,
bp, tglx, mingo
Commit-ID: a189c03235639a31343215f82b83b49985c55336
Gitweb: https://git.kernel.org/tip/a189c03235639a31343215f82b83b49985c55336
Author: Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:53 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:53 +0100
x86/mce: Put private structures and definitions into the internal header
... because they don't need to be exported outside of MCE.
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-2-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/include/asm/mce.h | 52 ----------------------------
arch/x86/kernel/cpu/mcheck/mce-internal.h | 56 +++++++++++++++++++++++++++++--
2 files changed, 54 insertions(+), 54 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5..c3fb9a7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
struct mce entry[MCE_LOG_LEN];
};
-struct mca_config {
- bool dont_log_ce;
- bool cmci_disabled;
- bool lmce_disabled;
- bool ignore_ce;
- bool disabled;
- bool ser;
- bool recovery;
- bool bios_cmci_threshold;
- u8 banks;
- s8 bootlog;
- int tolerant;
- int monarch_timeout;
- int panic_timeout;
- u32 rip_msr;
-};
-
-struct mce_vendor_flags {
- /*
- * Indicates that overflow conditions are not fatal, when set.
- */
- __u64 overflow_recov : 1,
-
- /*
- * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
- * Recovery. It indicates support for data poisoning in HW and deferred
- * error interrupts.
- */
- succor : 1,
-
- /*
- * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
- * the register space for each MCA bank and also increases number of
- * banks. Also, to accommodate the new banks and registers, the MCA
- * register space is moved to a new MSR range.
- */
- smca : 1,
-
- __reserved_0 : 61;
-};
-
-struct mca_msr_regs {
- u32 (*ctl) (int bank);
- u32 (*status) (int bank);
- u32 (*addr) (int bank);
- u32 (*misc) (int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX,
MCE_PRIO_SRAO = INT_MAX - 1,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index e956eb2..ed3c5ae 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
-extern struct mca_config mca_cfg;
-
#ifndef CONFIG_X86_64
/*
* On 32-bit systems it would be difficult to safely unmap a poison page
@@ -130,4 +128,58 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
#define mce_unmap_kpfn mce_unmap_kpfn
#endif
+struct mca_config {
+ bool dont_log_ce;
+ bool cmci_disabled;
+ bool lmce_disabled;
+ bool ignore_ce;
+ bool disabled;
+ bool ser;
+ bool recovery;
+ bool bios_cmci_threshold;
+ u8 banks;
+ s8 bootlog;
+ int tolerant;
+ int monarch_timeout;
+ int panic_timeout;
+ u32 rip_msr;
+};
+
+extern struct mca_config mca_cfg;
+
+struct mce_vendor_flags {
+ /*
+ * Indicates that overflow conditions are not fatal, when set.
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+ * Recovery. It indicates support for data poisoning in HW and deferred
+ * error interrupts.
+ */
+ succor : 1,
+
+ /*
+ * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+ * the register space for each MCA bank and also increases number of
+ * banks. Also, to accommodate the new banks and registers, the MCA
+ * register space is moved to a new MSR range.
+ */
+ smca : 1,
+
+ __reserved_0 : 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+ u32 (*ctl) (int bank);
+ u32 (*status) (int bank);
+ u32 (*addr) (int bank);
+ u32 (*misc) (int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
#endif /* __X86_MCE_INTERNAL_H__ */
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce: Convert 'struct mca_config' bools to a bitfield
2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
@ 2018-02-21 17:58 ` tip-bot for Borislav Petkov
0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:58 UTC (permalink / raw)
To: linux-tip-commits
Cc: bp, peterz, hpa, linux-kernel, torvalds, mingo, tglx, bp,
linux-edac, tony.luck
Commit-ID: 09933946643bcc8e0a9bd4ede192ed854e22db8f
Gitweb: https://git.kernel.org/tip/09933946643bcc8e0a9bd4ede192ed854e22db8f
Author: Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:54 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:53 +0100
x86/mce: Convert 'struct mca_config' bools to a bitfield
... to save space when future flags are added.
No functionality change.
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-3-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/cpu/mcheck/mce-internal.h | 13 ++++++++-----
arch/x86/kernel/cpu/mcheck/mce.c | 16 ++++++++--------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed3c5ae..374d1aa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -131,12 +131,15 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
struct mca_config {
bool dont_log_ce;
bool cmci_disabled;
- bool lmce_disabled;
bool ignore_ce;
- bool disabled;
- bool ser;
- bool recovery;
- bool bios_cmci_threshold;
+
+ __u64 lmce_disabled : 1,
+ disabled : 1,
+ ser : 1,
+ recovery : 1,
+ bios_cmci_threshold : 1,
+ __reserved : 59;
+
u8 banks;
s8 bootlog;
int tolerant;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ff94d1..db5b1e4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1511,7 +1511,7 @@ static int __mcheck_cpu_cap_init(void)
mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
- mca_cfg.ser = true;
+ mca_cfg.ser = 1;
return 0;
}
@@ -1819,12 +1819,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
return;
}
if (mce_gen_pool_init()) {
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
return;
}
@@ -1902,11 +1902,11 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
- cfg->disabled = true;
+ cfg->disabled = 1;
else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true;
else if (!strcmp(str, "no_lmce"))
- cfg->lmce_disabled = true;
+ cfg->lmce_disabled = 1;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
@@ -1914,9 +1914,9 @@ static int __init mcheck_enable(char *str)
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
- cfg->bios_cmci_threshold = true;
+ cfg->bios_cmci_threshold = 1;
else if (!strcmp(str, "recovery"))
- cfg->recovery = true;
+ cfg->recovery = 1;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
@@ -2381,7 +2381,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
return 1;
}
__setup("nomce", mcheck_disable);
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce: Issue the 'mcelog --ascii' message only on !AMD
2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
@ 2018-02-21 17:59 ` tip-bot for Borislav Petkov
0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:59 UTC (permalink / raw)
To: linux-tip-commits
Cc: tony.luck, bp, hpa, bp, tglx, peterz, torvalds, linux-kernel,
linux-edac, mingo
Commit-ID: b2fbf6f282147b42d669a4bd4a7b1de2e2d6a792
Gitweb: https://git.kernel.org/tip/b2fbf6f282147b42d669a4bd4a7b1de2e2d6a792
Author: Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:55 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:53 +0100
x86/mce: Issue the 'mcelog --ascii' message only on !AMD
mcelog cannot decode AMD MCEs.
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-4-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/cpu/mcheck/mce.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index db5b1e4..d7dff23 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -268,7 +268,9 @@ static void __print_mce(struct mce *m)
static void print_mce(struct mce *m)
{
__print_mce(m);
- pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
+
+ if (m->cpuvendor != X86_VENDOR_AMD)
+ pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce/AMD: Collect error info even if valid bits are not set
2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
@ 2018-02-21 17:59 ` tip-bot for Borislav Petkov
0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Borislav Petkov @ 2018-02-21 17:59 UTC (permalink / raw)
To: linux-tip-commits
Cc: bp, bp, hpa, peterz, tglx, mingo, torvalds, linux-edac,
tony.luck, linux-kernel
Commit-ID: 4b1e84276a6172980c5bf39aa091ba13e90d6dad
Gitweb: https://git.kernel.org/tip/4b1e84276a6172980c5bf39aa091ba13e90d6dad
Author: Borislav Petkov <bp@suse.de>
AuthorDate: Wed, 21 Feb 2018 11:18:56 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:54 +0100
x86/mce/AMD: Collect error info even if valid bits are not set
The MCA banks log error info into MCA_ADDR, MCA_MISC0, and MCA_SYND even
if the corresponding valid bits are not set:
"Error handlers should save the values in MCA_ADDR, MCA_MISC0,
and MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
MCA_STATUS[SyndV] are zero."
Do so by setting those bits so that code down the MCE processing path
doesn't need to be changed.
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-5-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/cpu/mcheck/mce.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d7dff23..3c9a25b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -441,6 +441,20 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
if (mca_cfg.rip_msr)
m->ip = mce_rdmsrl(mca_cfg.rip_msr);
}
+
+ /*
+ * Error handlers should save the values in MCA_ADDR, MCA_MISC0, and
+ * MCA_SYND even if MCA_STATUS[AddrV], MCA_STATUS[MiscV], and
+ * MCA_STATUS[SyndV] are zero.
+ */
+ if (m->cpuvendor == X86_VENDOR_AMD) {
+ u64 status = MCI_STATUS_ADDRV | MCI_STATUS_MISCV;
+
+ if (mce_flags.smca)
+ status |= MCI_STATUS_SYNDV;
+
+ m->status |= status;
+ }
}
int mce_available(struct cpuinfo_x86 *c)
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce/AMD: Pass the bank number to smca_get_bank_type()
2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
@ 2018-02-21 18:00 ` tip-bot for Yazen Ghannam
0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Yazen Ghannam @ 2018-02-21 18:00 UTC (permalink / raw)
To: linux-tip-commits
Cc: hpa, linux-edac, bp, linux-kernel, tony.luck, mingo, peterz,
yazen.ghannam, bp, tglx, torvalds
Commit-ID: e5d6a126d4c473499f354254a15ca0c2d8c84ca3
Gitweb: https://git.kernel.org/tip/e5d6a126d4c473499f354254a15ca0c2d8c84ca3
Author: Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Wed, 21 Feb 2018 11:18:57 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:54 +0100
x86/mce/AMD: Pass the bank number to smca_get_bank_type()
Pass the bank number to smca_get_bank_type() since that's all we need.
Also, we should compare the bank number to MAX_NR_BANKS (size of the
smca_banks array) not the number of bank types. Bank types are reused
for multiple banks, so the number of types can be different from the
number of banks in a system and thus we could return an invalid bank
type.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.14.x
Cc: <stable@vger.kernel.org> # 4.14.x: 11cf887728a3 x86/MCE/AMD: Define a function to get SMCA bank type
Cc: <stable@vger.kernel.org> # 4.14.x: c6708d50f166 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-6-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad2..7fbb19c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
struct smca_bank *b;
- if (m->bank >= N_SMCA_BANK_TYPES)
+ if (bank >= MAX_NR_BANKS)
return N_SMCA_BANK_TYPES;
- b = &smca_banks[m->bank];
+ b = &smca_banks[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;
@@ -760,7 +760,7 @@ bool amd_mce_is_memory_error(struct mce *m)
u8 xec = (m->status >> 16) & 0x1f;
if (mce_flags.smca)
- return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+ return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
@@ -1063,7 +1063,7 @@ static struct kobj_type threshold_ktype = {
static const char *get_name(unsigned int bank, struct threshold_block *b)
{
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
if (!mce_flags.smca) {
if (b && bank == 4)
@@ -1072,11 +1072,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}
- if (!smca_banks[bank].hwid)
+ bank_type = smca_get_bank_type(bank);
+ if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
- bank_type = smca_banks[bank].hwid->bank_type;
-
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
@ 2018-02-21 18:00 ` tip-bot for Yazen Ghannam
0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Yazen Ghannam @ 2018-02-21 18:00 UTC (permalink / raw)
To: linux-tip-commits
Cc: tglx, mingo, yazen.ghannam, peterz, bp, bp, linux-kernel,
tony.luck, linux-edac, torvalds, hpa
Commit-ID: 68627a697c195937672ce07683094c72b1174786
Gitweb: https://git.kernel.org/tip/68627a697c195937672ce07683094c72b1174786
Author: Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Wed, 21 Feb 2018 11:18:58 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:54 +0100
x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
bank 4 in the smca_banks array. This means that when we check if a bank
is initialized, like during boot or resume, we will see that bank 4 is
not initialized and try to initialize it.
This will cause a call trace, when resuming from suspend, due to
rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue
an IPI but we're running with interrupts disabled. This triggers:
WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0
...
Reserved banks will be read-as-zero, so their MCA_IPID register will be
zero. So, like the smca_banks array, the threshold_banks array will not
have an entry for a reserved bank since all its MCA_MISC* registers will
be zero.
Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.
Use the "Reserved" type when checking if a bank is reserved. It's
possible that other bank numbers may be reserved on future systems.
Don't try to find the block address on reserved banks.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.14.x
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-7-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce_amd.c | 7 +++++++
drivers/edac/mce_amd.c | 11 +++++++----
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c3fb9a7..8c7b3e5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -294,6 +294,7 @@ enum smca_bank_types {
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
+ SMCA_RESERVED, /* Reserved */
SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7fbb19c..d8ba9d0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
+ /* Reserved type */
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
u32 addr = 0, offset = 0;
if (mce_flags.smca) {
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
if (!block) {
addr = MSR_AMD64_SMCA_MCx_MISC(bank);
} else {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671..2ab4d61 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m)
static void decode_smca_error(struct mce *m)
{
struct smca_hwid *hwid;
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
const char *ip_name;
u8 xec = XEC(m->status, xec_mask);
if (m->bank >= ARRAY_SIZE(smca_banks))
return;
- if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
- pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
-
hwid = smca_banks[m->bank].hwid;
if (!hwid)
return;
bank_type = hwid->bank_type;
+
+ if (bank_type == SMCA_RESERVED) {
+ pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
+ return;
+ }
+
ip_name = smca_get_long_name(bank_type);
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [tip:ras/core] x86/mce/AMD: Get address from already initialized block
2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
@ 2018-02-21 18:01 ` tip-bot for Yazen Ghannam
0 siblings, 0 replies; 18+ messages in thread
From: tip-bot for Yazen Ghannam @ 2018-02-21 18:01 UTC (permalink / raw)
To: linux-tip-commits
Cc: bp, linux-edac, peterz, mingo, tony.luck, hpa, tglx, bp,
yazen.ghannam, torvalds, linux-kernel
Commit-ID: 27bd59502702fe51d9eb00450a75b727ec6bfcb4
Gitweb: https://git.kernel.org/tip/27bd59502702fe51d9eb00450a75b727ec6bfcb4
Author: Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Wed, 21 Feb 2018 11:18:59 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 21 Feb 2018 17:00:55 +0100
x86/mce/AMD: Get address from already initialized block
The block address is saved after the block is initialized when
threshold_init_device() is called.
Use the saved block address, if available, rather than trying to
rediscover it.
This will avoid a call trace, when resuming from suspend, due to the
rdmsr_safe_on_cpu() call in get_block_address(). The rdmsr_safe_on_cpu()
call issues an IPI but we're running with interrupts disabled. This
triggers:
WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.14.x
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20180221101900.10326-8-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index d8ba9d0..12bc286 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
{
u32 addr = 0, offset = 0;
+ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ return addr;
+
+ /* Get address from already initialized block. */
+ if (per_cpu(threshold_banks, cpu)) {
+ struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
+
+ if (bankp && bankp->blocks) {
+ struct threshold_block *blockp = &bankp->blocks[block];
+
+ if (blockp)
+ return blockp->address;
+ }
+ }
+
if (mce_flags.smca) {
if (smca_get_bank_type(bank) == SMCA_RESERVED)
return addr;
^ permalink raw reply related [flat|nested] 18+ messages in thread
end of thread, other threads:[~2018-02-21 18:02 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-21 10:18 [PATCH 0/8] x86/RAS: Some accumulated stuff Borislav Petkov
2018-02-21 10:18 ` [PATCH 1/8] x86/MCE: Put private structures and definitions into the internal header Borislav Petkov
2018-02-21 11:39 ` Ingo Molnar
2018-02-21 13:28 ` Borislav Petkov
2018-02-21 17:58 ` [tip:ras/core] x86/mce: " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 2/8] x86/MCE: Convert mca_config bools to a bitfield Borislav Petkov
2018-02-21 17:58 ` [tip:ras/core] x86/mce: Convert 'struct mca_config' " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 3/8] x86/mce: Issue the mcelog --ascii message on !AMD Borislav Petkov
2018-02-21 17:59 ` [tip:ras/core] x86/mce: Issue the 'mcelog --ascii' message only " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 4/8] x86/MCE/AMD: Collect error info even if valid bits are not set Borislav Petkov
2018-02-21 17:59 ` [tip:ras/core] x86/mce/AMD: " tip-bot for Borislav Petkov
2018-02-21 10:18 ` [PATCH 5/8] x86/MCE/AMD: Pass the bank number to smca_get_bank_type() Borislav Petkov
2018-02-21 18:00 ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
2018-02-21 10:18 ` [PATCH 6/8] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Borislav Petkov
2018-02-21 18:00 ` [tip:ras/core] x86/mce/AMD, " tip-bot for Yazen Ghannam
2018-02-21 10:18 ` [PATCH 7/8] x86/MCE/AMD: Get address from already initialized block Borislav Petkov
2018-02-21 18:01 ` [tip:ras/core] x86/mce/AMD: " tip-bot for Yazen Ghannam
2018-02-21 10:19 ` [PATCH 8/8] x86/MCE/AMD: Carve out SMCA get_block_address() code Borislav Petkov
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).