linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] x86/mce: Support extended MCA_ADDR address on SMCA systems
@ 2021-06-08 22:10 Smita Koralahalli
  2021-06-08 22:10 ` [PATCH 1/2] x86/mce: Define function to extract ErrorAddr from MCA_ADDR Smita Koralahalli
  2021-06-08 22:10 ` [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes Smita Koralahalli
  0 siblings, 2 replies; 8+ messages in thread
From: Smita Koralahalli @ 2021-06-08 22:10 UTC (permalink / raw)
  To: x86, linux-kernel, linux-edac
  Cc: Borislav Petkov, Tony Luck, Yazen Ghannam, Muralidhara M K,
	Akshay Gupta, Youquan Song, Zhen Lei, Thomas Gleixner,
	Ingo Molnar, H . Peter Anvin, Smita Koralahalli

This series of patches adds support for extended physical addresses on
newer AMD processors such as AMD 'Milan'.

The first patch defines a separate helper function to extract
MCA_ADDR[ErrorAddr].

The second patch adds support for extended ErrorAddr bits in MCA_ADDR.

Smita Koralahalli (2):
  x86/mce: Define function to extract ErrorAddr from MCA_ADDR
  x86/mce: Add support for Extended Physical Address MCA changes

 arch/x86/include/asm/mce.h     |  4 ++++
 arch/x86/kernel/cpu/mce/amd.c  | 41 ++++++++++++++++++++++++++--------
 arch/x86/kernel/cpu/mce/core.c | 13 ++++-------
 3 files changed, 40 insertions(+), 18 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] x86/mce: Define function to extract ErrorAddr from MCA_ADDR
  2021-06-08 22:10 [PATCH 0/2] x86/mce: Support extended MCA_ADDR address on SMCA systems Smita Koralahalli
@ 2021-06-08 22:10 ` Smita Koralahalli
  2021-06-08 22:10 ` [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes Smita Koralahalli
  1 sibling, 0 replies; 8+ messages in thread
From: Smita Koralahalli @ 2021-06-08 22:10 UTC (permalink / raw)
  To: x86, linux-kernel, linux-edac
  Cc: Borislav Petkov, Tony Luck, Yazen Ghannam, Muralidhara M K,
	Akshay Gupta, Youquan Song, Zhen Lei, Thomas Gleixner,
	Ingo Molnar, H . Peter Anvin, Smita Koralahalli

Move MCA_ADDR[ErrorAddr] extraction into a separate helper function. This
will be further refactored in the next patch.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
 arch/x86/include/asm/mce.h     |  2 ++
 arch/x86/kernel/cpu/mce/amd.c  | 14 +++++++++-----
 arch/x86/kernel/cpu/mce/core.c |  7 ++-----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0607ec4f5091..0a1c7224a582 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -357,6 +357,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
 
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
 int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
+void smca_extract_err_addr(struct mce *m);
 
 #else
 
@@ -366,6 +367,7 @@ static inline bool amd_mce_is_memory_error(struct mce *m)		{ return false; };
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)		{ }
 static inline int
 umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)	{ return -EINVAL; };
+static inline void smca_extract_err_addr(struct mce *m)			{ }
 #endif
 
 static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_amd_feature_init(c); }
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 08831acc1d03..f71435e53cdb 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -899,6 +899,13 @@ bool amd_mce_is_memory_error(struct mce *m)
 	return m->bank == 4 && xec == 0x8;
 }
 
+void smca_extract_err_addr(struct mce *m)
+{
+	u8 lsb = (m->addr >> 56) & 0x3f;
+
+	m->addr &= GENMASK_ULL(55, lsb);
+}
+
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 {
 	struct mce m;
@@ -917,11 +924,8 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 		 * Extract [55:<lsb>] where lsb is the least significant
 		 * *valid* bit of the address bits.
 		 */
-		if (mce_flags.smca) {
-			u8 lsb = (m.addr >> 56) & 0x3f;
-
-			m.addr &= GENMASK_ULL(55, lsb);
-		}
+		if (mce_flags.smca)
+			smca_extract_err_addr(&m);
 	}
 
 	if (mce_flags.smca) {
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index bf7fe87a7e88..2c09c1eec50a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -703,11 +703,8 @@ static void mce_read_aux(struct mce *m, int i)
 		 * Extract [55:<lsb>] where lsb is the least significant
 		 * *valid* bit of the address bits.
 		 */
-		if (mce_flags.smca) {
-			u8 lsb = (m->addr >> 56) & 0x3f;
-
-			m->addr &= GENMASK_ULL(55, lsb);
-		}
+		if (mce_flags.smca)
+			smca_extract_err_addr(m);
 	}
 
 	if (mce_flags.smca) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes
  2021-06-08 22:10 [PATCH 0/2] x86/mce: Support extended MCA_ADDR address on SMCA systems Smita Koralahalli
  2021-06-08 22:10 ` [PATCH 1/2] x86/mce: Define function to extract ErrorAddr from MCA_ADDR Smita Koralahalli
@ 2021-06-08 22:10 ` Smita Koralahalli
  2021-06-10 11:55   ` Borislav Petkov
  1 sibling, 1 reply; 8+ messages in thread
From: Smita Koralahalli @ 2021-06-08 22:10 UTC (permalink / raw)
  To: x86, linux-kernel, linux-edac
  Cc: Borislav Petkov, Tony Luck, Yazen Ghannam, Muralidhara M K,
	Akshay Gupta, Youquan Song, Zhen Lei, Thomas Gleixner,
	Ingo Molnar, H . Peter Anvin, Smita Koralahalli

Newer AMD processors such as AMD 'Milan' support more physical address
bits.

That is, the MCA_ADDR registers on Scalable MCA systems contain the
ErrorAddr in bits [56:0] instead of [55:0]. Hence, the existing LSB field
in bits [61:56] of MCA_ADDR must be moved elsewhere to accommodate the
larger ErrorAddr size.

MCA_CONFIG[McaLsbInStatusSupported] indicates this change. If set, the
LSB field will be found in MCA_STATUS rather than MCA_ADDR.

Each logical CPU has a unique MCA bank in hardware, which is not shared
with other logical CPUs. Additionally, on SMCA systems, each feature bit
may be different for each bank within the same logical CPU.

Check for MCA_CONFIG[McaLsbInStatusSupported] for each MCA bank and for
each CPU.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
 arch/x86/include/asm/mce.h     |  2 ++
 arch/x86/kernel/cpu/mce/amd.c  | 31 +++++++++++++++++++++++++------
 arch/x86/kernel/cpu/mce/core.c |  6 ++----
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0a1c7224a582..33c5e77cf924 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -358,6 +358,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
 int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
 void smca_extract_err_addr(struct mce *m);
+void smca_feature_init(void);
 
 #else
 
@@ -368,6 +369,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)		{ }
 static inline int
 umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)	{ return -EINVAL; };
 static inline void smca_extract_err_addr(struct mce *m)			{ }
+static inline void smca_feature_init(void)				{ }
 #endif
 
 static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_amd_feature_init(c); }
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f71435e53cdb..480a497877e2 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -204,6 +204,12 @@ EXPORT_SYMBOL_GPL(smca_banks);
 #define MAX_MCATYPE_NAME_LEN	30
 static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
 
+struct smca_config {
+	__u64 lsb_in_status     :  1,
+	__reserved_0            : 63;
+};
+static DEFINE_PER_CPU_READ_MOSTLY(struct smca_config[MAX_NR_BANKS], smca_cfg);
+
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 
 /*
@@ -901,9 +907,26 @@ bool amd_mce_is_memory_error(struct mce *m)
 
 void smca_extract_err_addr(struct mce *m)
 {
-	u8 lsb = (m->addr >> 56) & 0x3f;
+	if (this_cpu_ptr(smca_cfg)[m->bank].lsb_in_status) {
+		u8 lsb = (m->status >> 24) & 0x3f;
+
+		m->addr &= GENMASK_ULL(56, lsb);
+	} else {
+		u8 lsb = (m->addr >> 56) & 0x3f;
+
+		m->addr &= GENMASK_ULL(55, lsb);
+	}
+}
+
+void smca_feature_init(void)
+{
+	unsigned int bank;
+	u64 mca_cfg;
 
-	m->addr &= GENMASK_ULL(55, lsb);
+	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
+		rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
+		this_cpu_ptr(smca_cfg)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
+	}
 }
 
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
@@ -920,10 +943,6 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 	if (m.status & MCI_STATUS_ADDRV) {
 		m.addr = addr;
 
-		/*
-		 * Extract [55:<lsb>] where lsb is the least significant
-		 * *valid* bit of the address bits.
-		 */
 		if (mce_flags.smca)
 			smca_extract_err_addr(&m);
 	}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c09c1eec50a..ce33006e42f8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -699,10 +699,6 @@ static void mce_read_aux(struct mce *m, int i)
 			m->addr <<= shift;
 		}
 
-		/*
-		 * Extract [55:<lsb>] where lsb is the least significant
-		 * *valid* bit of the address bits.
-		 */
 		if (mce_flags.smca)
 			smca_extract_err_addr(m);
 	}
@@ -1839,6 +1835,8 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
 			msr_ops.status	= smca_status_reg;
 			msr_ops.addr	= smca_addr_reg;
 			msr_ops.misc	= smca_misc_reg;
+
+			smca_feature_init();
 		}
 	}
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes
  2021-06-08 22:10 ` [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes Smita Koralahalli
@ 2021-06-10 11:55   ` Borislav Petkov
  2021-06-11  3:36     ` Smita Koralahalli Channabasappa
  0 siblings, 1 reply; 8+ messages in thread
From: Borislav Petkov @ 2021-06-10 11:55 UTC (permalink / raw)
  To: Smita Koralahalli
  Cc: x86, linux-kernel, linux-edac, Tony Luck, Yazen Ghannam,
	Muralidhara M K, Akshay Gupta, Youquan Song, Zhen Lei,
	Thomas Gleixner, Ingo Molnar, H . Peter Anvin

On Tue, Jun 08, 2021 at 05:10:12PM -0500, Smita Koralahalli wrote:
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index f71435e53cdb..480a497877e2 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -204,6 +204,12 @@ EXPORT_SYMBOL_GPL(smca_banks);
>  #define MAX_MCATYPE_NAME_LEN	30
>  static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
>  
> +struct smca_config {
> +	__u64 lsb_in_status     :  1,
> +	__reserved_0            : 63;
> +};
> +static DEFINE_PER_CPU_READ_MOSTLY(struct smca_config[MAX_NR_BANKS], smca_cfg);

Per CPU and per bank, huh? For a single bit?

Even if we have

static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);

already?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes
  2021-06-10 11:55   ` Borislav Petkov
@ 2021-06-11  3:36     ` Smita Koralahalli Channabasappa
  2021-06-11  7:37       ` Borislav Petkov
  0 siblings, 1 reply; 8+ messages in thread
From: Smita Koralahalli Channabasappa @ 2021-06-11  3:36 UTC (permalink / raw)
  To: Borislav Petkov, Smita Koralahalli
  Cc: x86, linux-kernel, linux-edac, Tony Luck, Yazen Ghannam,
	Muralidhara M K, Akshay Gupta, Youquan Song, Zhen Lei,
	Thomas Gleixner, Ingo Molnar, H . Peter Anvin

On 6/10/21 6:55 AM, Borislav Petkov wrote:

> On Tue, Jun 08, 2021 at 05:10:12PM -0500, Smita Koralahalli wrote:
>> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
>> index f71435e53cdb..480a497877e2 100644
>> --- a/arch/x86/kernel/cpu/mce/amd.c
>> +++ b/arch/x86/kernel/cpu/mce/amd.c
>> @@ -204,6 +204,12 @@ EXPORT_SYMBOL_GPL(smca_banks);
>>   #define MAX_MCATYPE_NAME_LEN	30
>>   static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
>>   
>> +struct smca_config {
>> +	__u64 lsb_in_status     :  1,
>> +	__reserved_0            : 63;
>> +};
>> +static DEFINE_PER_CPU_READ_MOSTLY(struct smca_config[MAX_NR_BANKS], smca_cfg);
> Per CPU and per bank, huh? For a single bit?
>
> Even if we have
>
> static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
>
> already?

The idea of defining a new struct was to keep SMCA-specific stuff separate.
I thought it would be costly to include it in the existing struct mce_bank[], as
it would be unnecessarily defined for each CPU and each bank across all vendors,
even if they aren't using it, and that would be a problem if they are constrained
on resources and space.

Also, in the future we can use this newly defined struct smca_config[] to cache
other MCA_CONFIG feature bits for different use cases if they are per bank and per
cpu.

I understand it's unnecessary overhead, at least for now, to have a new struct
per CPU per bank for just a single bit, in which case I can refrain from defining
a new one and include it in the existing struct.

Let me know what you think.

Thanks,
Smita

>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes
  2021-06-11  3:36     ` Smita Koralahalli Channabasappa
@ 2021-06-11  7:37       ` Borislav Petkov
  0 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2021-06-11  7:37 UTC (permalink / raw)
  To: Smita Koralahalli Channabasappa
  Cc: Smita Koralahalli, x86, linux-kernel, linux-edac, Tony Luck,
	Yazen Ghannam, Muralidhara M K, Akshay Gupta, Youquan Song,
	Zhen Lei, Thomas Gleixner, Ingo Molnar, H . Peter Anvin

On Thu, Jun 10, 2021 at 10:36:44PM -0500, Smita Koralahalli Channabasappa wrote:
> The idea of defining a new struct was to keep SMCA specific stuff separate.
> Thought, it would be costly to include in existing struct mce_bank[] as it will be
> unnecessarily defined for each cpu and each bank across all vendors even if they
> aren't using it and would be a problem if they are constraint on resource and space.

That's very considerate of you to think about the other vendors - I wish
everyone would do that...

However, our mce_banks_array is defined unconditionally on all vendors
already. So it is there even now. So I wouldn't lose a single second of
sleep about adding a u64 bitfield there.

> Also, in the future we can use this newly defined struct smca_config[] to cache
> other MCA_CONFIG feature bits for different use cases if they are per bank and per
> cpu.

You can use other bits in that bitfield. I hope 64 are enough. :)

HTH.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes
  2021-06-25  1:33 ` [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes Smita Koralahalli
@ 2021-07-13 16:21   ` Yazen Ghannam
  0 siblings, 0 replies; 8+ messages in thread
From: Yazen Ghannam @ 2021-07-13 16:21 UTC (permalink / raw)
  To: Smita Koralahalli
  Cc: x86, linux-kernel, linux-edac, Tony Luck, H . Peter Anvin,
	Muralidhara M K, Akshay Gupta, Youquan Song, Zhen Lei

On Thu, Jun 24, 2021 at 08:33:41PM -0500, Smita Koralahalli wrote:
> Newer AMD processors such as AMD 'Milan' support more physical address
> bits.
> 
> That is the MCA_ADDR registers on Scalable MCA systems contain the
> ErrorAddr in bits [56:0] instead of [55:0]. Hence the existing LSB field
> from bits [61:56] in MCA_ADDR must be moved around to accommodate the
> larger ErrorAddr size.
> 
> MCA_CONFIG[McaLsbInStatusSupported] indicates this change. If set, the
> LSB field will be found in MCA_STATUS rather than MCA_ADDR.
> 
> Each logical CPU has unique MCA bank in hardware and is not shared with
> other logical CPUs. Additionally on SMCA systems, each feature bit may be
> different for each bank within same logical CPU.
> 
> Check for MCA_CONFIG[McaLsbInStatusSupported] for each MCA bank and for
> each CPU.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> ---

Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Tested-by: Yazen Ghannam <yazen.ghannam@amd.com>

Thanks,
Yazen

P.S. A good test case for this feature is data poison consumption on
Milan. This case will generate two MCA errors. One will be a deferred
error in the UMC bank. This will be handled through the deferred error
interrupt handler. Also, the UMC bank does not have the "LSB in Status"
feature. The other error will be a #MC in the Load Store bank. This
bank does have the "LSB in Status" feature.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes
  2021-06-25  1:33 [PATCH 1/2] x86/mce: Define function to extract ErrorAddr from MCA_ADDR Smita Koralahalli
@ 2021-06-25  1:33 ` Smita Koralahalli
  2021-07-13 16:21   ` Yazen Ghannam
  0 siblings, 1 reply; 8+ messages in thread
From: Smita Koralahalli @ 2021-06-25  1:33 UTC (permalink / raw)
  To: x86, linux-kernel, linux-edac
  Cc: Tony Luck, H . Peter Anvin, Yazen Ghannam, Muralidhara M K,
	Akshay Gupta, Youquan Song, Zhen Lei, Smita Koralahalli

Newer AMD processors such as AMD 'Milan' support more physical address
bits.

That is, the MCA_ADDR registers on Scalable MCA systems contain the
ErrorAddr in bits [56:0] instead of [55:0]. Hence, the existing LSB field
in bits [61:56] of MCA_ADDR must be moved elsewhere to accommodate the
larger ErrorAddr size.

MCA_CONFIG[McaLsbInStatusSupported] indicates this change. If set, the
LSB field will be found in MCA_STATUS rather than MCA_ADDR.

Each logical CPU has a unique MCA bank in hardware, which is not shared
with other logical CPUs. Additionally, on SMCA systems, each feature bit
may be different for each bank within the same logical CPU.

Check for MCA_CONFIG[McaLsbInStatusSupported] for each MCA bank and for
each CPU.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
v2:
	Declared lsb_in_status in the existing struct mce_bank.
	Moved the struct mce_bank declaration from core.c to internal.h.
---
 arch/x86/include/asm/mce.h         |  2 ++
 arch/x86/kernel/cpu/mce/amd.c      | 25 +++++++++++++++++++------
 arch/x86/kernel/cpu/mce/core.c     | 12 +++---------
 arch/x86/kernel/cpu/mce/internal.h | 14 ++++++++++++++
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0a1c7224a582..33c5e77cf924 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -358,6 +358,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
 int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
 void smca_extract_err_addr(struct mce *m);
+void smca_feature_init(void);
 
 #else
 
@@ -368,6 +369,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)		{ }
 static inline int
 umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)	{ return -EINVAL; };
 static inline void smca_extract_err_addr(struct mce *m)			{ }
+static inline void smca_feature_init(void)				{ }
 #endif
 
 static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_amd_feature_init(c); }
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f71435e53cdb..5e0819de641f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -901,9 +901,26 @@ bool amd_mce_is_memory_error(struct mce *m)
 
 void smca_extract_err_addr(struct mce *m)
 {
-	u8 lsb = (m->addr >> 56) & 0x3f;
+	if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) {
+		u8 lsb = (m->status >> 24) & 0x3f;
 
-	m->addr &= GENMASK_ULL(55, lsb);
+		m->addr &= GENMASK_ULL(56, lsb);
+	} else {
+		u8 lsb = (m->addr >> 56) & 0x3f;
+
+		m->addr &= GENMASK_ULL(55, lsb);
+	}
+}
+
+void smca_feature_init(void)
+{
+	unsigned int bank;
+	u64 mca_cfg;
+
+	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
+		rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
+		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
+	}
 }
 
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
@@ -920,10 +937,6 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 	if (m.status & MCI_STATUS_ADDRV) {
 		m.addr = addr;
 
-		/*
-		 * Extract [55:<lsb>] where lsb is the least significant
-		 * *valid* bit of the address bits.
-		 */
 		if (mce_flags.smca)
 			smca_extract_err_addr(&m);
 	}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c09c1eec50a..f3be82acce67 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,11 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
 
-struct mce_bank {
-	u64			ctl;			/* subevents to enable */
-	bool			init;			/* initialise bank? */
-};
-static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
 
 #define ATTR_LEN               16
 /* One object for each MCE bank, shared by all CPUs */
@@ -699,10 +695,6 @@ static void mce_read_aux(struct mce *m, int i)
 			m->addr <<= shift;
 		}
 
-		/*
-		 * Extract [55:<lsb>] where lsb is the least significant
-		 * *valid* bit of the address bits.
-		 */
 		if (mce_flags.smca)
 			smca_extract_err_addr(m);
 	}
@@ -1839,6 +1831,8 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
 			msr_ops.status	= smca_status_reg;
 			msr_ops.addr	= smca_addr_reg;
 			msr_ops.misc	= smca_misc_reg;
+
+			smca_feature_init();
 		}
 	}
 }
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 88dcc79cfb07..37b76a726c29 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -168,6 +168,20 @@ struct mce_vendor_flags {
 
 extern struct mce_vendor_flags mce_flags;
 
+struct mce_bank {
+	u64			ctl;			/* subevents to enable */
+	bool			init;			/* initialise bank? */
+
+	/*
+	 * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: This bit indicates
+	 * the LSB field is found in MCA_STATUS, when set.
+	 */
+	__u64 lsb_in_status		: 1,
+	      __reserved_1		: 63;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
 struct mca_msr_regs {
 	u32 (*ctl)	(int bank);
 	u32 (*status)	(int bank);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-07-13 16:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-08 22:10 [PATCH 0/2] x86/mce: Support extended MCA_ADDR address on SMCA systems Smita Koralahalli
2021-06-08 22:10 ` [PATCH 1/2] x86/mce: Define function to extract ErrorAddr from MCA_ADDR Smita Koralahalli
2021-06-08 22:10 ` [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes Smita Koralahalli
2021-06-10 11:55   ` Borislav Petkov
2021-06-11  3:36     ` Smita Koralahalli Channabasappa
2021-06-11  7:37       ` Borislav Petkov
2021-06-25  1:33 [PATCH 1/2] x86/mce: Define function to extract ErrorAddr from MCA_ADDR Smita Koralahalli
2021-06-25  1:33 ` [PATCH 2/2] x86/mce: Add support for Extended Physical Address MCA changes Smita Koralahalli
2021-07-13 16:21   ` Yazen Ghannam

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).