linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type
@ 2017-12-01 15:50 Yazen Ghannam
  2017-12-01 15:50 ` [PATCH 2/2] x86/mce: Report only DRAM ECC as memory errors on AMD systems Yazen Ghannam
  2017-12-02 14:21 ` [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Borislav Petkov
  0 siblings, 2 replies; 4+ messages in thread
From: Yazen Ghannam @ 2017-12-01 15:50 UTC (permalink / raw)
  To: linux-edac; +Cc: Yazen Ghannam, Borislav Petkov, Tony Luck, x86, linux-kernel

From: Yazen Ghannam <yazen.ghannam@amd.com>

Scalable MCA systems have various types of banks. The bank's type can
determine how we handle errors from it. For example, if a bank represents
a UMC then we will need to convert its address from a normalized address
to a system physical address before handling the error.

Define an exported function to return a bank's SMCA type.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/include/asm/mce.h           |  1 +
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b1e8d8db921f..9ab8bf32e61c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -376,6 +376,7 @@ struct smca_bank {
 extern struct smca_bank smca_banks[MAX_NR_BANKS];
 
 extern const char *smca_get_long_name(enum smca_bank_types t);
+extern unsigned int smca_get_bank_type(struct mce *m);
 
 extern int mce_threshold_create_device(unsigned int cpu);
 extern int mce_threshold_remove_device(unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index a38ab1fa53a2..bc0510a4f6c0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,6 +110,17 @@ const char *smca_get_long_name(enum smca_bank_types t)
 }
 EXPORT_SYMBOL_GPL(smca_get_long_name);
 
+unsigned int smca_get_bank_type(struct mce *m)
+{
+	struct smca_bank bank = smca_banks[m->bank];
+
+	if (!bank.hwid)
+		return N_SMCA_BANK_TYPES;
+
+	return bank.hwid->bank_type;
+}
+EXPORT_SYMBOL_GPL(smca_get_bank_type);
+
 static struct smca_hwid smca_hwid_mcatypes[] = {
 	/* { bank_type, hwid_mcatype, xec_bitmap } */
 
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] x86/mce: Report only DRAM ECC as memory errors on AMD systems
  2017-12-01 15:50 [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Yazen Ghannam
@ 2017-12-01 15:50 ` Yazen Ghannam
  2017-12-02 14:21 ` [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Borislav Petkov
  1 sibling, 0 replies; 4+ messages in thread
From: Yazen Ghannam @ 2017-12-01 15:50 UTC (permalink / raw)
  To: linux-edac; +Cc: Yazen Ghannam, Borislav Petkov, Tony Luck, x86, linux-kernel

From: Yazen Ghannam <yazen.ghannam@amd.com>

The MCA_STATUS[ErrorCodeExt] field is very bank-type specific. We currently
check if the ErrorCodeExt value is 0x0 or 0x8 in mce_is_memory_error(), but
we don't check the bank. This means that we could flag non-memory errors as
memory errors.

We know that we want to flag DRAM ECC errors as memory errors, so let's do
those cases first. We can add more cases later when needed.

Check that bank type is UMC and xec is 0 on SMCA systems.

Check that bank is 4 (Northbridge) and xec is 8 on legacy systems.

Define a wrapper function in mce_amd.c so we can use SMCA enums.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/include/asm/mce.h           |  2 ++
 arch/x86/kernel/cpu/mcheck/mce.c     |  4 +---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +++++++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9ab8bf32e61c..7b1cc4d1710c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -377,6 +377,7 @@ extern struct smca_bank smca_banks[MAX_NR_BANKS];
 
 extern const char *smca_get_long_name(enum smca_bank_types t);
 extern unsigned int smca_get_bank_type(struct mce *m);
+extern bool amd_mce_is_memory_error(struct mce *m);
 
 extern int mce_threshold_create_device(unsigned int cpu);
 extern int mce_threshold_remove_device(unsigned int cpu);
@@ -385,6 +386,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
 
 static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
 static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
+static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
 
 #endif
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1d616d08eee..321c7a80be66 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m)
 bool mce_is_memory_error(struct mce *m)
 {
 	if (m->cpuvendor == X86_VENDOR_AMD) {
-		/* ErrCodeExt[20:16] */
-		u8 xec = (m->status >> 16) & 0x1f;
+		return amd_mce_is_memory_error(m);
 
-		return (xec == 0x0 || xec == 0x8);
 	} else if (m->cpuvendor == X86_VENDOR_INTEL) {
 		/*
 		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index bc0510a4f6c0..b2f016bd1de5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -751,6 +751,17 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 }
 EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
 
+bool amd_mce_is_memory_error(struct mce *m)
+{
+	/* ErrCodeExt[20:16] */
+	u8 xec = (m->status >> 16) & 0x1f;
+
+	if (mce_flags.smca)
+		return (smca_get_bank_type(m) == SMCA_UMC && xec == 0x0);
+
+	return (m->bank == 4 && xec == 0x8);
+}
+
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 {
 	struct mce m;
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type
  2017-12-01 15:50 [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Yazen Ghannam
  2017-12-01 15:50 ` [PATCH 2/2] x86/mce: Report only DRAM ECC as memory errors on AMD systems Yazen Ghannam
@ 2017-12-02 14:21 ` Borislav Petkov
  2017-12-03  0:58   ` Ghannam, Yazen
  1 sibling, 1 reply; 4+ messages in thread
From: Borislav Petkov @ 2017-12-02 14:21 UTC (permalink / raw)
  To: Yazen Ghannam; +Cc: linux-edac, Tony Luck, x86, linux-kernel

On Fri, Dec 01, 2017 at 09:50:33AM -0600, Yazen Ghannam wrote:
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> Scalable MCA systems have various types of banks. The bank's type can
> determine how we handle errors from it. For example, if a bank represents
> a UMC then we will need to convert its address from a normalized address
> to a system physical address before handling the error.
> 
> Define an exported function to return a bank's SMCA type.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  arch/x86/include/asm/mce.h           |  1 +
>  arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +++++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index b1e8d8db921f..9ab8bf32e61c 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -376,6 +376,7 @@ struct smca_bank {
>  extern struct smca_bank smca_banks[MAX_NR_BANKS];
>  
>  extern const char *smca_get_long_name(enum smca_bank_types t);
> +extern unsigned int smca_get_bank_type(struct mce *m);
>  
>  extern int mce_threshold_create_device(unsigned int cpu);
>  extern int mce_threshold_remove_device(unsigned int cpu);
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index a38ab1fa53a2..bc0510a4f6c0 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -110,6 +110,17 @@ const char *smca_get_long_name(enum smca_bank_types t)
>  }
>  EXPORT_SYMBOL_GPL(smca_get_long_name);
>  
> +unsigned int smca_get_bank_type(struct mce *m)
> +{
> +	struct smca_bank bank = smca_banks[m->bank];
> +
> +	if (!bank.hwid)
> +		return N_SMCA_BANK_TYPES;
> +
> +	return bank.hwid->bank_type;
> +}
> +EXPORT_SYMBOL_GPL(smca_get_bank_type);

Why are you exporting it if it is used in mce_amd.c only anyway?

-- 
Regards/Gruss,
    Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
-- 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type
  2017-12-02 14:21 ` [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Borislav Petkov
@ 2017-12-03  0:58   ` Ghannam, Yazen
  0 siblings, 0 replies; 4+ messages in thread
From: Ghannam, Yazen @ 2017-12-03  0:58 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-edac, Tony Luck, x86, linux-kernel

> -----Original Message-----
> From: linux-edac-owner@vger.kernel.org [mailto:linux-edac-
> owner@vger.kernel.org] On Behalf Of Borislav Petkov
> Sent: Saturday, December 2, 2017 9:22 AM
> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>
> Cc: linux-edac@vger.kernel.org; Tony Luck <tony.luck@intel.com>;
> x86@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank
> type
> 
> On Fri, Dec 01, 2017 at 09:50:33AM -0600, Yazen Ghannam wrote:
> > From: Yazen Ghannam <yazen.ghannam@amd.com>
> >
> > Scalable MCA systems have various types of banks. The bank's type can
> > determine how we handle errors from it. For example, if a bank represents
> > a UMC then we will need to convert its address from a normalized address
> > to a system physical address before handling the error.
> >
> > Define an exported function to return a bank's SMCA type.
> >
> > Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> > ---
> >  arch/x86/include/asm/mce.h           |  1 +
> >  arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +++++++++++
> >  2 files changed, 12 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> > index b1e8d8db921f..9ab8bf32e61c 100644
> > --- a/arch/x86/include/asm/mce.h
> > +++ b/arch/x86/include/asm/mce.h
> > @@ -376,6 +376,7 @@ struct smca_bank {
> >  extern struct smca_bank smca_banks[MAX_NR_BANKS];
> >
> >  extern const char *smca_get_long_name(enum smca_bank_types t);
> > +extern unsigned int smca_get_bank_type(struct mce *m);
> >
> >  extern int mce_threshold_create_device(unsigned int cpu);
> >  extern int mce_threshold_remove_device(unsigned int cpu);
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > index a38ab1fa53a2..bc0510a4f6c0 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > @@ -110,6 +110,17 @@ const char *smca_get_long_name(enum
> smca_bank_types t)
> >  }
> >  EXPORT_SYMBOL_GPL(smca_get_long_name);
> >
> > +unsigned int smca_get_bank_type(struct mce *m)
> > +{
> > +	struct smca_bank bank = smca_banks[m->bank];
> > +
> > +	if (!bank.hwid)
> > +		return N_SMCA_BANK_TYPES;
> > +
> > +	return bank.hwid->bank_type;
> > +}
> > +EXPORT_SYMBOL_GPL(smca_get_bank_type);
> 
> Why are you exporting it if it is used in mce_amd.c only anyway?
> 

I was thinking it could be used in edac/mce_amd.c also.

I don't have a use at the moment, so I can change this if you'd like.

Thanks,
Yazen

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-12-03  0:58 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-01 15:50 [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Yazen Ghannam
2017-12-01 15:50 ` [PATCH 2/2] x86/mce: Report only DRAM ECC as memory errors on AMD systems Yazen Ghannam
2017-12-02 14:21 ` [PATCH 1/2] x86/mce/AMD: Define function to get SMCA bank type Borislav Petkov
2017-12-03  0:58   ` Ghannam, Yazen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).