linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] Add new SMCA bank types
@ 2019-02-01 22:55 Ghannam, Yazen
  2019-02-01 22:55 ` [PATCH 1/5] x86/MCE/AMD: Add new MP5, NBIO, and PCIE " Ghannam, Yazen
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-01 22:55 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp, tony.luck, x86

From: Yazen Ghannam <yazen.ghannam@amd.com>

This series adds decoding for some new SMCA bank types (MP5, NBIO, and
PCIE) and also for some new versions of existing bank types (CS, PSP,
and SMU).

This series also adds new error type descriptions for existing SMCA bank
types that aren't getting a version bump. And also some error
descriptions are updated to more closely match the latest documentation.

The last patch in this set is a minor tweak to print the error code and
description on a single line rather than separate lines.

Thanks,
Yazen

Yazen Ghannam (5):
  x86/MCE/AMD: Add new MP5, NBIO, and PCIE SMCA bank types
  x86/MCE/AMD: Add new McaTypes for CS, PSP, and SMU
  EDAC, mce_amd: Add new error descriptions for some SMCA bank types
  EDAC, mce_amd: Match error descriptions to latest documentation
  EDAC, mce_amd: Print ExtErrorCode and description on a single line

 arch/x86/include/asm/mce.h    |   6 +
 arch/x86/kernel/cpu/mce/amd.c |  26 +++-
 drivers/edac/mce_amd.c        | 264 +++++++++++++++++++++++-----------
 3 files changed, 205 insertions(+), 91 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/5] x86/MCE/AMD: Add new MP5, NBIO, and PCIE SMCA bank types
  2019-02-01 22:55 [PATCH 0/5] Add new SMCA bank types Ghannam, Yazen
@ 2019-02-01 22:55 ` Ghannam, Yazen
  2019-02-04 20:42   ` [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: " tip-bot for Yazen Ghannam
  2019-02-01 22:55 ` [PATCH 2/5] x86/MCE/AMD: Add new McaTypes for CS, PSP, and SMU Ghannam, Yazen
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-01 22:55 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp, tony.luck, x86

From: Yazen Ghannam <yazen.ghannam@amd.com>

Add the (HWID, MCATYPE) tuples and names for the new MP5, NBIO, and
PCIE SMCA bank types.

Also, add their respective error descriptions to edac_mce_amd.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/include/asm/mce.h    |  3 +++
 arch/x86/kernel/cpu/mce/amd.c | 12 ++++++++++++
 drivers/edac/mce_amd.c        | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c1a812bd5a27..91b65d859ca8 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -312,6 +312,9 @@ enum smca_bank_types {
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
 	SMCA_SMU,	/* System Management Unit */
+	SMCA_MP5,	/* Microprocessor 5 Unit */
+	SMCA_NBIO,	/* Northbridge IO Unit */
+	SMCA_PCIE,	/* PCI Express Unit */
 	N_SMCA_BANK_TYPES
 };
 
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index ed3327342b40..00f60b8c7e4f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -93,6 +93,9 @@ static struct smca_bank_name smca_names[] = {
 	[SMCA_PB]	= { "param_block",	"Parameter Block" },
 	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
 	[SMCA_SMU]	= { "smu",		"System Management Unit" },
+	[SMCA_MP5]	= { "mp5",		"Microprocessor 5 Unit" },
+	[SMCA_NBIO]	= { "nbio",		"Northbridge IO Unit" },
+	[SMCA_PCIE]	= { "pcie",		"PCI Express Unit" },
 };
 
 static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -162,6 +165,15 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* System Management Unit MCA type */
 	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
+
+	/* Microprocessor 5 Unit MCA type */
+	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+	/* Northbridge IO Unit MCA type */
+	{ SMCA_NBIO,	 HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+	/* PCI Express Unit MCA type */
+	{ SMCA_PCIE,	 HWID_MCATYPE(0x46, 0x0), 0x1F },
 };
 
 struct smca_bank smca_banks[MAX_NR_BANKS];
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c605089d899f..5ab4ab3f0ce6 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -285,6 +285,35 @@ static const char * const smca_smu_mce_desc[] = {
 	"SMU RAM ECC or parity error",
 };
 
+static const char * const smca_mp5_mce_desc[] = {
+	"High SRAM ECC or parity error",
+	"Low SRAM ECC or parity error",
+	"Data Cache Bank A ECC or parity error",
+	"Data Cache Bank B ECC or parity error",
+	"Data Tag Cache Bank A ECC or parity error",
+	"Data Tag Cache Bank B ECC or parity error",
+	"Instruction Cache Bank A ECC or parity error",
+	"Instruction Cache Bank B ECC or parity error",
+	"Instruction Tag Cache Bank A ECC or parity error",
+	"Instruction Tag Cache Bank B ECC or parity error",
+};
+
+static const char * const smca_nbio_mce_desc[] = {
+	"ECC or Parity error",
+	"PCIE error",
+	"SDP ErrEvent error",
+	"SDP Egress Poison Error",
+	"IOHC Internal Poison Error",
+};
+
+static const char * const smca_pcie_mce_desc[] = {
+	"CCIX PER Message logging",
+	"CCIX Read Response with Status: Non-Data Error",
+	"CCIX Write Response with Status: Non-Data Error",
+	"CCIX Read Response with Status: Data Error",
+	"CCIX Non-okay write response with data error",
+};
+
 struct smca_mce_desc {
 	const char * const *descs;
 	unsigned int num_descs;
@@ -304,6 +333,9 @@ static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
 	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
 	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
+	[SMCA_MP5]	= { smca_mp5_mce_desc,	ARRAY_SIZE(smca_mp5_mce_desc)	},
+	[SMCA_NBIO]	= { smca_nbio_mce_desc,	ARRAY_SIZE(smca_nbio_mce_desc)	},
+	[SMCA_PCIE]	= { smca_pcie_mce_desc,	ARRAY_SIZE(smca_pcie_mce_desc)	},
 };
 
 static bool f12h_mc0_mce(u16 ec, u8 xec)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/5] x86/MCE/AMD: Add new McaTypes for CS, PSP, and SMU
  2019-02-01 22:55 [PATCH 0/5] Add new SMCA bank types Ghannam, Yazen
  2019-02-01 22:55 ` [PATCH 1/5] x86/MCE/AMD: Add new MP5, NBIO, and PCIE " Ghannam, Yazen
@ 2019-02-01 22:55 ` Ghannam, Yazen
  2019-02-04 20:43   ` [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units tip-bot for Yazen Ghannam
  2019-02-01 22:55 ` [PATCH 3/5] EDAC, mce_amd: Add new error descriptions for some SMCA bank types Ghannam, Yazen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-01 22:55 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp, tony.luck, x86

From: Yazen Ghannam <yazen.ghannam@amd.com>

The existing CS, PSP, and SMU SMCA bank types will see new versions (as
indicated by their McaTypes) in future SMCA systems.

Add the new (HWID, MCATYPE) tuples for these new versions. Reuse the
same names as the older versions, since they are logically the same to
the user. SMCA systems won't mix and match IP blocks with different
McaType versions in the same system, so there isn't a need to
distinguish them. The MCA_IPID register is saved when logging an MCA
error, and that can be used to triage the error.

Also, add the new error descriptions to edac_mce_amd. Some error types
(positions in the list) are overloaded compared to the previous
McaTypes. Therefore, just create new lists of the error descriptions to
keep things simple even if some of the error descriptions are the same
between versions.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/include/asm/mce.h    |  3 ++
 arch/x86/kernel/cpu/mce/amd.c |  6 ++++
 drivers/edac/mce_amd.c        | 55 +++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 91b65d859ca8..299a38536567 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -307,11 +307,14 @@ enum smca_bank_types {
 	SMCA_FP,	/* Floating Point */
 	SMCA_L3_CACHE,	/* L3 Cache */
 	SMCA_CS,	/* Coherent Slave */
+	SMCA_CS_V2,	/* Coherent Slave */
 	SMCA_PIE,	/* Power, Interrupts, etc. */
 	SMCA_UMC,	/* Unified Memory Controller */
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
+	SMCA_PSP_V2,	/* Platform Security Processor */
 	SMCA_SMU,	/* System Management Unit */
+	SMCA_SMU_V2,	/* System Management Unit */
 	SMCA_MP5,	/* Microprocessor 5 Unit */
 	SMCA_NBIO,	/* Northbridge IO Unit */
 	SMCA_PCIE,	/* PCI Express Unit */
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 00f60b8c7e4f..bd1331b241ca 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -88,11 +88,14 @@ static struct smca_bank_name smca_names[] = {
 	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
 	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
 	[SMCA_CS]	= { "coherent_slave",	"Coherent Slave" },
+	[SMCA_CS_V2]	= { "coherent_slave",	"Coherent Slave" },
 	[SMCA_PIE]	= { "pie",		"Power, Interrupts, etc." },
 	[SMCA_UMC]	= { "umc",		"Unified Memory Controller" },
 	[SMCA_PB]	= { "param_block",	"Parameter Block" },
 	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
+	[SMCA_PSP_V2]	= { "psp",		"Platform Security Processor" },
 	[SMCA_SMU]	= { "smu",		"System Management Unit" },
+	[SMCA_SMU_V2]	= { "smu",		"System Management Unit" },
 	[SMCA_MP5]	= { "mp5",		"Microprocessor 5 Unit" },
 	[SMCA_NBIO]	= { "nbio",		"Northbridge IO Unit" },
 	[SMCA_PCIE]	= { "pcie",		"PCI Express Unit" },
@@ -153,6 +156,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 	/* Data Fabric MCA types */
 	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
 	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },
+	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
 
 	/* Unified Memory Controller MCA type */
 	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },
@@ -162,9 +166,11 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* Platform Security Processor MCA type */
 	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0), 0x1 },
+	{ SMCA_PSP_V2,	 HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
 
 	/* System Management Unit MCA type */
 	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
+	{ SMCA_SMU_V2,	 HWID_MCATYPE(0x01, 0x1), 0x7FF },
 
 	/* Microprocessor 5 Unit MCA type */
 	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2), 0x3FF },
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 5ab4ab3f0ce6..184c90172d17 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -257,6 +257,23 @@ static const char * const smca_cs_mce_desc[] = {
 	"ECC error on probe filter access",
 };
 
+static const char * const smca_cs2_mce_desc[] = {
+	"Illegal Request",
+	"Address Violation",
+	"Security Violation",
+	"Illegal Response",
+	"Unexpected Response",
+	"Request or Probe Parity Error",
+	"Read Response Parity Error",
+	"Atomic Request Parity Error",
+	"SDP read response had no match in the CS queue",
+	"Probe Filter Protocol Error",
+	"Probe Filter ECC Error",
+	"SDP read response had an unexpected RETRY error",
+	"Counter overflow error",
+	"Counter underflow error",
+};
+
 static const char * const smca_pie_mce_desc[] = {
 	"HW assert",
 	"Internal PIE register security violation",
@@ -281,10 +298,45 @@ static const char * const smca_psp_mce_desc[] = {
 	"PSP RAM ECC or parity error",
 };
 
+static const char * const smca_psp2_mce_desc[] = {
+	"High SRAM ECC or parity error",
+	"Low SRAM ECC or parity error",
+	"Instruction Cache Bank 0 ECC or parity error",
+	"Instruction Cache Bank 1 ECC or parity error",
+	"Instruction Tag Ram 0 parity error",
+	"Instruction Tag Ram 1 parity error",
+	"Data Cache Bank 0 ECC or parity error",
+	"Data Cache Bank 1 ECC or parity error",
+	"Data Cache Bank 2 ECC or parity error",
+	"Data Cache Bank 3 ECC or parity error",
+	"Data Tag Bank 0 parity error",
+	"Data Tag Bank 1 parity error",
+	"Data Tag Bank 2 parity error",
+	"Data Tag Bank 3 parity error",
+	"Dirty Data Ram parity error",
+	"TLB Bank 0 parity error",
+	"TLB Bank 1 parity error",
+	"System Hub Read Buffer ECC or parity error",
+};
+
 static const char * const smca_smu_mce_desc[] = {
 	"SMU RAM ECC or parity error",
 };
 
+static const char * const smca_smu2_mce_desc[] = {
+	"High SRAM ECC or parity error",
+	"Low SRAM ECC or parity error",
+	"Data Cache Bank A ECC or parity error",
+	"Data Cache Bank B ECC or parity error",
+	"Data Tag Cache Bank A ECC or parity error",
+	"Data Tag Cache Bank B ECC or parity error",
+	"Instruction Cache Bank A ECC or parity error",
+	"Instruction Cache Bank B ECC or parity error",
+	"Instruction Tag Cache Bank A ECC or parity error",
+	"Instruction Tag Cache Bank B ECC or parity error",
+	"System Hub Read Buffer ECC or parity error",
+};
+
 static const char * const smca_mp5_mce_desc[] = {
 	"High SRAM ECC or parity error",
 	"Low SRAM ECC or parity error",
@@ -328,11 +380,14 @@ static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_FP]	= { smca_fp_mce_desc,	ARRAY_SIZE(smca_fp_mce_desc)	},
 	[SMCA_L3_CACHE]	= { smca_l3_mce_desc,	ARRAY_SIZE(smca_l3_mce_desc)	},
 	[SMCA_CS]	= { smca_cs_mce_desc,	ARRAY_SIZE(smca_cs_mce_desc)	},
+	[SMCA_CS_V2]	= { smca_cs2_mce_desc,	ARRAY_SIZE(smca_cs2_mce_desc)	},
 	[SMCA_PIE]	= { smca_pie_mce_desc,	ARRAY_SIZE(smca_pie_mce_desc)	},
 	[SMCA_UMC]	= { smca_umc_mce_desc,	ARRAY_SIZE(smca_umc_mce_desc)	},
 	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
 	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
+	[SMCA_PSP_V2]	= { smca_psp2_mce_desc,	ARRAY_SIZE(smca_psp2_mce_desc)	},
 	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
+	[SMCA_SMU_V2]	= { smca_smu2_mce_desc,	ARRAY_SIZE(smca_smu2_mce_desc)	},
 	[SMCA_MP5]	= { smca_mp5_mce_desc,	ARRAY_SIZE(smca_mp5_mce_desc)	},
 	[SMCA_NBIO]	= { smca_nbio_mce_desc,	ARRAY_SIZE(smca_nbio_mce_desc)	},
 	[SMCA_PCIE]	= { smca_pcie_mce_desc,	ARRAY_SIZE(smca_pcie_mce_desc)	},
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/5] EDAC, mce_amd: Add new error descriptions for some SMCA bank types
  2019-02-01 22:55 [PATCH 0/5] Add new SMCA bank types Ghannam, Yazen
  2019-02-01 22:55 ` [PATCH 1/5] x86/MCE/AMD: Add new MP5, NBIO, and PCIE " Ghannam, Yazen
  2019-02-01 22:55 ` [PATCH 2/5] x86/MCE/AMD: Add new McaTypes for CS, PSP, and SMU Ghannam, Yazen
@ 2019-02-01 22:55 ` Ghannam, Yazen
  2019-02-04 20:44   ` [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: " tip-bot for Yazen Ghannam
  2019-02-01 22:55 ` [PATCH 4/5] EDAC, mce_amd: Match error descriptions to latest documentation Ghannam, Yazen
  2019-02-01 22:55 ` [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line Ghannam, Yazen
  4 siblings, 1 reply; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-01 22:55 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp, tony.luck, x86

From: Yazen Ghannam <yazen.ghannam@amd.com>

Some SMCA bank types on future systems will report new error types even
though the bank type is not treated as a new version. These new error
types will reported by bits that are reserved in past systems.

Add the new error descriptions to the lists in edac_mce_amd.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mce/amd.c | 8 ++++----
 drivers/edac/mce_amd.c        | 6 +++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index bd1331b241ca..e64de5149e50 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -144,22 +144,22 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
 
 	/* ZN Core (HWID=0xB0) MCA types */
-	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
 	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
 	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
 	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
 	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
-	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0xFFF },
 	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6), 0x7F },
 	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
 
 	/* Data Fabric MCA types */
 	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
-	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },
+	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0x1F },
 	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
 
 	/* Unified Memory Controller MCA type */
-	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },
+	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0xFF },
 
 	/* Parameter Block MCA type */
 	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0), 0x1 },
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 184c90172d17..c79e650aa606 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -155,7 +155,7 @@ static const char * const smca_ls_mce_desc[] = {
 	"Store queue parity",
 	"Miss address buffer payload parity",
 	"L1 TLB parity",
-	"Reserved",
+	"DC Tag error type 5",
 	"DC tag error type 6",
 	"DC tag error type 1",
 	"Internal error type 1",
@@ -222,6 +222,7 @@ static const char * const smca_ex_mce_desc[] = {
 	"Retire status queue parity error",
 	"Scheduling queue parity error",
 	"Branch buffer queue parity error",
+	"Hardware Assertion error",
 };
 
 static const char * const smca_fp_mce_desc[] = {
@@ -279,6 +280,7 @@ static const char * const smca_pie_mce_desc[] = {
 	"Internal PIE register security violation",
 	"Error on GMI link",
 	"Poison data written to internal PIE register",
+	"A deferred error was detected in the DF"
 };
 
 static const char * const smca_umc_mce_desc[] = {
@@ -288,6 +290,8 @@ static const char * const smca_umc_mce_desc[] = {
 	"Advanced peripheral bus error",
 	"Command/address parity error",
 	"Write data CRC error",
+	"DCQ SRAM ECC error",
+	"AES SRAM ECC error",
 };
 
 static const char * const smca_pb_mce_desc[] = {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/5] EDAC, mce_amd: Match error descriptions to latest documentation
  2019-02-01 22:55 [PATCH 0/5] Add new SMCA bank types Ghannam, Yazen
                   ` (2 preceding siblings ...)
  2019-02-01 22:55 ` [PATCH 3/5] EDAC, mce_amd: Add new error descriptions for some SMCA bank types Ghannam, Yazen
@ 2019-02-01 22:55 ` Ghannam, Yazen
  2019-02-04 20:44   ` [tip:ras/core] " tip-bot for Yazen Ghannam
  2019-02-01 22:55 ` [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line Ghannam, Yazen
  4 siblings, 1 reply; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-01 22:55 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp, tony.luck, x86

From: Yazen Ghannam <yazen.ghannam@amd.com>

Update the error descriptions to match the latest documentation for
easier searching. In some cases the changes are small and in other cases
the changes may be total rewording of the description.

No functional change.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/mce_amd.c | 166 ++++++++++++++++++++---------------------
 1 file changed, 83 insertions(+), 83 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c79e650aa606..7e29ceabdf6f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -151,74 +151,74 @@ static const char * const mc6_mce_desc[] = {
 
 /* Scalable MCA error strings */
 static const char * const smca_ls_mce_desc[] = {
-	"Load queue parity",
-	"Store queue parity",
-	"Miss address buffer payload parity",
-	"L1 TLB parity",
+	"Load queue parity error",
+	"Store queue parity error",
+	"Miss address buffer payload parity error",
+	"Level 1 TLB parity error",
 	"DC Tag error type 5",
-	"DC tag error type 6",
-	"DC tag error type 1",
+	"DC Tag error type 6",
+	"DC Tag error type 1",
 	"Internal error type 1",
 	"Internal error type 2",
-	"Sys Read data error thread 0",
-	"Sys read data error thread 1",
-	"DC tag error type 2",
-	"DC data error type 1 (poison consumption)",
-	"DC data error type 2",
-	"DC data error type 3",
-	"DC tag error type 4",
-	"L2 TLB parity",
+	"System Read Data Error Thread 0",
+	"System Read Data Error Thread 1",
+	"DC Tag error type 2",
+	"DC Data error type 1 and poison consumption",
+	"DC Data error type 2",
+	"DC Data error type 3",
+	"DC Tag error type 4",
+	"Level 2 TLB parity error",
 	"PDC parity error",
-	"DC tag error type 3",
-	"DC tag error type 5",
-	"L2 fill data error",
+	"DC Tag error type 3",
+	"DC Tag error type 5",
+	"L2 Fill Data error",
 };
 
 static const char * const smca_if_mce_desc[] = {
-	"microtag probe port parity error",
-	"IC microtag or full tag multi-hit error",
-	"IC full tag parity",
-	"IC data array parity",
-	"Decoupling queue phys addr parity error",
-	"L0 ITLB parity error",
-	"L1 ITLB parity error",
-	"L2 ITLB parity error",
-	"BPQ snoop parity on Thread 0",
-	"BPQ snoop parity on Thread 1",
-	"L1 BTB multi-match error",
-	"L2 BTB multi-match error",
-	"L2 Cache Response Poison error",
-	"System Read Data error",
+	"Op Cache Microtag Probe Port Parity Error",
+	"IC Microtag or Full Tag Multi-hit Error",
+	"IC Full Tag Parity Error",
+	"IC Data Array Parity Error",
+	"Decoupling Queue PhysAddr Parity Error",
+	"L0 ITLB Parity Error",
+	"L1 ITLB Parity Error",
+	"L2 ITLB Parity Error",
+	"BPQ Thread 0 Snoop Parity Error",
+	"BPQ Thread 1 Snoop Parity Error",
+	"L1 BTB Multi-Match Error",
+	"L2 BTB Multi-Match Error",
+	"L2 Cache Response Poison Error",
+	"System Read Data Error",
 };
 
 static const char * const smca_l2_mce_desc[] = {
-	"L2M tag multi-way-hit error",
-	"L2M tag ECC error",
-	"L2M data ECC error",
-	"HW assert",
+	"L2M Tag Multiple-Way-Hit error",
+	"L2M Tag or State Array ECC Error",
+	"L2M Data Array ECC Error",
+	"Hardware Assert Error",
 };
 
 static const char * const smca_de_mce_desc[] = {
-	"uop cache tag parity error",
-	"uop cache data parity error",
-	"Insn buffer parity error",
-	"uop queue parity error",
-	"Insn dispatch queue parity error",
-	"Fetch address FIFO parity",
-	"Patch RAM data parity",
-	"Patch RAM sequencer parity",
-	"uop buffer parity"
+	"Micro-op cache tag parity error",
+	"Micro-op cache data parity error",
+	"Instruction buffer parity error",
+	"Micro-op queue parity error",
+	"Instruction dispatch queue parity error",
+	"Fetch address FIFO parity error",
+	"Patch RAM data parity error",
+	"Patch RAM sequencer parity error",
+	"Micro-op buffer parity error"
 };
 
 static const char * const smca_ex_mce_desc[] = {
-	"Watchdog timeout error",
-	"Phy register file parity",
-	"Flag register file parity",
-	"Immediate displacement register file parity",
-	"Address generator payload parity",
-	"EX payload parity",
-	"Checkpoint queue parity",
-	"Retire dispatch queue parity",
+	"Watchdog Timeout error",
+	"Physical register file parity error",
+	"Flag register file parity error",
+	"Immediate displacement register file parity error",
+	"Address generator payload parity error",
+	"EX payload parity error",
+	"Checkpoint queue parity error",
+	"Retire dispatch queue parity error",
 	"Retire status queue parity error",
 	"Scheduling queue parity error",
 	"Branch buffer queue parity error",
@@ -226,36 +226,36 @@ static const char * const smca_ex_mce_desc[] = {
 };
 
 static const char * const smca_fp_mce_desc[] = {
-	"Physical register file parity",
-	"Freelist parity error",
-	"Schedule queue parity",
+	"Physical register file (PRF) parity error",
+	"Freelist (FL) parity error",
+	"Schedule queue parity error",
 	"NSQ parity error",
-	"Retire queue parity",
-	"Status register file parity",
+	"Retire queue (RQ) parity error",
+	"Status register file (SRF) parity error",
 	"Hardware assertion",
 };
 
 static const char * const smca_l3_mce_desc[] = {
-	"Shadow tag macro ECC error",
-	"Shadow tag macro multi-way-hit error",
-	"L3M tag ECC error",
-	"L3M tag multi-way-hit error",
-	"L3M data ECC error",
-	"XI parity, L3 fill done channel error",
-	"L3 victim queue parity",
-	"L3 HW assert",
+	"Shadow Tag Macro ECC Error",
+	"Shadow Tag Macro Multi-way-hit Error",
+	"L3M Tag ECC Error",
+	"L3M Tag Multi-way-hit Error",
+	"L3M Data ECC Error",
+	"SDP Parity Error or SystemReadDataError from XI",
+	"L3 Victim Queue Parity Error",
+	"L3 Hardware Assertion",
 };
 
 static const char * const smca_cs_mce_desc[] = {
-	"Illegal request from transport layer",
-	"Address violation",
-	"Security violation",
-	"Illegal response from transport layer",
-	"Unexpected response",
-	"Parity error on incoming request or probe response data",
-	"Parity error on incoming read response data",
-	"Atomic request parity",
-	"ECC error on probe filter access",
+	"Illegal Request",
+	"Address Violation",
+	"Security Violation",
+	"Illegal Response",
+	"Unexpected Response",
+	"Request or Probe Parity Error",
+	"Read Response Parity Error",
+	"Atomic Request Parity Error",
+	"Probe Filter ECC Error",
 };
 
 static const char * const smca_cs2_mce_desc[] = {
@@ -276,30 +276,30 @@ static const char * const smca_cs2_mce_desc[] = {
 };
 
 static const char * const smca_pie_mce_desc[] = {
-	"HW assert",
-	"Internal PIE register security violation",
-	"Error on GMI link",
-	"Poison data written to internal PIE register",
+	"Hardware Assert",
+	"Register security violation",
+	"Link Error",
+	"Poison data consumption",
 	"A deferred error was detected in the DF"
 };
 
 static const char * const smca_umc_mce_desc[] = {
 	"DRAM ECC error",
-	"Data poison error on DRAM",
+	"Data poison error",
 	"SDP parity error",
 	"Advanced peripheral bus error",
-	"Command/address parity error",
+	"Address/Command parity error",
 	"Write data CRC error",
 	"DCQ SRAM ECC error",
 	"AES SRAM ECC error",
 };
 
 static const char * const smca_pb_mce_desc[] = {
-	"Parameter Block RAM ECC error",
+	"An ECC error in the Parameter Block RAM array",
 };
 
 static const char * const smca_psp_mce_desc[] = {
-	"PSP RAM ECC or parity error",
+	"An ECC or parity error in a PSP RAM instance",
 };
 
 static const char * const smca_psp2_mce_desc[] = {
@@ -324,7 +324,7 @@ static const char * const smca_psp2_mce_desc[] = {
 };
 
 static const char * const smca_smu_mce_desc[] = {
-	"SMU RAM ECC or parity error",
+	"An ECC or parity error in an SMU RAM instance",
 };
 
 static const char * const smca_smu2_mce_desc[] = {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line
  2019-02-01 22:55 [PATCH 0/5] Add new SMCA bank types Ghannam, Yazen
                   ` (3 preceding siblings ...)
  2019-02-01 22:55 ` [PATCH 4/5] EDAC, mce_amd: Match error descriptions to latest documentation Ghannam, Yazen
@ 2019-02-01 22:55 ` Ghannam, Yazen
  2019-02-03 12:21   ` Borislav Petkov
  2019-02-04 20:45   ` [tip:ras/core] " tip-bot for Yazen Ghannam
  4 siblings, 2 replies; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-01 22:55 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp, tony.luck, x86

From: Yazen Ghannam <yazen.ghannam@amd.com>

Save a log line by printing the extended error code and the description
on a single line. This is similar to how errors are printed in other
subsystems, e.g. "#, description". If we don't have a valid description
then only the number/code is printed.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/mce_amd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 7e29ceabdf6f..f286b880f981 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -965,13 +965,12 @@ static void decode_smca_error(struct mce *m)
 
 	ip_name = smca_get_long_name(bank_type);
 
-	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+	pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
 
 	/* Only print the decode of valid error codes */
 	if (xec < smca_mce_descs[bank_type].num_descs &&
 			(hwid->xec_bitmap & BIT_ULL(xec))) {
-		pr_emerg(HW_ERR "%s Error: ", ip_name);
-		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+		pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
 	}
 
 	if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line
  2019-02-01 22:55 ` [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line Ghannam, Yazen
@ 2019-02-03 12:21   ` Borislav Petkov
  2019-02-04 15:14     ` Ghannam, Yazen
  2019-02-04 20:45   ` [tip:ras/core] " tip-bot for Yazen Ghannam
  1 sibling, 1 reply; 14+ messages in thread
From: Borislav Petkov @ 2019-02-03 12:21 UTC (permalink / raw)
  To: Ghannam, Yazen; +Cc: linux-edac, linux-kernel, bp, tony.luck, x86

On Fri, Feb 01, 2019 at 10:55:54PM +0000, Ghannam, Yazen wrote:
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> Save a log line by printing the extended error code and the description
> on a single line. This is similar to how errors are printed in other
> subsystems, e.g. "#, description". If we don't have a valid description
> then only the number/code is printed.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  drivers/edac/mce_amd.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> index 7e29ceabdf6f..f286b880f981 100644
> --- a/drivers/edac/mce_amd.c
> +++ b/drivers/edac/mce_amd.c
> @@ -965,13 +965,12 @@ static void decode_smca_error(struct mce *m)
>  
>  	ip_name = smca_get_long_name(bank_type);
>  
> -	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
> +	pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
>  
>  	/* Only print the decode of valid error codes */
>  	if (xec < smca_mce_descs[bank_type].num_descs &&
>  			(hwid->xec_bitmap & BIT_ULL(xec))) {
> -		pr_emerg(HW_ERR "%s Error: ", ip_name);
> -		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
> +		pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);

If the if-condition doesn't hit, we never print the \n

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line
  2019-02-03 12:21   ` Borislav Petkov
@ 2019-02-04 15:14     ` Ghannam, Yazen
  2019-02-04 16:36       ` Borislav Petkov
  0 siblings, 1 reply; 14+ messages in thread
From: Ghannam, Yazen @ 2019-02-04 15:14 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-edac, linux-kernel, bp, tony.luck, x86

> -----Original Message-----
> From: Borislav Petkov <bp@alien8.de>
> Sent: Sunday, February 3, 2019 6:22 AM
> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>
> Cc: linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org; bp@suse.de;
> tony.luck@intel.com; x86@kernel.org
> Subject: Re: [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description
> on a single line
> 
> On Fri, Feb 01, 2019 at 10:55:54PM +0000, Ghannam, Yazen wrote:
> > From: Yazen Ghannam <yazen.ghannam@amd.com>
> >
> > Save a log line by printing the extended error code and the description
> > on a single line. This is similar to how errors are printed in other
> > subsystems, e.g. "#, description". If we don't have a valid description
> > then only the number/code is printed.
> >
> > Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> > ---
> >  drivers/edac/mce_amd.c | 5 ++---
> >  1 file changed, 2 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> > index 7e29ceabdf6f..f286b880f981 100644
> > --- a/drivers/edac/mce_amd.c
> > +++ b/drivers/edac/mce_amd.c
> > @@ -965,13 +965,12 @@ static void decode_smca_error(struct mce *m)
> >
> >  	ip_name = smca_get_long_name(bank_type);
> >
> > -	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
> > +	pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
> >
> >  	/* Only print the decode of valid error codes */
> >  	if (xec < smca_mce_descs[bank_type].num_descs &&
> >  			(hwid->xec_bitmap & BIT_ULL(xec))) {
> > -		pr_emerg(HW_ERR "%s Error: ", ip_name);
> > -		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
> > +		pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
> 
> If the if-condition doesn't hit, we never print the \n
> 

Yes, you're right. But it seems that the next pr_* that's not a pr_cont will be on
a newline automatically.

Should I add an explicit newline just to be sure we get one?

Thanks,
Yazen

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line
  2019-02-04 15:14     ` Ghannam, Yazen
@ 2019-02-04 16:36       ` Borislav Petkov
  0 siblings, 0 replies; 14+ messages in thread
From: Borislav Petkov @ 2019-02-04 16:36 UTC (permalink / raw)
  To: Ghannam, Yazen; +Cc: linux-edac, linux-kernel, tony.luck, x86

On Mon, Feb 04, 2019 at 03:14:27PM +0000, Ghannam, Yazen wrote:
> Yes, you're right. But it seems that the next pr_* that's not a pr_cont will be on
> a newline automatically.

Ah, that must be that printk continuation fun around

  4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines")

> Should I add an explicit newline just to be sure we get one?

Nah, no need. If anything, we should strive to get rid of KERN_CONT, if
possible.

Thx.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types
  2019-02-01 22:55 ` [PATCH 1/5] x86/MCE/AMD: Add new MP5, NBIO, and PCIE " Ghannam, Yazen
@ 2019-02-04 20:42   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Yazen Ghannam @ 2019-02-04 20:42 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-edac, yazen.ghannam, hpa, mchehab, arnd, x86, bp,
	Shirish.S, tony.luck, qiuxu.zhuo, mingo, vishal.l.verma,
	keescook, tglx, puwen, linux-kernel, mingo

Commit-ID:  cbfa447edd6a3825fdb8a4ffae74ff7208f2d2c0
Gitweb:     https://git.kernel.org/tip/cbfa447edd6a3825fdb8a4ffae74ff7208f2d2c0
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Fri, 1 Feb 2019 22:55:51 +0000
Committer:  Borislav Petkov <bp@suse.de>
CommitDate: Sun, 3 Feb 2019 13:01:44 +0100

x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types

Add the (HWID, MCATYPE) tuples and names for the new MP5, NBIO, and
PCIE SMCA bank types.

Also, add their respective error descriptions to the MCE decoding module
edac_mce_amd.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Pu Wen <puwen@hygon.cn>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Shirish S <Shirish.S@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190201225534.8177-2-Yazen.Ghannam@amd.com
---
 arch/x86/include/asm/mce.h    |  3 +++
 arch/x86/kernel/cpu/mce/amd.c | 12 ++++++++++++
 drivers/edac/mce_amd.c        | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c1a812bd5a27..91b65d859ca8 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -312,6 +312,9 @@ enum smca_bank_types {
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
 	SMCA_SMU,	/* System Management Unit */
+	SMCA_MP5,	/* Microprocessor 5 Unit */
+	SMCA_NBIO,	/* Northbridge IO Unit */
+	SMCA_PCIE,	/* PCI Express Unit */
 	N_SMCA_BANK_TYPES
 };
 
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index ed3327342b40..00f60b8c7e4f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -93,6 +93,9 @@ static struct smca_bank_name smca_names[] = {
 	[SMCA_PB]	= { "param_block",	"Parameter Block" },
 	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
 	[SMCA_SMU]	= { "smu",		"System Management Unit" },
+	[SMCA_MP5]	= { "mp5",		"Microprocessor 5 Unit" },
+	[SMCA_NBIO]	= { "nbio",		"Northbridge IO Unit" },
+	[SMCA_PCIE]	= { "pcie",		"PCI Express Unit" },
 };
 
 static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -162,6 +165,15 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* System Management Unit MCA type */
 	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
+
+	/* Microprocessor 5 Unit MCA type */
+	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+	/* Northbridge IO Unit MCA type */
+	{ SMCA_NBIO,	 HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+	/* PCI Express Unit MCA type */
+	{ SMCA_PCIE,	 HWID_MCATYPE(0x46, 0x0), 0x1F },
 };
 
 struct smca_bank smca_banks[MAX_NR_BANKS];
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c605089d899f..5ab4ab3f0ce6 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -285,6 +285,35 @@ static const char * const smca_smu_mce_desc[] = {
 	"SMU RAM ECC or parity error",
 };
 
+static const char * const smca_mp5_mce_desc[] = {
+	"High SRAM ECC or parity error",
+	"Low SRAM ECC or parity error",
+	"Data Cache Bank A ECC or parity error",
+	"Data Cache Bank B ECC or parity error",
+	"Data Tag Cache Bank A ECC or parity error",
+	"Data Tag Cache Bank B ECC or parity error",
+	"Instruction Cache Bank A ECC or parity error",
+	"Instruction Cache Bank B ECC or parity error",
+	"Instruction Tag Cache Bank A ECC or parity error",
+	"Instruction Tag Cache Bank B ECC or parity error",
+};
+
+static const char * const smca_nbio_mce_desc[] = {
+	"ECC or Parity error",
+	"PCIE error",
+	"SDP ErrEvent error",
+	"SDP Egress Poison Error",
+	"IOHC Internal Poison Error",
+};
+
+static const char * const smca_pcie_mce_desc[] = {
+	"CCIX PER Message logging",
+	"CCIX Read Response with Status: Non-Data Error",
+	"CCIX Write Response with Status: Non-Data Error",
+	"CCIX Read Response with Status: Data Error",
+	"CCIX Non-okay write response with data error",
+};
+
 struct smca_mce_desc {
 	const char * const *descs;
 	unsigned int num_descs;
@@ -304,6 +333,9 @@ static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
 	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
 	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
+	[SMCA_MP5]	= { smca_mp5_mce_desc,	ARRAY_SIZE(smca_mp5_mce_desc)	},
+	[SMCA_NBIO]	= { smca_nbio_mce_desc,	ARRAY_SIZE(smca_nbio_mce_desc)	},
+	[SMCA_PCIE]	= { smca_pcie_mce_desc,	ARRAY_SIZE(smca_pcie_mce_desc)	},
 };
 
 static bool f12h_mc0_mce(u16 ec, u8 xec)

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units
  2019-02-01 22:55 ` [PATCH 2/5] x86/MCE/AMD: Add new McaTypes for CS, PSP, and SMU Ghannam, Yazen
@ 2019-02-04 20:43   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Yazen Ghannam @ 2019-02-04 20:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: puwen, keescook, linux-edac, x86, Shirish.S, mingo, hpa, bp,
	qiuxu.zhuo, vishal.l.verma, yazen.ghannam, tony.luck, tglx,
	mingo, linux-kernel, arnd, mchehab

Commit-ID:  3ad7e748c12cc771df6020a552def3e1727e8a17
Gitweb:     https://git.kernel.org/tip/3ad7e748c12cc771df6020a552def3e1727e8a17
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Fri, 1 Feb 2019 22:55:52 +0000
Committer:  Borislav Petkov <bp@suse.de>
CommitDate: Sun, 3 Feb 2019 13:01:57 +0100

x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units

The existing CS, PSP, and SMU SMCA bank types will see new versions (as
indicated by their McaTypes) in future SMCA systems.

Add the new (HWID, MCATYPE) tuples for these new versions. Reuse the
same names as the older versions, since they are logically the same to
the user. SMCA systems won't mix and match IP blocks with different
McaType versions in the same system, so there isn't a need to
distinguish them. The MCA_IPID register is saved when logging an MCA
error, and that can be used to triage the error.

Also, add the new error descriptions to edac_mce_amd. Some error types
(positions in the list) are overloaded compared to the previous
McaTypes. Therefore, just create new lists of the error descriptions to
keep things simple even if some of the error descriptions are the same
between versions.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Pu Wen <puwen@hygon.cn>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Shirish S <Shirish.S@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190201225534.8177-3-Yazen.Ghannam@amd.com
---
 arch/x86/include/asm/mce.h    |  3 +++
 arch/x86/kernel/cpu/mce/amd.c |  6 +++++
 drivers/edac/mce_amd.c        | 55 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 91b65d859ca8..299a38536567 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -307,11 +307,14 @@ enum smca_bank_types {
 	SMCA_FP,	/* Floating Point */
 	SMCA_L3_CACHE,	/* L3 Cache */
 	SMCA_CS,	/* Coherent Slave */
+	SMCA_CS_V2,	/* Coherent Slave */
 	SMCA_PIE,	/* Power, Interrupts, etc. */
 	SMCA_UMC,	/* Unified Memory Controller */
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
+	SMCA_PSP_V2,	/* Platform Security Processor */
 	SMCA_SMU,	/* System Management Unit */
+	SMCA_SMU_V2,	/* System Management Unit */
 	SMCA_MP5,	/* Microprocessor 5 Unit */
 	SMCA_NBIO,	/* Northbridge IO Unit */
 	SMCA_PCIE,	/* PCI Express Unit */
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 00f60b8c7e4f..bd1331b241ca 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -88,11 +88,14 @@ static struct smca_bank_name smca_names[] = {
 	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
 	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
 	[SMCA_CS]	= { "coherent_slave",	"Coherent Slave" },
+	[SMCA_CS_V2]	= { "coherent_slave",	"Coherent Slave" },
 	[SMCA_PIE]	= { "pie",		"Power, Interrupts, etc." },
 	[SMCA_UMC]	= { "umc",		"Unified Memory Controller" },
 	[SMCA_PB]	= { "param_block",	"Parameter Block" },
 	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
+	[SMCA_PSP_V2]	= { "psp",		"Platform Security Processor" },
 	[SMCA_SMU]	= { "smu",		"System Management Unit" },
+	[SMCA_SMU_V2]	= { "smu",		"System Management Unit" },
 	[SMCA_MP5]	= { "mp5",		"Microprocessor 5 Unit" },
 	[SMCA_NBIO]	= { "nbio",		"Northbridge IO Unit" },
 	[SMCA_PCIE]	= { "pcie",		"PCI Express Unit" },
@@ -153,6 +156,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 	/* Data Fabric MCA types */
 	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
 	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },
+	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
 
 	/* Unified Memory Controller MCA type */
 	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },
@@ -162,9 +166,11 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* Platform Security Processor MCA type */
 	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0), 0x1 },
+	{ SMCA_PSP_V2,	 HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
 
 	/* System Management Unit MCA type */
 	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
+	{ SMCA_SMU_V2,	 HWID_MCATYPE(0x01, 0x1), 0x7FF },
 
 	/* Microprocessor 5 Unit MCA type */
 	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2), 0x3FF },
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 5ab4ab3f0ce6..184c90172d17 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -257,6 +257,23 @@ static const char * const smca_cs_mce_desc[] = {
 	"ECC error on probe filter access",
 };
 
+static const char * const smca_cs2_mce_desc[] = {
+	"Illegal Request",
+	"Address Violation",
+	"Security Violation",
+	"Illegal Response",
+	"Unexpected Response",
+	"Request or Probe Parity Error",
+	"Read Response Parity Error",
+	"Atomic Request Parity Error",
+	"SDP read response had no match in the CS queue",
+	"Probe Filter Protocol Error",
+	"Probe Filter ECC Error",
+	"SDP read response had an unexpected RETRY error",
+	"Counter overflow error",
+	"Counter underflow error",
+};
+
 static const char * const smca_pie_mce_desc[] = {
 	"HW assert",
 	"Internal PIE register security violation",
@@ -281,10 +298,45 @@ static const char * const smca_psp_mce_desc[] = {
 	"PSP RAM ECC or parity error",
 };
 
+static const char * const smca_psp2_mce_desc[] = {
+	"High SRAM ECC or parity error",
+	"Low SRAM ECC or parity error",
+	"Instruction Cache Bank 0 ECC or parity error",
+	"Instruction Cache Bank 1 ECC or parity error",
+	"Instruction Tag Ram 0 parity error",
+	"Instruction Tag Ram 1 parity error",
+	"Data Cache Bank 0 ECC or parity error",
+	"Data Cache Bank 1 ECC or parity error",
+	"Data Cache Bank 2 ECC or parity error",
+	"Data Cache Bank 3 ECC or parity error",
+	"Data Tag Bank 0 parity error",
+	"Data Tag Bank 1 parity error",
+	"Data Tag Bank 2 parity error",
+	"Data Tag Bank 3 parity error",
+	"Dirty Data Ram parity error",
+	"TLB Bank 0 parity error",
+	"TLB Bank 1 parity error",
+	"System Hub Read Buffer ECC or parity error",
+};
+
 static const char * const smca_smu_mce_desc[] = {
 	"SMU RAM ECC or parity error",
 };
 
+static const char * const smca_smu2_mce_desc[] = {
+	"High SRAM ECC or parity error",
+	"Low SRAM ECC or parity error",
+	"Data Cache Bank A ECC or parity error",
+	"Data Cache Bank B ECC or parity error",
+	"Data Tag Cache Bank A ECC or parity error",
+	"Data Tag Cache Bank B ECC or parity error",
+	"Instruction Cache Bank A ECC or parity error",
+	"Instruction Cache Bank B ECC or parity error",
+	"Instruction Tag Cache Bank A ECC or parity error",
+	"Instruction Tag Cache Bank B ECC or parity error",
+	"System Hub Read Buffer ECC or parity error",
+};
+
 static const char * const smca_mp5_mce_desc[] = {
 	"High SRAM ECC or parity error",
 	"Low SRAM ECC or parity error",
@@ -328,11 +380,14 @@ static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_FP]	= { smca_fp_mce_desc,	ARRAY_SIZE(smca_fp_mce_desc)	},
 	[SMCA_L3_CACHE]	= { smca_l3_mce_desc,	ARRAY_SIZE(smca_l3_mce_desc)	},
 	[SMCA_CS]	= { smca_cs_mce_desc,	ARRAY_SIZE(smca_cs_mce_desc)	},
+	[SMCA_CS_V2]	= { smca_cs2_mce_desc,	ARRAY_SIZE(smca_cs2_mce_desc)	},
 	[SMCA_PIE]	= { smca_pie_mce_desc,	ARRAY_SIZE(smca_pie_mce_desc)	},
 	[SMCA_UMC]	= { smca_umc_mce_desc,	ARRAY_SIZE(smca_umc_mce_desc)	},
 	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
 	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
+	[SMCA_PSP_V2]	= { smca_psp2_mce_desc,	ARRAY_SIZE(smca_psp2_mce_desc)	},
 	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
+	[SMCA_SMU_V2]	= { smca_smu2_mce_desc,	ARRAY_SIZE(smca_smu2_mce_desc)	},
 	[SMCA_MP5]	= { smca_mp5_mce_desc,	ARRAY_SIZE(smca_mp5_mce_desc)	},
 	[SMCA_NBIO]	= { smca_nbio_mce_desc,	ARRAY_SIZE(smca_nbio_mce_desc)	},
 	[SMCA_PCIE]	= { smca_pcie_mce_desc,	ARRAY_SIZE(smca_pcie_mce_desc)	},

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types
  2019-02-01 22:55 ` [PATCH 3/5] EDAC, mce_amd: Add new error descriptions for some SMCA bank types Ghannam, Yazen
@ 2019-02-04 20:44   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Yazen Ghannam @ 2019-02-04 20:44 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: x86, mingo, bp, mchehab, tony.luck, linux-kernel, tglx,
	Shirish.S, mingo, keescook, yazen.ghannam, hpa, linux-edac

Commit-ID:  8a5dd2cd2f2e94878cacc969655a69ca214795ab
Gitweb:     https://git.kernel.org/tip/8a5dd2cd2f2e94878cacc969655a69ca214795ab
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Fri, 1 Feb 2019 22:55:52 +0000
Committer:  Borislav Petkov <bp@suse.de>
CommitDate: Sun, 3 Feb 2019 13:05:16 +0100

x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types

Some SMCA bank types on future systems will report new error types even
though the bank type is not treated as a new version. These new error
types will reported by bits that are reserved in past systems.

Add the new error descriptions to the lists in edac_mce_amd.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Shirish S <Shirish.S@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190201225534.8177-4-Yazen.Ghannam@amd.com
---
 arch/x86/kernel/cpu/mce/amd.c | 8 ++++----
 drivers/edac/mce_amd.c        | 6 +++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index bd1331b241ca..e64de5149e50 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -144,22 +144,22 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
 
 	/* ZN Core (HWID=0xB0) MCA types */
-	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
 	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
 	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
 	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
 	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
-	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0xFFF },
 	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6), 0x7F },
 	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
 
 	/* Data Fabric MCA types */
 	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
-	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },
+	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0x1F },
 	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
 
 	/* Unified Memory Controller MCA type */
-	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },
+	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0xFF },
 
 	/* Parameter Block MCA type */
 	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0), 0x1 },
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 184c90172d17..c79e650aa606 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -155,7 +155,7 @@ static const char * const smca_ls_mce_desc[] = {
 	"Store queue parity",
 	"Miss address buffer payload parity",
 	"L1 TLB parity",
-	"Reserved",
+	"DC Tag error type 5",
 	"DC tag error type 6",
 	"DC tag error type 1",
 	"Internal error type 1",
@@ -222,6 +222,7 @@ static const char * const smca_ex_mce_desc[] = {
 	"Retire status queue parity error",
 	"Scheduling queue parity error",
 	"Branch buffer queue parity error",
+	"Hardware Assertion error",
 };
 
 static const char * const smca_fp_mce_desc[] = {
@@ -279,6 +280,7 @@ static const char * const smca_pie_mce_desc[] = {
 	"Internal PIE register security violation",
 	"Error on GMI link",
 	"Poison data written to internal PIE register",
+	"A deferred error was detected in the DF"
 };
 
 static const char * const smca_umc_mce_desc[] = {
@@ -288,6 +290,8 @@ static const char * const smca_umc_mce_desc[] = {
 	"Advanced peripheral bus error",
 	"Command/address parity error",
 	"Write data CRC error",
+	"DCQ SRAM ECC error",
+	"AES SRAM ECC error",
 };
 
 static const char * const smca_pb_mce_desc[] = {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [tip:ras/core] EDAC, mce_amd: Match error descriptions to latest documentation
  2019-02-01 22:55 ` [PATCH 4/5] EDAC, mce_amd: Match error descriptions to latest documentation Ghannam, Yazen
@ 2019-02-04 20:44   ` tip-bot for Yazen Ghannam
  0 siblings, 0 replies; 14+ messages in thread
From: tip-bot for Yazen Ghannam @ 2019-02-04 20:44 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mchehab, tony.luck, bp, linux-edac, hpa, tglx, mingo,
	linux-kernel, yazen.ghannam

Commit-ID:  e03447ee718b331be8f3abc388c7bf7d325dfab4
Gitweb:     https://git.kernel.org/tip/e03447ee718b331be8f3abc388c7bf7d325dfab4
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Fri, 1 Feb 2019 22:55:53 +0000
Committer:  Borislav Petkov <bp@suse.de>
CommitDate: Sun, 3 Feb 2019 13:16:50 +0100

EDAC, mce_amd: Match error descriptions to latest documentation

Update the error descriptions to match the latest documentation for
easier searching. In some cases the changes are small and in other cases
the changes may be total rewording of the description.

No functional changes.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86@kernel.org
Link: https://lkml.kernel.org/r/20190201225534.8177-5-Yazen.Ghannam@amd.com
---
 drivers/edac/mce_amd.c | 166 ++++++++++++++++++++++++-------------------------
 1 file changed, 83 insertions(+), 83 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c79e650aa606..7e29ceabdf6f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -151,74 +151,74 @@ static const char * const mc6_mce_desc[] = {
 
 /* Scalable MCA error strings */
 static const char * const smca_ls_mce_desc[] = {
-	"Load queue parity",
-	"Store queue parity",
-	"Miss address buffer payload parity",
-	"L1 TLB parity",
+	"Load queue parity error",
+	"Store queue parity error",
+	"Miss address buffer payload parity error",
+	"Level 1 TLB parity error",
 	"DC Tag error type 5",
-	"DC tag error type 6",
-	"DC tag error type 1",
+	"DC Tag error type 6",
+	"DC Tag error type 1",
 	"Internal error type 1",
 	"Internal error type 2",
-	"Sys Read data error thread 0",
-	"Sys read data error thread 1",
-	"DC tag error type 2",
-	"DC data error type 1 (poison consumption)",
-	"DC data error type 2",
-	"DC data error type 3",
-	"DC tag error type 4",
-	"L2 TLB parity",
+	"System Read Data Error Thread 0",
+	"System Read Data Error Thread 1",
+	"DC Tag error type 2",
+	"DC Data error type 1 and poison consumption",
+	"DC Data error type 2",
+	"DC Data error type 3",
+	"DC Tag error type 4",
+	"Level 2 TLB parity error",
 	"PDC parity error",
-	"DC tag error type 3",
-	"DC tag error type 5",
-	"L2 fill data error",
+	"DC Tag error type 3",
+	"DC Tag error type 5",
+	"L2 Fill Data error",
 };
 
 static const char * const smca_if_mce_desc[] = {
-	"microtag probe port parity error",
-	"IC microtag or full tag multi-hit error",
-	"IC full tag parity",
-	"IC data array parity",
-	"Decoupling queue phys addr parity error",
-	"L0 ITLB parity error",
-	"L1 ITLB parity error",
-	"L2 ITLB parity error",
-	"BPQ snoop parity on Thread 0",
-	"BPQ snoop parity on Thread 1",
-	"L1 BTB multi-match error",
-	"L2 BTB multi-match error",
-	"L2 Cache Response Poison error",
-	"System Read Data error",
+	"Op Cache Microtag Probe Port Parity Error",
+	"IC Microtag or Full Tag Multi-hit Error",
+	"IC Full Tag Parity Error",
+	"IC Data Array Parity Error",
+	"Decoupling Queue PhysAddr Parity Error",
+	"L0 ITLB Parity Error",
+	"L1 ITLB Parity Error",
+	"L2 ITLB Parity Error",
+	"BPQ Thread 0 Snoop Parity Error",
+	"BPQ Thread 1 Snoop Parity Error",
+	"L1 BTB Multi-Match Error",
+	"L2 BTB Multi-Match Error",
+	"L2 Cache Response Poison Error",
+	"System Read Data Error",
 };
 
 static const char * const smca_l2_mce_desc[] = {
-	"L2M tag multi-way-hit error",
-	"L2M tag ECC error",
-	"L2M data ECC error",
-	"HW assert",
+	"L2M Tag Multiple-Way-Hit error",
+	"L2M Tag or State Array ECC Error",
+	"L2M Data Array ECC Error",
+	"Hardware Assert Error",
 };
 
 static const char * const smca_de_mce_desc[] = {
-	"uop cache tag parity error",
-	"uop cache data parity error",
-	"Insn buffer parity error",
-	"uop queue parity error",
-	"Insn dispatch queue parity error",
-	"Fetch address FIFO parity",
-	"Patch RAM data parity",
-	"Patch RAM sequencer parity",
-	"uop buffer parity"
+	"Micro-op cache tag parity error",
+	"Micro-op cache data parity error",
+	"Instruction buffer parity error",
+	"Micro-op queue parity error",
+	"Instruction dispatch queue parity error",
+	"Fetch address FIFO parity error",
+	"Patch RAM data parity error",
+	"Patch RAM sequencer parity error",
+	"Micro-op buffer parity error"
 };
 
 static const char * const smca_ex_mce_desc[] = {
-	"Watchdog timeout error",
-	"Phy register file parity",
-	"Flag register file parity",
-	"Immediate displacement register file parity",
-	"Address generator payload parity",
-	"EX payload parity",
-	"Checkpoint queue parity",
-	"Retire dispatch queue parity",
+	"Watchdog Timeout error",
+	"Physical register file parity error",
+	"Flag register file parity error",
+	"Immediate displacement register file parity error",
+	"Address generator payload parity error",
+	"EX payload parity error",
+	"Checkpoint queue parity error",
+	"Retire dispatch queue parity error",
 	"Retire status queue parity error",
 	"Scheduling queue parity error",
 	"Branch buffer queue parity error",
@@ -226,36 +226,36 @@ static const char * const smca_ex_mce_desc[] = {
 };
 
 static const char * const smca_fp_mce_desc[] = {
-	"Physical register file parity",
-	"Freelist parity error",
-	"Schedule queue parity",
+	"Physical register file (PRF) parity error",
+	"Freelist (FL) parity error",
+	"Schedule queue parity error",
 	"NSQ parity error",
-	"Retire queue parity",
-	"Status register file parity",
+	"Retire queue (RQ) parity error",
+	"Status register file (SRF) parity error",
 	"Hardware assertion",
 };
 
 static const char * const smca_l3_mce_desc[] = {
-	"Shadow tag macro ECC error",
-	"Shadow tag macro multi-way-hit error",
-	"L3M tag ECC error",
-	"L3M tag multi-way-hit error",
-	"L3M data ECC error",
-	"XI parity, L3 fill done channel error",
-	"L3 victim queue parity",
-	"L3 HW assert",
+	"Shadow Tag Macro ECC Error",
+	"Shadow Tag Macro Multi-way-hit Error",
+	"L3M Tag ECC Error",
+	"L3M Tag Multi-way-hit Error",
+	"L3M Data ECC Error",
+	"SDP Parity Error or SystemReadDataError from XI",
+	"L3 Victim Queue Parity Error",
+	"L3 Hardware Assertion",
 };
 
 static const char * const smca_cs_mce_desc[] = {
-	"Illegal request from transport layer",
-	"Address violation",
-	"Security violation",
-	"Illegal response from transport layer",
-	"Unexpected response",
-	"Parity error on incoming request or probe response data",
-	"Parity error on incoming read response data",
-	"Atomic request parity",
-	"ECC error on probe filter access",
+	"Illegal Request",
+	"Address Violation",
+	"Security Violation",
+	"Illegal Response",
+	"Unexpected Response",
+	"Request or Probe Parity Error",
+	"Read Response Parity Error",
+	"Atomic Request Parity Error",
+	"Probe Filter ECC Error",
 };
 
 static const char * const smca_cs2_mce_desc[] = {
@@ -276,30 +276,30 @@ static const char * const smca_cs2_mce_desc[] = {
 };
 
 static const char * const smca_pie_mce_desc[] = {
-	"HW assert",
-	"Internal PIE register security violation",
-	"Error on GMI link",
-	"Poison data written to internal PIE register",
+	"Hardware Assert",
+	"Register security violation",
+	"Link Error",
+	"Poison data consumption",
 	"A deferred error was detected in the DF"
 };
 
 static const char * const smca_umc_mce_desc[] = {
 	"DRAM ECC error",
-	"Data poison error on DRAM",
+	"Data poison error",
 	"SDP parity error",
 	"Advanced peripheral bus error",
-	"Command/address parity error",
+	"Address/Command parity error",
 	"Write data CRC error",
 	"DCQ SRAM ECC error",
 	"AES SRAM ECC error",
 };
 
 static const char * const smca_pb_mce_desc[] = {
-	"Parameter Block RAM ECC error",
+	"An ECC error in the Parameter Block RAM array",
 };
 
 static const char * const smca_psp_mce_desc[] = {
-	"PSP RAM ECC or parity error",
+	"An ECC or parity error in a PSP RAM instance",
 };
 
 static const char * const smca_psp2_mce_desc[] = {
@@ -324,7 +324,7 @@ static const char * const smca_psp2_mce_desc[] = {
 };
 
 static const char * const smca_smu_mce_desc[] = {
-	"SMU RAM ECC or parity error",
+	"An ECC or parity error in an SMU RAM instance",
 };
 
 static const char * const smca_smu2_mce_desc[] = {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [tip:ras/core] EDAC, mce_amd: Print ExtErrorCode and description on a single line
  2019-02-01 22:55 ` [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line Ghannam, Yazen
  2019-02-03 12:21   ` Borislav Petkov
@ 2019-02-04 20:45   ` tip-bot for Yazen Ghannam
  1 sibling, 0 replies; 14+ messages in thread
From: tip-bot for Yazen Ghannam @ 2019-02-04 20:45 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, bp, tony.luck, linux-edac, linux-kernel, mchehab,
	yazen.ghannam, hpa, mingo

Commit-ID:  1c1522d32ac49065f88e5a8b3d6e3a5613b20118
Gitweb:     https://git.kernel.org/tip/1c1522d32ac49065f88e5a8b3d6e3a5613b20118
Author:     Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Fri, 1 Feb 2019 22:55:54 +0000
Committer:  Borislav Petkov <bp@suse.de>
CommitDate: Mon, 4 Feb 2019 19:29:13 +0100

EDAC, mce_amd: Print ExtErrorCode and description on a single line

Save a log line by printing the extended error code and the description
on a single line. This is similar to how errors are printed in other
subsystems, e.g. "#, description". If we don't have a valid description
then only the number/code is printed.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86@kernel.org
Link: https://lkml.kernel.org/r/20190201225534.8177-6-Yazen.Ghannam@amd.com
---
 drivers/edac/mce_amd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 7e29ceabdf6f..f286b880f981 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -965,13 +965,12 @@ static void decode_smca_error(struct mce *m)
 
 	ip_name = smca_get_long_name(bank_type);
 
-	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+	pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
 
 	/* Only print the decode of valid error codes */
 	if (xec < smca_mce_descs[bank_type].num_descs &&
 			(hwid->xec_bitmap & BIT_ULL(xec))) {
-		pr_emerg(HW_ERR "%s Error: ", ip_name);
-		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+		pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
 	}
 
 	if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2019-02-04 20:45 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-01 22:55 [PATCH 0/5] Add new SMCA bank types Ghannam, Yazen
2019-02-01 22:55 ` [PATCH 1/5] x86/MCE/AMD: Add new MP5, NBIO, and PCIE " Ghannam, Yazen
2019-02-04 20:42   ` [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: " tip-bot for Yazen Ghannam
2019-02-01 22:55 ` [PATCH 2/5] x86/MCE/AMD: Add new McaTypes for CS, PSP, and SMU Ghannam, Yazen
2019-02-04 20:43   ` [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units tip-bot for Yazen Ghannam
2019-02-01 22:55 ` [PATCH 3/5] EDAC, mce_amd: Add new error descriptions for some SMCA bank types Ghannam, Yazen
2019-02-04 20:44   ` [tip:ras/core] x86/MCE/AMD, EDAC/mce_amd: " tip-bot for Yazen Ghannam
2019-02-01 22:55 ` [PATCH 4/5] EDAC, mce_amd: Match error descriptions to latest documentation Ghannam, Yazen
2019-02-04 20:44   ` [tip:ras/core] " tip-bot for Yazen Ghannam
2019-02-01 22:55 ` [PATCH 5/5] EDAC, mce_amd: Print ExtErrorCode and description on a single line Ghannam, Yazen
2019-02-03 12:21   ` Borislav Petkov
2019-02-04 15:14     ` Ghannam, Yazen
2019-02-04 16:36       ` Borislav Petkov
2019-02-04 20:45   ` [tip:ras/core] " tip-bot for Yazen Ghannam

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).