linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yazen Ghannam <yazen.ghannam@amd.com>
To: <linux-edac@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>, <tony.luck@intel.com>,
	<x86@kernel.org>, <Avadhut.Naik@amd.com>, <John.Allen@amd.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>
Subject: [PATCH v2 04/16] x86/mce/amd: Look up bank type by IPID
Date: Thu, 4 Apr 2024 10:13:47 -0500	[thread overview]
Message-ID: <20240404151359.47970-5-yazen.ghannam@amd.com> (raw)
In-Reply-To: <20240404151359.47970-1-yazen.ghannam@amd.com>

Scalable MCA systems use values within the MCA_IPID register to describe
a bank's type. Other information is not needed.

Currently, the bank types are cached during boot and this information is
used during boot and run time. The cached values are per-CPU and
per-bank. The boot path still needs the cached values, but that dependency
should be removed. The run time path does not need the cached values.

Determine a Scalable MCA bank's type using only the MCA_IPID values.

Keep old code until init path is cleaned up.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lkml.kernel.org/r/20231118193248.1296798-9-yazen.ghannam@amd.com
    
    v1->v2:
    * Include bitops started in dropped patches. (Yazen)
    * Update all users of smca_get_bank_type(). (Yazen)

 arch/x86/include/asm/mce.h              |  4 +-
 arch/x86/kernel/cpu/mce/amd.c           | 99 ++++++++++++++++++++++---
 drivers/edac/amd64_edac.c               |  2 +-
 drivers/edac/mce_amd.c                  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |  2 +-
 5 files changed, 94 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index de3118305838..adad99bac567 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -59,8 +59,6 @@
  *  - TCC bit is present in MCx_STATUS.
  */
 #define MCI_CONFIG_MCAX		0x1
-#define MCI_IPID_MCATYPE	0xFFFF0000
-#define MCI_IPID_HWID		0xFFF
 
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -342,7 +340,7 @@ extern int mce_threshold_create_device(unsigned int cpu);
 extern int mce_threshold_remove_device(unsigned int cpu);
 
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
-enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
+enum smca_bank_types smca_get_bank_type(u64 ipid);
 #else
 
 static inline int mce_threshold_create_device(unsigned int cpu)		{ return 0; };
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index bc78e751dfcc..c76bc158b6b6 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -7,6 +7,7 @@
  *
  *  All MC4_MISCi registers are shared between cores on a node.
  */
+#include <linux/bitfield.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
@@ -51,6 +52,10 @@
 #define DEF_INT_TYPE_APIC	0x2
 
 /* Scalable MCA: */
+#define MCI_IPID_MCATYPE	GENMASK_ULL(63, 48)
+#define MCI_IPID_HWID		GENMASK_ULL(43, 32)
+#define MCI_IPID_MCATYPE_OLD	0xFFFF0000	/* mask on 'high' in smca_configure() */
+#define MCI_IPID_HWID_OLD	0xFFF		/* mask on 'high' in smca_configure() */
 
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF	0xF000
@@ -131,7 +136,7 @@ static const char *smca_get_name(enum smca_bank_types t)
 	return smca_names[t];
 }
 
-enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
+static enum smca_bank_types smca_get_bank_type_old(unsigned int cpu, unsigned int bank)
 {
 	struct smca_bank *b;
 
@@ -144,9 +149,8 @@ enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
 
 	return b->hwid->bank_type;
 }
-EXPORT_SYMBOL_GPL(smca_get_bank_type);
 
-static const struct smca_hwid smca_hwid_mcatypes[] = {
+static const struct smca_hwid smca_hwid_mcatypes_old[] = {
 	/* { bank_type, hwid_mcatype } */
 
 	/* Reserved type */
@@ -210,6 +214,83 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
 	{ SMCA_GMI_PHY,	 HWID_MCATYPE(0x269, 0x0)	},
 };
 
+/* Keep sorted first by HWID then by McaType. */
+static const u32 smca_hwid_mcatypes[] = {
+	/* Reserved type */
+	[SMCA_RESERVED]		= HWID_MCATYPE(0x00, 0x0),
+
+	/* System Management Unit MCA type */
+	[SMCA_SMU]		= HWID_MCATYPE(0x01, 0x0),
+	[SMCA_SMU_V2]		= HWID_MCATYPE(0x01, 0x1),
+
+	/* Microprocessor 5 Unit MCA type */
+	[SMCA_MP5]		= HWID_MCATYPE(0x01, 0x2),
+
+	/* MPDMA MCA type */
+	[SMCA_MPDMA]		= HWID_MCATYPE(0x01, 0x3),
+
+	/* Parameter Block MCA type */
+	[SMCA_PB]		= HWID_MCATYPE(0x05, 0x0),
+
+	/* Northbridge IO Unit MCA type */
+	[SMCA_NBIO]		= HWID_MCATYPE(0x18, 0x0),
+
+	/* Data Fabric MCA types */
+	[SMCA_CS]		= HWID_MCATYPE(0x2E, 0x0),
+	[SMCA_PIE]		= HWID_MCATYPE(0x2E, 0x1),
+	[SMCA_CS_V2]		= HWID_MCATYPE(0x2E, 0x2),
+
+	/* PCI Express Unit MCA type */
+	[SMCA_PCIE]		= HWID_MCATYPE(0x46, 0x0),
+	[SMCA_PCIE_V2]		= HWID_MCATYPE(0x46, 0x1),
+
+	[SMCA_XGMI_PCS]		= HWID_MCATYPE(0x50, 0x0),
+	[SMCA_NBIF]		= HWID_MCATYPE(0x6C, 0x0),
+	[SMCA_SHUB]		= HWID_MCATYPE(0x80, 0x0),
+
+	/* Unified Memory Controller MCA type */
+	[SMCA_UMC]		= HWID_MCATYPE(0x96, 0x0),
+	[SMCA_UMC_V2]		= HWID_MCATYPE(0x96, 0x1),
+
+	[SMCA_SATA]		= HWID_MCATYPE(0xA8, 0x0),
+	[SMCA_USB]		= HWID_MCATYPE(0xAA, 0x0),
+
+	/* ZN Core (HWID=0xB0) MCA types */
+	[SMCA_LS]		= HWID_MCATYPE(0xB0, 0x0),
+	[SMCA_IF]		= HWID_MCATYPE(0xB0, 0x1),
+	[SMCA_L2_CACHE]		= HWID_MCATYPE(0xB0, 0x2),
+	[SMCA_DE]		= HWID_MCATYPE(0xB0, 0x3),
+	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
+	[SMCA_EX]		= HWID_MCATYPE(0xB0, 0x5),
+	[SMCA_FP]		= HWID_MCATYPE(0xB0, 0x6),
+	[SMCA_L3_CACHE]		= HWID_MCATYPE(0xB0, 0x7),
+	[SMCA_LS_V2]		= HWID_MCATYPE(0xB0, 0x10),
+
+	/* Platform Security Processor MCA type */
+	[SMCA_PSP]		= HWID_MCATYPE(0xFF, 0x0),
+	[SMCA_PSP_V2]		= HWID_MCATYPE(0xFF, 0x1),
+
+	[SMCA_GMI_PCS]		= HWID_MCATYPE(0x241, 0x0),
+	[SMCA_XGMI_PHY]		= HWID_MCATYPE(0x259, 0x0),
+	[SMCA_WAFL_PHY]		= HWID_MCATYPE(0x267, 0x0),
+	[SMCA_GMI_PHY]		= HWID_MCATYPE(0x269, 0x0),
+};
+
+enum smca_bank_types smca_get_bank_type(u64 ipid)
+{
+	enum smca_bank_types type;
+	u32 hwid_mcatype = HWID_MCATYPE(FIELD_GET(MCI_IPID_HWID, ipid),
+					FIELD_GET(MCI_IPID_MCATYPE, ipid));
+
+	for (type = 0; type < ARRAY_SIZE(smca_hwid_mcatypes); type++) {
+		if (hwid_mcatype == smca_hwid_mcatypes[type])
+			return type;
+	}
+
+	return N_SMCA_BANK_TYPES;
+}
+EXPORT_SYMBOL_GPL(smca_get_bank_type);
+
 /*
  * In SMCA enabled processors, we can have multiple banks for a given IP type.
  * So to define a unique name for each bank, we use a temp c-string to append
@@ -310,11 +391,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 		return;
 	}
 
-	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
-				    (high & MCI_IPID_MCATYPE) >> 16);
+	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID_OLD,
+				    (high & MCI_IPID_MCATYPE_OLD) >> 16);
 
-	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
-		s_hwid = &smca_hwid_mcatypes[i];
+	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes_old); i++) {
+		s_hwid = &smca_hwid_mcatypes_old[i];
 
 		if (hwid_mcatype == s_hwid->hwid_mcatype) {
 			this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
@@ -724,7 +805,7 @@ static bool smca_mce_is_memory_error(struct mce *m)
 	if (XEC(m->status, 0x3f))
 		return false;
 
-	bank_type = smca_get_bank_type(m->extcpu, m->bank);
+	bank_type = smca_get_bank_type(m->ipid);
 
 	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
 }
@@ -1097,7 +1178,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
 		return th_names[bank];
 	}
 
-	bank_type = smca_get_bank_type(cpu, bank);
+	bank_type = smca_get_bank_type_old(cpu, bank);
 	if (bank_type >= N_SMCA_BANK_TYPES)
 		return NULL;
 
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 1f3520d76861..4b3764ea7c59 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1041,7 +1041,7 @@ static int fixup_node_id(int node_id, struct mce *m)
 	/* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */
 	u8 nid = (m->ipid >> 44) & 0xF;
 
-	if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2)
+	if (smca_get_bank_type(m->ipid) != SMCA_UMC_V2)
 		return node_id;
 
 	/* Nodes below the GPU base node are CPU nodes and don't need a fixup. */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 8130c3dc64da..e02af5da1ec2 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -731,7 +731,7 @@ static const char *smca_get_long_name(enum smca_bank_types t)
 /* Decode errors according to Scalable MCA specification */
 static void decode_smca_error(struct mce *m)
 {
-	enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
+	enum smca_bank_types bank_type = smca_get_bank_type(m->ipid);
 	u8 xec = XEC(m->status, xec_mask);
 
 	if (bank_type >= N_SMCA_BANK_TYPES)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8ebab6f22e5a..c543600b759b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3546,7 +3546,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
 	 * and error occurred in DramECC (Extended error code = 0) then only
 	 * process the error, else bail out.
 	 */
-	if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
+	if (!m || !((smca_get_bank_type(m->ipid) == SMCA_UMC_V2) &&
 		    (XEC(m->status, 0x3f) == 0x0)))
 		return NOTIFY_DONE;
 
-- 
2.34.1


  parent reply	other threads:[~2024-04-04 15:14 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-04 15:13 [PATCH v2 00/16] MCA Updates Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 01/16] x86/mce: Define mce_setup() helpers for common and per-CPU fields Yazen Ghannam
2024-04-16 10:02   ` Borislav Petkov
2024-04-17 13:50     ` Yazen Ghannam
2024-04-22  8:13       ` Borislav Petkov
2024-04-04 15:13 ` [PATCH v2 02/16] x86/mce: Use mce_setup() helpers for apei_smca_report_x86_error() Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 03/16] x86/mce/amd: Use fixed bank number for quirks Yazen Ghannam
2024-04-04 15:13 ` Yazen Ghannam [this message]
2024-04-23 17:06   ` [PATCH v2 04/16] x86/mce/amd: Look up bank type by IPID Borislav Petkov
2024-04-23 19:16     ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 05/16] x86/mce/amd: Clean up SMCA configuration Yazen Ghannam
2024-04-23 19:06   ` Borislav Petkov
2024-04-23 19:32     ` Yazen Ghannam
2024-04-24  2:29       ` Borislav Petkov
2024-04-24 13:44         ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 06/16] x86/mce/amd: Prep DFR handler before enabling banks Yazen Ghannam
2024-04-24 18:34   ` Borislav Petkov
2024-04-25 13:31     ` Yazen Ghannam
2024-04-29 12:38       ` Borislav Petkov
2024-04-29 13:22         ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 07/16] x86/mce/amd: Simplify DFR handler setup Yazen Ghannam
2024-04-24 19:06   ` Borislav Petkov
2024-04-25 14:12     ` Yazen Ghannam
2024-04-29 12:59       ` Borislav Petkov
2024-04-29 13:56         ` Yazen Ghannam
2024-04-29 14:12           ` Borislav Petkov
2024-04-29 14:25             ` Yazen Ghannam
2024-04-30 13:47               ` Borislav Petkov
2024-04-29 18:34       ` Robert Richter
2024-04-30 18:06         ` Borislav Petkov
2024-05-02 16:02           ` Yazen Ghannam
2024-05-02 18:48             ` Robert Richter
2024-05-04 14:37               ` Borislav Petkov
2024-04-04 15:13 ` [PATCH v2 08/16] x86/mce/amd: Clean up enable_deferred_error_interrupt() Yazen Ghannam
2024-04-29 13:12   ` Borislav Petkov
2024-04-29 14:18     ` Yazen Ghannam
2024-05-04 14:41       ` Borislav Petkov
2024-04-04 15:13 ` [PATCH v2 09/16] x86/mce: Unify AMD THR handler with MCA Polling Yazen Ghannam
2024-04-29 13:40   ` Borislav Petkov
2024-04-29 14:36     ` Yazen Ghannam
2024-05-04 14:52       ` Borislav Petkov
2024-05-07 16:25         ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 10/16] x86/mce: Unify AMD DFR " Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 11/16] x86/mce: Skip AMD threshold init if no threshold banks found Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 12/16] x86/mce/amd: Support SMCA Corrected Error Interrupt Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 13/16] x86/mce: Add wrapper for struct mce to export vendor specific info Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 14/16] x86/mce, EDAC/mce_amd: Add support for new MCA_SYND{1,2} registers Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 15/16] x86/mce/apei: Handle variable register array size Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 16/16] EDAC/mce_amd: Add support for FRU Text in MCA Yazen Ghannam
2024-04-05 16:06   ` Luck, Tony
2024-04-07 13:19     ` Yazen Ghannam
2024-04-08 19:47     ` Naik, Avadhut
2024-04-08 19:57       ` Luck, Tony

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240404151359.47970-5-yazen.ghannam@amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=Avadhut.Naik@amd.com \
    --cc=John.Allen@amd.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).