linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs
@ 2019-02-19 20:25 Ghannam, Yazen
  2019-02-19 20:25 ` [PATCH 2/5] EDAC/amd64: Support more than two UMCs Ghannam, Yazen
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-19 20:25 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp

From: Yazen Ghannam <yazen.ghannam@amd.com>

Add the new Family 17h Model 30h PCI IDs to the AMD64 EDAC module.

This also fixes a probe failure that appeared when some other PCI IDs
for Fam17hMod30h were added to the AMD NB code.

Fixes: be3518a16ef2 (x86/amd_nb: Add PCI device IDs for family 17h, model 30h)
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/amd64_edac.c | 15 ++++++++++++++-
 drivers/edac/amd64_edac.h |  3 +++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6ea98575a402..9947437d9574 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2211,6 +2211,15 @@ static struct amd64_family_type family_types[] = {
 			.dbam_to_cs		= f17_base_addr_to_cs_size,
 		}
 	},
+	[F17_M30H_CPUS] = {
+		.ctl_name = "F17h_M30h",
+		.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
+		.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
+		.ops = {
+			.early_channel_count	= f17_early_channel_count,
+			.dbam_to_cs		= f17_base_addr_to_cs_size,
+		}
+	},
 };
 
 /*
@@ -3199,7 +3208,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 		break;
 
 	case 0x17:
-		if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
+		if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
+			fam_type = &family_types[F17_M30H_CPUS];
+			pvt->ops = &family_types[F17_M30H_CPUS].ops;
+			break;
+		} else if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
 			fam_type = &family_types[F17_M10H_CPUS];
 			pvt->ops = &family_types[F17_M10H_CPUS].ops;
 			break;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 4242f8e39c18..de8dbb0b42b5 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -117,6 +117,8 @@
 #define PCI_DEVICE_ID_AMD_17H_DF_F6	0x1466
 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8
 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
+#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490
+#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
 
 /*
  * Function 1 - Address Map
@@ -284,6 +286,7 @@ enum amd_families {
 	F16_M30H_CPUS,
 	F17_CPUS,
 	F17_M10H_CPUS,
+	F17_M30H_CPUS,
 	NUM_FAMILIES,
 };
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/5] EDAC/amd64: Support more than two UMCs
  2019-02-19 20:25 [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Ghannam, Yazen
@ 2019-02-19 20:25 ` Ghannam, Yazen
  2019-02-26 11:07   ` Borislav Petkov
  2019-02-19 20:25 ` [PATCH 3/5] EDAC/amd64: Recognize x16 Symbol Size Ghannam, Yazen
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-19 20:25 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp

From: Yazen Ghannam <yazen.ghannam@amd.com>

The first few models of Family 17h all had 2 UMCs per Die, so we treated
this as a fixed value. However, future systems may have more UMCs per
Die.

Related to this, we were finding the channel number and base address of
a UMC by matching on fixed, known values. However, a pattern has emerged
so we no longer need to match on hardcoded values.

Set the number of UMCs at init time based on the Family/Model. Also,
update the functions that find the channel number and base address of a
UMC in order to support more than two UMCs.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/amd64_edac.c | 42 ++++++++++++++++++---------------------
 drivers/edac/amd64_edac.h | 20 +++++++++++++++----
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9947437d9574..507d824fe45a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -449,6 +449,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 #define for_each_chip_select_mask(i, dct, pvt) \
 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
 
+#define for_each_umc(i) \
+	for (i = 0; i < num_umcs; i++)
+
 /*
  * @input_addr is an InputAddr associated with the node given by mci. Return the
  * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
@@ -722,7 +725,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
 	if (pvt->umc) {
 		u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
 
-		for (i = 0; i < NUM_UMCS; i++) {
+		for_each_umc(i) {
 			if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
 				continue;
 
@@ -811,7 +814,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
 	struct amd64_umc *umc;
 	u32 i, tmp, umc_base;
 
-	for (i = 0; i < NUM_UMCS; i++) {
+	for_each_umc(i) {
 		umc_base = get_umc_base(i);
 		umc = &pvt->umc[i];
 
@@ -1388,7 +1391,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
 	int i, channels = 0;
 
 	/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
-	for (i = 0; i < NUM_UMCS; i++)
+	for_each_umc(i)
 		channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
 
 	amd64_info("MCT channel count: %d\n", channels);
@@ -2473,18 +2476,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
  * To find the UMC channel represented by this bank we need to match on its
  * instance_id. The instance_id of a bank is held in the lower 32 bits of its
  * IPID.
+ *
+ * Currently, we can derive the channel number by looking at the 6th byte in
+ * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
+ * number.
  */
-static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+static int find_umc_channel(struct mce *m)
 {
-	u32 umc_instance_id[] = {0x50f00, 0x150f00};
-	u32 instance_id = m->ipid & GENMASK(31, 0);
-	int i, channel = -1;
-
-	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
-		if (umc_instance_id[i] == instance_id)
-			channel = i;
-
-	return channel;
+	return (m->ipid & GENMASK(31, 0)) >> 20;
 }
 
 static void decode_umc_error(int node_id, struct mce *m)
@@ -2506,11 +2505,7 @@ static void decode_umc_error(int node_id, struct mce *m)
 	if (m->status & MCI_STATUS_DEFERRED)
 		ecc_type = 3;
 
-	err.channel = find_umc_channel(pvt, m);
-	if (err.channel < 0) {
-		err.err_code = ERR_CHANNEL;
-		goto log_error;
-	}
+	err.channel = find_umc_channel(m);
 
 	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
 		err.err_code = ERR_NORM_ADDR;
@@ -2612,7 +2607,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
 	if (pvt->umc) {
 		u8 i;
 
-		for (i = 0; i < NUM_UMCS; i++) {
+		for_each_umc(i) {
 			/* Check enabled channels only: */
 			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
 			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
@@ -2648,7 +2643,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
 	u32 i, umc_base;
 
 	/* Read registers from each UMC */
-	for (i = 0; i < NUM_UMCS; i++) {
+	for_each_umc(i) {
 
 		umc_base = get_umc_base(i);
 		umc = &pvt->umc[i];
@@ -3061,7 +3056,8 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 	if (boot_cpu_data.x86 >= 0x17) {
 		u8 umc_en_mask = 0, ecc_en_mask = 0;
 
-		for (i = 0; i < NUM_UMCS; i++) {
+		set_num_umcs();
+		for_each_umc(i) {
 			u32 base = get_umc_base(i);
 
 			/* Only check enabled UMCs. */
@@ -3114,7 +3110,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
 {
 	u8 i, ecc_en = 1, cpk_en = 1;
 
-	for (i = 0; i < NUM_UMCS; i++) {
+	for_each_umc(i) {
 		if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
 			ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
 			cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
@@ -3273,7 +3269,7 @@ static int init_one_instance(unsigned int nid)
 		goto err_free;
 
 	if (pvt->fam >= 0x17) {
-		pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
+		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
 		if (!pvt->umc) {
 			ret = -ENOMEM;
 			goto err_free;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index de8dbb0b42b5..435450bf8684 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -274,8 +274,6 @@
 
 #define UMC_SDP_INIT			BIT(31)
 
-#define NUM_UMCS			2
-
 enum amd_families {
 	K8_CPUS = 0,
 	F10_CPUS,
@@ -399,8 +397,22 @@ struct err_info {
 
 static inline u32 get_umc_base(u8 channel)
 {
-	/* ch0: 0x50000, ch1: 0x150000 */
-	return 0x50000 + (!!channel << 20);
+	/* chY: 0xY50000 */
+	return 0x50000 + (channel << 20);
+}
+
+static u8 num_umcs;
+
+static inline void set_num_umcs(void)
+{
+	u8 model = boot_cpu_data.x86_model;
+
+	if (model >= 0x30 && model <= 0x3f)
+		num_umcs = 8;
+	else
+		num_umcs = 2;
+
+	edac_dbg(1, "Number of UMCs: %x", num_umcs);
 }
 
 static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/5] EDAC/amd64: Recognize x16 Symbol Size
  2019-02-19 20:25 [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Ghannam, Yazen
  2019-02-19 20:25 ` [PATCH 2/5] EDAC/amd64: Support more than two UMCs Ghannam, Yazen
@ 2019-02-19 20:25 ` Ghannam, Yazen
  2019-02-26 13:17   ` Borislav Petkov
  2019-02-19 20:26 ` [PATCH 4/5] EDAC/amd64: Support more than two Controllers for Chip Select handling Ghannam, Yazen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-19 20:25 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp

From: Yazen Ghannam <yazen.ghannam@amd.com>

Future AMD systems may support x16 symbol sizes.

Recognize if a system is using x16 symbol size. Also, simplify the print
statement.

Note that a x16 syndrome vector table is not necessary like with x4 or
x8. This is because systems that support x16 symbol sizes will be SMCA
systems. In which case, the syndrome can be directly extracted from the
MCA_SYND[Syndrome] field.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/amd64_edac.c | 15 +++++++++------
 drivers/edac/amd64_edac.h |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 507d824fe45a..bacd2cb22f29 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -897,8 +897,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
 
 	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
 
-	amd64_info("using %s syndromes.\n",
-			((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
+	amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
 }
 
 /*
@@ -2609,10 +2608,14 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
 
 		for_each_umc(i) {
 			/* Check enabled channels only: */
-			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
-			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
-				pvt->ecc_sym_sz = 8;
-				break;
+			if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
+				if (pvt->umc[i].ecc_ctrl & BIT(9)) {
+					pvt->ecc_sym_sz = 16;
+					break;
+				} else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
+					pvt->ecc_sym_sz = 8;
+					break;
+				}
 			}
 		}
 
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 435450bf8684..6c6ce783208a 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -364,7 +364,7 @@ struct amd64_pvt {
 	u32 dct_sel_hi;		/* DRAM Controller Select High */
 	u32 online_spare;	/* On-Line spare Reg */
 
-	/* x4 or x8 syndromes in use */
+	/* x4, x8, or x16 syndromes in use */
 	u8 ecc_sym_sz;
 
 	/* place to store error injection parameters prior to issue */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/5] EDAC/amd64: Support more than two Controllers for Chip Select handling
  2019-02-19 20:25 [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Ghannam, Yazen
  2019-02-19 20:25 ` [PATCH 2/5] EDAC/amd64: Support more than two UMCs Ghannam, Yazen
  2019-02-19 20:25 ` [PATCH 3/5] EDAC/amd64: Recognize x16 Symbol Size Ghannam, Yazen
@ 2019-02-19 20:26 ` Ghannam, Yazen
  2019-02-26 13:24   ` Borislav Petkov
  2019-02-19 20:26 ` [PATCH 5/5] EDAC/amd64: Adjust printed Chip Select sizes when interleaved Ghannam, Yazen
  2019-02-26 10:41 ` [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Borislav Petkov
  4 siblings, 1 reply; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-19 20:26 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp

From: Yazen Ghannam <yazen.ghannam@amd.com>

The struct chip_select array that's used for saving Chip Select bases
and masks is fixed at length of two. There should be one struct
chip_select for each controller, so this array should be increased to
support systems that may have more than two controllers.

Increase the size of the struct chip_select array to eight, which is the
largest number of controllers per die currently supported on AMD
systems.

Also, carve out the Fam17h+ reading of the bases/masks into a separate
function. This effectively reverts the original bases/masks reading code
to pre-Fam17h support.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/amd64_edac.c | 113 ++++++++++++++++++++------------------
 drivers/edac/amd64_edac.h |   5 +-
 2 files changed, 62 insertions(+), 56 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index bacd2cb22f29..656788699c64 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -911,89 +911,94 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
 	} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
+	} else if (pvt->fam >= 0x17) {
+		int umc;
+
+		for_each_umc(umc) {
+			pvt->csels[umc].b_cnt = 8;
+			pvt->csels[umc].m_cnt = 4;
+		}
+
 	} else {
 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
 	}
 }
 
+static void read_umc_base_mask(struct amd64_pvt *pvt)
+{
+	int cs, umc;
+
+	for_each_umc(umc) {
+		u32 umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR;
+		u32 umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
+
+		for_each_chip_select(cs, 0, pvt) {
+			u32 *base = &pvt->csels[umc].csbases[cs];
+			u32 *mask = &pvt->csels[umc].csmasks[cs];
+
+			u32 base_reg = umc_base_reg + (cs * 4);
+			u32 mask_reg = umc_mask_reg + ((cs >> 1) * 4);
+
+			if (!amd_smn_read(pvt->mc_node_id, base_reg, base))
+				edac_dbg(0, "  DCSB%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *base, base_reg);
+
+			if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask))
+				edac_dbg(0, "    DCSM%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *mask, mask_reg);
+		}
+	}
+}
+
 /*
  * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
  */
 static void read_dct_base_mask(struct amd64_pvt *pvt)
 {
-	int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;
+	int cs;
 
 	prep_chip_selects(pvt);
 
-	if (pvt->umc) {
-		base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
-		base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
-		mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
-		mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
-	} else {
-		base_reg0 = DCSB0;
-		base_reg1 = DCSB1;
-		mask_reg0 = DCSM0;
-		mask_reg1 = DCSM1;
-	}
+	if (pvt->umc)
+		return read_umc_base_mask(pvt);
 
 	for_each_chip_select(cs, 0, pvt) {
-		int reg0   = base_reg0 + (cs * 4);
-		int reg1   = base_reg1 + (cs * 4);
+		int reg0   = DCSB0 + (cs * 4);
+		int reg1   = DCSB1 + (cs * 4);
 		u32 *base0 = &pvt->csels[0].csbases[cs];
 		u32 *base1 = &pvt->csels[1].csbases[cs];
 
-		if (pvt->umc) {
-			if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
-				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: 0x%x\n",
-					 cs, *base0, reg0);
+		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
+			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
+				 cs, *base0, reg0);
 
-			if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
-				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: 0x%x\n",
-					 cs, *base1, reg1);
-		} else {
-			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
-				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
-					 cs, *base0, reg0);
-
-			if (pvt->fam == 0xf)
-				continue;
+		if (pvt->fam == 0xf)
+			continue;
 
-			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
-				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
-					 cs, *base1, (pvt->fam == 0x10) ? reg1
-								: reg0);
-		}
+		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
+			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
+				 cs, *base1, (pvt->fam == 0x10) ? reg1
+							: reg0);
 	}
 
 	for_each_chip_select_mask(cs, 0, pvt) {
-		int reg0   = mask_reg0 + (cs * 4);
-		int reg1   = mask_reg1 + (cs * 4);
+		int reg0   = DCSM0 + (cs * 4);
+		int reg1   = DCSM1 + (cs * 4);
 		u32 *mask0 = &pvt->csels[0].csmasks[cs];
 		u32 *mask1 = &pvt->csels[1].csmasks[cs];
 
-		if (pvt->umc) {
-			if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
-				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: 0x%x\n",
-					 cs, *mask0, reg0);
-
-			if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
-				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: 0x%x\n",
-					 cs, *mask1, reg1);
-		} else {
-			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
-				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
-					 cs, *mask0, reg0);
+		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
+			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
+				 cs, *mask0, reg0);
 
-			if (pvt->fam == 0xf)
-				continue;
+		if (pvt->fam == 0xf)
+			continue;
 
-			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
-				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
-					 cs, *mask1, (pvt->fam == 0x10) ? reg1
-								: reg0);
-		}
+		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
+			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
+				 cs, *mask1, (pvt->fam == 0x10) ? reg1
+							: reg0);
 	}
 }
 
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 6c6ce783208a..f672ed3325db 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -96,6 +96,7 @@
 /* Hardware limit on ChipSelect rows per MC and processors per system */
 #define NUM_CHIPSELECTS			8
 #define DRAM_RANGES			8
+#define NUM_CONTROLLERS			8
 
 #define ON true
 #define OFF false
@@ -351,8 +352,8 @@ struct amd64_pvt {
 	u32 dbam0;		/* DRAM Base Address Mapping reg for DCT0 */
 	u32 dbam1;		/* DRAM Base Address Mapping reg for DCT1 */
 
-	/* one for each DCT */
-	struct chip_select csels[2];
+	/* one for each DCT/UMC */
+	struct chip_select csels[NUM_CONTROLLERS];
 
 	/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
 	struct dram_range ranges[DRAM_RANGES];
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/5] EDAC/amd64: Adjust printed Chip Select sizes when interleaved
  2019-02-19 20:25 [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Ghannam, Yazen
                   ` (2 preceding siblings ...)
  2019-02-19 20:26 ` [PATCH 4/5] EDAC/amd64: Support more than two Controllers for Chip Select handling Ghannam, Yazen
@ 2019-02-19 20:26 ` Ghannam, Yazen
  2019-02-26 10:41 ` [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Borislav Petkov
  4 siblings, 0 replies; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-19 20:26 UTC (permalink / raw)
  To: linux-edac; +Cc: Ghannam, Yazen, linux-kernel, bp

From: Yazen Ghannam <yazen.ghannam@amd.com>

AMD systems may support Chip Select interleaving. However, on Fam17h+
this was not taken into account when printing the Chip Select sizes.

Add support to detect if Chip Selects are interleaved on Fam17h+, and
adjust the sizes accordingly.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/amd64_edac.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 656788699c64..073bfd112c44 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -784,6 +784,22 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
 		 (dclr & BIT(15)) ?  "yes" : "no");
 }
 
+/*
+ * The Address Mask should be a contiguous set of bits in the non-interleaved
+ * case. So to check for CS interleaving, find the most- and least-significant
+ * bits of the mask, generate a contiguous bitmask, and compare the two.
+ */
+static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs)
+{
+	u32 mask = pvt->csels[ctrl].csmasks[cs >> 1];
+	u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1;
+	u32 test_mask = GENMASK(msb, lsb);
+
+	edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask);
+
+	return mask ^ test_mask;
+}
+
 static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
 {
 	int dimm, size0, size1, cs0, cs1;
@@ -800,8 +816,19 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
 		size1 = 0;
 		cs1 = dimm * 2 + 1;
 
-		if (csrow_enabled(cs1, ctrl, pvt))
-			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+		if (csrow_enabled(cs1, ctrl, pvt)) {
+			/*
+			 * CS interleaving is only supported if both CSes have
+			 * the same amount of memory. Because they are
+			 * interleaved, it will look like both CSes have the
+			 * full amount of memory. Save the size for both as
+			 * half the amount we found on CS0, if interleaved.
+			 */
+			if (f17_cs_interleaved(pvt, ctrl, cs1))
+				size1 = size0 = (size0 >> 1);
+			else
+				size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+		}
 
 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
 				cs0,	size0,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs
  2019-02-19 20:25 [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Ghannam, Yazen
                   ` (3 preceding siblings ...)
  2019-02-19 20:26 ` [PATCH 5/5] EDAC/amd64: Adjust printed Chip Select sizes when interleaved Ghannam, Yazen
@ 2019-02-26 10:41 ` Borislav Petkov
  2019-02-26 15:07   ` Ghannam, Yazen
  4 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2019-02-26 10:41 UTC (permalink / raw)
  To: Ghannam, Yazen; +Cc: linux-edac, linux-kernel

Just nitpicks:

On Tue, Feb 19, 2019 at 08:25:51PM +0000, Ghannam, Yazen wrote:

> Subject: Re: [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs

Write that "Fam17hMod30h" in a human readable form pls. Especially in
the subject: "family 0x17, models 0x30..."

> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> Add the new Family 17h Model 30h PCI IDs to the AMD64 EDAC module.
> 
> This also fixes a probe failure that appeared when some other PCI IDs
> for Fam17hMod30h were added to the AMD NB code.
> 
> Fixes: be3518a16ef2 (x86/amd_nb: Add PCI device IDs for family 17h, model 30h)
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  drivers/edac/amd64_edac.c | 15 ++++++++++++++-
>  drivers/edac/amd64_edac.h |  3 +++
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 6ea98575a402..9947437d9574 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -2211,6 +2211,15 @@ static struct amd64_family_type family_types[] = {
>  			.dbam_to_cs		= f17_base_addr_to_cs_size,
>  		}
>  	},
> +	[F17_M30H_CPUS] = {
> +		.ctl_name = "F17h_M30h",
> +		.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
> +		.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
> +		.ops = {
> +			.early_channel_count	= f17_early_channel_count,
> +			.dbam_to_cs		= f17_base_addr_to_cs_size,
> +		}
> +	},
>  };
>  
>  /*
> @@ -3199,7 +3208,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
>  		break;
>  
>  	case 0x17:
> -		if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
> +		if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
> +			fam_type = &family_types[F17_M30H_CPUS];
> +			pvt->ops = &family_types[F17_M30H_CPUS].ops;
> +			break;
> +		} else if (pvt->model >= 0x10 && pvt->model <= 0x2f) {

Sort the checks in increasing models so that it is easier to follow.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] EDAC/amd64: Support more than two UMCs
  2019-02-19 20:25 ` [PATCH 2/5] EDAC/amd64: Support more than two UMCs Ghannam, Yazen
@ 2019-02-26 11:07   ` Borislav Petkov
  2019-02-26 15:12     ` Ghannam, Yazen
  0 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2019-02-26 11:07 UTC (permalink / raw)
  To: Ghannam, Yazen; +Cc: linux-edac, linux-kernel

On Tue, Feb 19, 2019 at 08:25:53PM +0000, Ghannam, Yazen wrote:
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> The first few models of Family 17h all had 2 UMCs per Die, so we treated

Write out what that abbreviation UMC is.

> this as a fixed value. However, future systems may have more UMCs per
> Die.
> 
> Related to this, we were finding the channel number and base address of

Passive tone pls, no "we". From Documentation/process/submitting-patches.rst:

 "Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
  instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
  to do frotz", as if you are giving orders to the codebase to change
  its behaviour."

> a UMC by matching on fixed, known values. However, a pattern has emerged
> so we no longer need to match on hardcoded values.

a pattern has emerged?!

> Set the number of UMCs at init time based on the Family/Model. Also,
> update the functions that find the channel number and base address of a
> UMC in order to support more than two UMCs.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  drivers/edac/amd64_edac.c | 42 ++++++++++++++++++---------------------
>  drivers/edac/amd64_edac.h | 20 +++++++++++++++----
>  2 files changed, 35 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 9947437d9574..507d824fe45a 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -449,6 +449,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
>  #define for_each_chip_select_mask(i, dct, pvt) \
>  	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
>  
> +#define for_each_umc(i) \
> +	for (i = 0; i < num_umcs; i++)

That change and the resulting conversion needs to be a separate patch so
that the diff doesn't distract from the UMC count figuring out addition.

>  /*
>   * @input_addr is an InputAddr associated with the node given by mci. Return the
>   * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
> @@ -722,7 +725,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
>  	if (pvt->umc) {
>  		u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
>  
> -		for (i = 0; i < NUM_UMCS; i++) {
> +		for_each_umc(i) {
>  			if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
>  				continue;
>  
> @@ -811,7 +814,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
>  	struct amd64_umc *umc;
>  	u32 i, tmp, umc_base;
>  
> -	for (i = 0; i < NUM_UMCS; i++) {
> +	for_each_umc(i) {
>  		umc_base = get_umc_base(i);
>  		umc = &pvt->umc[i];
>  
> @@ -1388,7 +1391,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
>  	int i, channels = 0;
>  
>  	/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
> -	for (i = 0; i < NUM_UMCS; i++)
> +	for_each_umc(i)
>  		channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
>  
>  	amd64_info("MCT channel count: %d\n", channels);
> @@ -2473,18 +2476,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
>   * To find the UMC channel represented by this bank we need to match on its
>   * instance_id. The instance_id of a bank is held in the lower 32 bits of its
>   * IPID.
> + *
> + * Currently, we can derive the channel number by looking at the 6th byte in
> + * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
> + * number.
>   */
> -static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
> +static int find_umc_channel(struct mce *m)
>  {
> -	u32 umc_instance_id[] = {0x50f00, 0x150f00};
> -	u32 instance_id = m->ipid & GENMASK(31, 0);
> -	int i, channel = -1;
> -
> -	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
> -		if (umc_instance_id[i] == instance_id)
> -			channel = i;
> -
> -	return channel;
> +	return (m->ipid & GENMASK(31, 0)) >> 20;
>  }
>  
>  static void decode_umc_error(int node_id, struct mce *m)
> @@ -2506,11 +2505,7 @@ static void decode_umc_error(int node_id, struct mce *m)
>  	if (m->status & MCI_STATUS_DEFERRED)
>  		ecc_type = 3;
>  
> -	err.channel = find_umc_channel(pvt, m);
> -	if (err.channel < 0) {
> -		err.err_code = ERR_CHANNEL;
> -		goto log_error;
> -	}
> +	err.channel = find_umc_channel(m);
>  
>  	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
>  		err.err_code = ERR_NORM_ADDR;
> @@ -2612,7 +2607,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
>  	if (pvt->umc) {
>  		u8 i;
>  
> -		for (i = 0; i < NUM_UMCS; i++) {
> +		for_each_umc(i) {
>  			/* Check enabled channels only: */
>  			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
>  			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
> @@ -2648,7 +2643,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
>  	u32 i, umc_base;
>  
>  	/* Read registers from each UMC */
> -	for (i = 0; i < NUM_UMCS; i++) {
> +	for_each_umc(i) {
>  
>  		umc_base = get_umc_base(i);
>  		umc = &pvt->umc[i];
> @@ -3061,7 +3056,8 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
>  	if (boot_cpu_data.x86 >= 0x17) {
>  		u8 umc_en_mask = 0, ecc_en_mask = 0;
>  
> -		for (i = 0; i < NUM_UMCS; i++) {
> +		set_num_umcs();

Do you think this call belongs here conceptually?

It is called once per each driver instance even though the number is
static and needs to be computed only once, at driver init.

Also, the function name should be something like "compute_num_umcs" or
so.

> +		for_each_umc(i) {
>  			u32 base = get_umc_base(i);
>  
>  			/* Only check enabled UMCs. */
> @@ -3114,7 +3110,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
>  {
>  	u8 i, ecc_en = 1, cpk_en = 1;
>  
> -	for (i = 0; i < NUM_UMCS; i++) {
> +	for_each_umc(i) {
>  		if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
>  			ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
>  			cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
> @@ -3273,7 +3269,7 @@ static int init_one_instance(unsigned int nid)
>  		goto err_free;
>  
>  	if (pvt->fam >= 0x17) {
> -		pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
> +		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
>  		if (!pvt->umc) {
>  			ret = -ENOMEM;
>  			goto err_free;
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index de8dbb0b42b5..435450bf8684 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -274,8 +274,6 @@
>  
>  #define UMC_SDP_INIT			BIT(31)
>  
> -#define NUM_UMCS			2
> -
>  enum amd_families {
>  	K8_CPUS = 0,
>  	F10_CPUS,
> @@ -399,8 +397,22 @@ struct err_info {
>  
>  static inline u32 get_umc_base(u8 channel)
>  {
> -	/* ch0: 0x50000, ch1: 0x150000 */
> -	return 0x50000 + (!!channel << 20);
> +	/* chY: 0xY50000 */
> +	return 0x50000 + (channel << 20);
> +}
> +
> +static u8 num_umcs;
> +

You and I know what "UMC" means but I doubt the reader does. Please add
some blurb here so that it is clear what it is.

> +static inline void set_num_umcs(void)
> +{
> +	u8 model = boot_cpu_data.x86_model;
> +
> +	if (model >= 0x30 && model <= 0x3f)
> +		num_umcs = 8;
> +	else
> +		num_umcs = 2;
> +
> +	edac_dbg(1, "Number of UMCs: %x", num_umcs);
>  }

Why is this function inline and in the header? I don't see anything
special about it and should be in amd64_edac.c instead, AFAICT.

Thx.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/5] EDAC/amd64: Recognize x16 Symbol Size
  2019-02-19 20:25 ` [PATCH 3/5] EDAC/amd64: Recognize x16 Symbol Size Ghannam, Yazen
@ 2019-02-26 13:17   ` Borislav Petkov
  0 siblings, 0 replies; 11+ messages in thread
From: Borislav Petkov @ 2019-02-26 13:17 UTC (permalink / raw)
  To: Ghannam, Yazen; +Cc: linux-edac, linux-kernel

On Tue, Feb 19, 2019 at 08:25:54PM +0000, Ghannam, Yazen wrote:
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> Future AMD systems may support x16 symbol sizes.
> 
> Recognize if a system is using x16 symbol size. Also, simplify the print
> statement.
> 
> Note that a x16 syndrome vector table is not necessary like with x4 or
> x8. This is because systems that support x16 symbol sizes will be SMCA
> systems. In which case, the syndrome can be directly extracted from the
> MCA_SYND[Syndrome] field.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  drivers/edac/amd64_edac.c | 15 +++++++++------
>  drivers/edac/amd64_edac.h |  2 +-
>  2 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 507d824fe45a..bacd2cb22f29 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -897,8 +897,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
>  
>  	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
>  
> -	amd64_info("using %s syndromes.\n",
> -			((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
> +	amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
>  }
>  
>  /*
> @@ -2609,10 +2608,14 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
>  
>  		for_each_umc(i) {
>  			/* Check enabled channels only: */
> -			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
> -			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
> -				pvt->ecc_sym_sz = 8;
> -				break;
> +			if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
> +				if (pvt->umc[i].ecc_ctrl & BIT(9)) {
> +					pvt->ecc_sym_sz = 16;
> +					break;
> +				} else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
> +					pvt->ecc_sym_sz = 8;
> +					break;
> +				}
>  			}
>  		}
>  

Let's simplify this function a bit. Diff ontop:

---
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index bacd2cb22f29..df21b00dca08 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2611,18 +2611,14 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
 			if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
 				if (pvt->umc[i].ecc_ctrl & BIT(9)) {
 					pvt->ecc_sym_sz = 16;
-					break;
+					return;
 				} else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
 					pvt->ecc_sym_sz = 8;
-					break;
+					return;
 				}
 			}
 		}
-
-		return;
-	}
-
-	if (pvt->fam >= 0x10) {
+	} else if (pvt->fam >= 0x10) {
 		u32 tmp;
 
 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/5] EDAC/amd64: Support more than two Controllers for Chip Select handling
  2019-02-19 20:26 ` [PATCH 4/5] EDAC/amd64: Support more than two Controllers for Chip Select handling Ghannam, Yazen
@ 2019-02-26 13:24   ` Borislav Petkov
  0 siblings, 0 replies; 11+ messages in thread
From: Borislav Petkov @ 2019-02-26 13:24 UTC (permalink / raw)
  To: Ghannam, Yazen; +Cc: linux-edac, linux-kernel

On Tue, Feb 19, 2019 at 08:26:09PM +0000, Ghannam, Yazen wrote:
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> The struct chip_select array that's used for saving Chip Select bases
> and masks is fixed at length of two. There should be one struct
> chip_select for each controller, so this array should be increased to
> support systems that may have more than two controllers.
> 
> Increase the size of the struct chip_select array to eight, which is the
> largest number of controllers per die currently supported on AMD
> systems.
> 
> Also, carve out the Fam17h+ reading of the bases/masks into a separate
> function. This effectively reverts the original bases/masks reading code
> to pre-Fam17h support.

I like it simpler again. :-)

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs
  2019-02-26 10:41 ` [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Borislav Petkov
@ 2019-02-26 15:07   ` Ghannam, Yazen
  0 siblings, 0 replies; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-26 15:07 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-edac, linux-kernel

> -----Original Message-----
> From: linux-edac-owner@vger.kernel.org <linux-edac-owner@vger.kernel.org> On Behalf Of Borislav Petkov
> Sent: Tuesday, February 26, 2019 4:41 AM
> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>
> Cc: linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs
> 
> Just nitpicks:
> 
> On Tue, Feb 19, 2019 at 08:25:51PM +0000, Ghannam, Yazen wrote:
> 
> > Subject: Re: [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs
> 
> Write that "Fam17hMod30h" in a human readable form pls. Especially in
> the subject: "family 0x17, models 0x30..."
> 

Will do.

> > From: Yazen Ghannam <yazen.ghannam@amd.com>
> >
> > Add the new Family 17h Model 30h PCI IDs to the AMD64 EDAC module.
> >
> > This also fixes a probe failure that appeared when some other PCI IDs
> > for Fam17hMod30h were added to the AMD NB code.
> >
> > Fixes: be3518a16ef2 (x86/amd_nb: Add PCI device IDs for family 17h, model 30h)
> > Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> > ---
> >  drivers/edac/amd64_edac.c | 15 ++++++++++++++-
> >  drivers/edac/amd64_edac.h |  3 +++
> >  2 files changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> > index 6ea98575a402..9947437d9574 100644
> > --- a/drivers/edac/amd64_edac.c
> > +++ b/drivers/edac/amd64_edac.c
> > @@ -2211,6 +2211,15 @@ static struct amd64_family_type family_types[] = {
> >  			.dbam_to_cs		= f17_base_addr_to_cs_size,
> >  		}
> >  	},
> > +	[F17_M30H_CPUS] = {
> > +		.ctl_name = "F17h_M30h",
> > +		.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
> > +		.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
> > +		.ops = {
> > +			.early_channel_count	= f17_early_channel_count,
> > +			.dbam_to_cs		= f17_base_addr_to_cs_size,
> > +		}
> > +	},
> >  };
> >
> >  /*
> > @@ -3199,7 +3208,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
> >  		break;
> >
> >  	case 0x17:
> > -		if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
> > +		if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
> > +			fam_type = &family_types[F17_M30H_CPUS];
> > +			pvt->ops = &family_types[F17_M30H_CPUS].ops;
> > +			break;
> > +		} else if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
> 
> Sort the checks in increasing models so that it is easier to follow.
> 

Okay, will do.

Thanks,
Yazen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 2/5] EDAC/amd64: Support more than two UMCs
  2019-02-26 11:07   ` Borislav Petkov
@ 2019-02-26 15:12     ` Ghannam, Yazen
  0 siblings, 0 replies; 11+ messages in thread
From: Ghannam, Yazen @ 2019-02-26 15:12 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: linux-edac, linux-kernel

> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org <linux-kernel-owner@vger.kernel.org> On Behalf Of Borislav Petkov
> Sent: Tuesday, February 26, 2019 5:07 AM
> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>
> Cc: linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 2/5] EDAC/amd64: Support more than two UMCs
> 
> On Tue, Feb 19, 2019 at 08:25:53PM +0000, Ghannam, Yazen wrote:
> > From: Yazen Ghannam <yazen.ghannam@amd.com>
> >
> > The first few models of Family 17h all had 2 UMCs per Die, so we treated
> 
> Write out what that abbreviation UMC is.
> 

Okay.

> > this as a fixed value. However, future systems may have more UMCs per
> > Die.
> >
> > Related to this, we were finding the channel number and base address of
> 
> Passive tone pls, no "we". From Documentation/process/submitting-patches.rst:
> 
>  "Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
>   instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
>   to do frotz", as if you are giving orders to the codebase to change
>   its behaviour."
> 

Okay.

> > a UMC by matching on fixed, known values. However, a pattern has emerged
> > so we no longer need to match on hardcoded values.
> 
> a pattern has emerged?!
> 

Yes, that's right. Sounds pretty mysterious... 😊

I'll write it out to make it clear.

> > Set the number of UMCs at init time based on the Family/Model. Also,
> > update the functions that find the channel number and base address of a
> > UMC in order to support more than two UMCs.
> >
> > Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> > ---
> >  drivers/edac/amd64_edac.c | 42 ++++++++++++++++++---------------------
> >  drivers/edac/amd64_edac.h | 20 +++++++++++++++----
> >  2 files changed, 35 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> > index 9947437d9574..507d824fe45a 100644
> > --- a/drivers/edac/amd64_edac.c
> > +++ b/drivers/edac/amd64_edac.c
> > @@ -449,6 +449,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
> >  #define for_each_chip_select_mask(i, dct, pvt) \
> >  	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
> >
> > +#define for_each_umc(i) \
> > +	for (i = 0; i < num_umcs; i++)
> 
> That change and the resulting conversion needs to be a separate patch so
> that the diff doesn't distract from the UMC count figuring out addition.
> 

Okay.


> >  /*
> >   * @input_addr is an InputAddr associated with the node given by mci. Return the
> >   * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
> > @@ -722,7 +725,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
> >  	if (pvt->umc) {
> >  		u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
> >
> > -		for (i = 0; i < NUM_UMCS; i++) {
> > +		for_each_umc(i) {
> >  			if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
> >  				continue;
> >
> > @@ -811,7 +814,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
> >  	struct amd64_umc *umc;
> >  	u32 i, tmp, umc_base;
> >
> > -	for (i = 0; i < NUM_UMCS; i++) {
> > +	for_each_umc(i) {
> >  		umc_base = get_umc_base(i);
> >  		umc = &pvt->umc[i];
> >
> > @@ -1388,7 +1391,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
> >  	int i, channels = 0;
> >
> >  	/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
> > -	for (i = 0; i < NUM_UMCS; i++)
> > +	for_each_umc(i)
> >  		channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
> >
> >  	amd64_info("MCT channel count: %d\n", channels);
> > @@ -2473,18 +2476,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
> >   * To find the UMC channel represented by this bank we need to match on its
> >   * instance_id. The instance_id of a bank is held in the lower 32 bits of its
> >   * IPID.
> > + *
> > + * Currently, we can derive the channel number by looking at the 6th byte in
> > + * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
> > + * number.
> >   */
> > -static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
> > +static int find_umc_channel(struct mce *m)
> >  {
> > -	u32 umc_instance_id[] = {0x50f00, 0x150f00};
> > -	u32 instance_id = m->ipid & GENMASK(31, 0);
> > -	int i, channel = -1;
> > -
> > -	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
> > -		if (umc_instance_id[i] == instance_id)
> > -			channel = i;
> > -
> > -	return channel;
> > +	return (m->ipid & GENMASK(31, 0)) >> 20;
> >  }
> >
> >  static void decode_umc_error(int node_id, struct mce *m)
> > @@ -2506,11 +2505,7 @@ static void decode_umc_error(int node_id, struct mce *m)
> >  	if (m->status & MCI_STATUS_DEFERRED)
> >  		ecc_type = 3;
> >
> > -	err.channel = find_umc_channel(pvt, m);
> > -	if (err.channel < 0) {
> > -		err.err_code = ERR_CHANNEL;
> > -		goto log_error;
> > -	}
> > +	err.channel = find_umc_channel(m);
> >
> >  	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
> >  		err.err_code = ERR_NORM_ADDR;
> > @@ -2612,7 +2607,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
> >  	if (pvt->umc) {
> >  		u8 i;
> >
> > -		for (i = 0; i < NUM_UMCS; i++) {
> > +		for_each_umc(i) {
> >  			/* Check enabled channels only: */
> >  			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
> >  			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
> > @@ -2648,7 +2643,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
> >  	u32 i, umc_base;
> >
> >  	/* Read registers from each UMC */
> > -	for (i = 0; i < NUM_UMCS; i++) {
> > +	for_each_umc(i) {
> >
> >  		umc_base = get_umc_base(i);
> >  		umc = &pvt->umc[i];
> > @@ -3061,7 +3056,8 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
> >  	if (boot_cpu_data.x86 >= 0x17) {
> >  		u8 umc_en_mask = 0, ecc_en_mask = 0;
> >
> > -		for (i = 0; i < NUM_UMCS; i++) {
> > +		set_num_umcs();
> 
> Do you think this call belongs here conceptually?
> 
> It is called once per each driver instance even though the number is
> static and needs to be computed only once, at driver init.
> 

Yes, you're right. I'll fix that.

> Also, the function name should be something like "compute_num_umcs" or
> so.
> 

Okay.

> > +		for_each_umc(i) {
> >  			u32 base = get_umc_base(i);
> >
> >  			/* Only check enabled UMCs. */
> > @@ -3114,7 +3110,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
> >  {
> >  	u8 i, ecc_en = 1, cpk_en = 1;
> >
> > -	for (i = 0; i < NUM_UMCS; i++) {
> > +	for_each_umc(i) {
> >  		if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
> >  			ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
> >  			cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
> > @@ -3273,7 +3269,7 @@ static int init_one_instance(unsigned int nid)
> >  		goto err_free;
> >
> >  	if (pvt->fam >= 0x17) {
> > -		pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
> > +		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
> >  		if (!pvt->umc) {
> >  			ret = -ENOMEM;
> >  			goto err_free;
> > diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> > index de8dbb0b42b5..435450bf8684 100644
> > --- a/drivers/edac/amd64_edac.h
> > +++ b/drivers/edac/amd64_edac.h
> > @@ -274,8 +274,6 @@
> >
> >  #define UMC_SDP_INIT			BIT(31)
> >
> > -#define NUM_UMCS			2
> > -
> >  enum amd_families {
> >  	K8_CPUS = 0,
> >  	F10_CPUS,
> > @@ -399,8 +397,22 @@ struct err_info {
> >
> >  static inline u32 get_umc_base(u8 channel)
> >  {
> > -	/* ch0: 0x50000, ch1: 0x150000 */
> > -	return 0x50000 + (!!channel << 20);
> > +	/* chY: 0xY50000 */
> > +	return 0x50000 + (channel << 20);
> > +}
> > +
> > +static u8 num_umcs;
> > +
> 
> You and I know what "UMC" means but I doubt the reader does. Please add
> some blurb here so that it is clear what it is.
> 

Okay.

> > +static inline void set_num_umcs(void)
> > +{
> > +	u8 model = boot_cpu_data.x86_model;
> > +
> > +	if (model >= 0x30 && model <= 0x3f)
> > +		num_umcs = 8;
> > +	else
> > +		num_umcs = 2;
> > +
> > +	edac_dbg(1, "Number of UMCs: %x", num_umcs);
> >  }
> 
> Why is this function inline and in the header? I don't see anything
> special about it and should be in amd64_edac.c instead, AFAICT.
> 

Yes, you're right.

Thanks,
Yazen

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-02-26 15:12 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-19 20:25 [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Ghannam, Yazen
2019-02-19 20:25 ` [PATCH 2/5] EDAC/amd64: Support more than two UMCs Ghannam, Yazen
2019-02-26 11:07   ` Borislav Petkov
2019-02-26 15:12     ` Ghannam, Yazen
2019-02-19 20:25 ` [PATCH 3/5] EDAC/amd64: Recognize x16 Symbol Size Ghannam, Yazen
2019-02-26 13:17   ` Borislav Petkov
2019-02-19 20:26 ` [PATCH 4/5] EDAC/amd64: Support more than two Controllers for Chip Select handling Ghannam, Yazen
2019-02-26 13:24   ` Borislav Petkov
2019-02-19 20:26 ` [PATCH 5/5] EDAC/amd64: Adjust printed Chip Select sizes when interleaved Ghannam, Yazen
2019-02-26 10:41 ` [PATCH 1/5] EDAC/amd64: Add Fam17hMod30h PCI IDs Borislav Petkov
2019-02-26 15:07   ` Ghannam, Yazen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).