Linux-EDAC Archive on lore.kernel.org
 help / color / Atom feed
* [v3] EDAC, mellanox: Add ECC support for BlueField DDR4
@ 2019-03-21 14:31 ` Junhan Zhou
  2019-05-23 17:29   ` [PATCH v3] " James Morse
  0 siblings, 1 reply; 4+ messages in thread
From: Junhan Zhou @ 2019-03-21 14:31 UTC (permalink / raw)
  To: Borislav Petkov, Mauro Carvalho Chehab
  Cc: Junhan Zhou, Liming Sun, linux-edac, linux-kernel

Add ECC support for Mellanox BlueField SoC DDR controller.
This requires SMC to the running Arm Trusted Firmware to report
what is the current memory configuration.

Signed-off-by: Junhan Zhou <Junhan@mellanox.com>
---
 MAINTAINERS                   |   5 +
 drivers/edac/Kconfig          |   7 +
 drivers/edac/Makefile         |   1 +
 drivers/edac/bluefield_edac.c | 396 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 409 insertions(+)
 create mode 100644 drivers/edac/bluefield_edac.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e17ebf7..394d6c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5539,6 +5539,11 @@ S:	Supported
 F:	drivers/edac/aspeed_edac.c
 F:	Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt
 
+EDAC-BLUEFIELD
+M:	Junhan Zhou <Junhan@mellanox.com>
+S:	Supported
+F:	drivers/edac/bluefield_edac.c
+
 EDAC-CALXEDA
 M:	Robert Richter <rric@kernel.org>
 L:	linux-edac@vger.kernel.org
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 47eb4d1..404d853 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -504,4 +504,11 @@ config EDAC_ASPEED
 	  First, ECC must be configured in the bootloader. Then, this driver
 	  will expose error counters via the EDAC kernel framework.
 
+config EDAC_BLUEFIELD
+	tristate "Mellanox BlueField Memory ECC"
+	depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST)
+	help
+	  Support for error detection and correction on the
+	  Mellanox BlueField SoCs.
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 89ad4a84..0294a67 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -84,3 +84,4 @@ obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
 obj-$(CONFIG_EDAC_TI)			+= ti_edac.o
 obj-$(CONFIG_EDAC_QCOM)			+= qcom_edac.o
 obj-$(CONFIG_EDAC_ASPEED)		+= aspeed_edac.o
+obj-$(CONFIG_EDAC_BLUEFIELD)		+= bluefield_edac.o
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
new file mode 100644
index 0000000..88f51f7
--- /dev/null
+++ b/drivers/edac/bluefield_edac.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Bluefield-specific EDAC driver.
+ *
+ * Copyright (c) 2019 Mellanox Technologies.
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "edac_module.h"
+
+#define DRIVER_NAME		"bluefield-edac"
+
+/*
+ * Mellanox BlueField EMI (External Memory Interface) register definitions.
+ * Registers which have individual bitfields have a union defining the
+ * bitfields following the address define.
+ */
+
+#define MLXBF_EMI_DRAM_ECC_COUNT 0x340
+
+union mlxbf_emi_dram_ecc_count {
+	struct {
+		u32 single_error_count : 16;
+		u32 double_error_count : 16;
+	};
+
+	u32 word;
+};
+
+#define MLXBF_EMI_DRAM_ECC_ERROR 0x348
+
+union mlxbf_emi_dram_ecc_error {
+	struct {
+		u32 dram_ecc_single : 1;
+		u32 __reserved_0    : 15;
+		u32 dram_ecc_double : 1;
+		u32 __reserved_1    : 15;
+	};
+
+	u32 word;
+};
+
+#define MLXBF_EMI_DRAM_ECC_LATCH_SELECT 0x354
+
+union mlxbf_emi_dram_ecc_latch_select {
+	struct {
+		u32 dram_ecc_first : 1;
+		u32 __reserved_0   : 15;
+		u32 edge_sel       : 4;
+		u32 __reserved_1   : 4;
+		u32 start          : 1;
+		u32 __reserved_2   : 7;
+	};
+
+	u32 word;
+};
+
+#define MLXBF_EMI_DRAM_ERR_ADDR_0 0x358
+
+#define MLXBF_EMI_DRAM_ERR_ADDR_1 0x37c
+
+#define MLXBF_EMI_DRAM_SYNDROM 0x35c
+
+union mlxbf_emi_dram_syndrom {
+	struct {
+		u32 derr         : 1;
+		u32 serr         : 1;
+		u32 __reserved_0 : 14;
+		/* ECC syndrome (error bit according to the Hamming code). */
+		u32 syndrom      : 10;
+		u32 __reserved_1 : 6;
+	};
+
+	u32 word;
+};
+
+#define MLXBF_EMI_DRAM_ADDITIONAL_INFO_0 0x364
+
+union mlxbf_emi_dram_additional_info_0 {
+	struct {
+		u32 err_bank     : 4;
+		u32 err_lrank    : 2;
+		u32 __reserved_0 : 2;
+		u32 err_prank    : 2;
+		u32 __reserved_1 : 6;
+		u32 err_edge     : 8;
+		u32 __reserved_2 : 8;
+	};
+
+	u32 word;
+};
+
+#define MLXBF_EDAC_DIMM_PER_MC		2
+#define MLXBF_EDAC_ERROR_GRAIN		8
+
+/*
+ * Request MLNX_SIP_GET_DIMM_INFO
+ *
+ * Retrieve information about DIMM on a certain slot.
+ *
+ * Call register usage:
+ * a0: MLNX_SIP_GET_DIMM_INFO
+ * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
+ * a2-7: not used.
+ *
+ * Return status:
+ * a0: dimm_info_smc union defined below describing the DIMM.
+ * a1-3: not used.
+ */
+#define MLNX_SIP_GET_DIMM_INFO		0x82000008
+
+/* Format for the SMC response about the memory information */
+union dimm_info_smc {
+	struct {
+		unsigned long size_GB : 16;
+		unsigned long is_rdimm : 1;
+		unsigned long is_lrdimm : 1;
+		unsigned long is_nvdimm : 1;
+		unsigned long __reserved0 : 2;
+		unsigned long ranks : 3;
+		unsigned long package_X : 8;	/* Bits per memory package */
+		unsigned long __reserved1 : 32;
+	};
+	unsigned long val;
+};
+
+struct bluefield_edac_priv {
+	int dimm_ranks[MLXBF_EDAC_DIMM_PER_MC];
+	void __iomem *emi_base;
+};
+
+static unsigned long smc_call1(unsigned long smc_op, unsigned long smc_arg)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
+/*
+ * Gather the ECC information from the External Memory Interface registers
+ * and report it to the edac handler.
+ */
+static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
+					int error_cnt,
+					int is_single_ecc)
+{
+	struct bluefield_edac_priv *priv = mci->pvt_info;
+	union mlxbf_emi_dram_additional_info_0 edai0;
+	union mlxbf_emi_dram_ecc_latch_select edels;
+	union mlxbf_emi_dram_syndrom eds;
+	enum hw_event_mc_err_type ecc_type;
+	unsigned long ecc_dimm_addr;
+	int ecc_dimm;
+	u32 edea0;
+	u32 edea1;
+
+	ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
+				   HW_EVENT_ERR_UNCORRECTED;
+
+	/*
+	 * Tell the External Memory Interface to populate the relevant
+	 * registers with information about the last ECC error occurrence.
+	 */
+	edels.word = 0;
+	edels.start = 1;
+	writel(edels.word, priv->emi_base + MLXBF_EMI_DRAM_ECC_LATCH_SELECT);
+
+	/*
+	 * Verify that the ECC reported info in the registers is of the
+	 * same type as the one asked to report. If not, just report the
+	 * error without the detailed information.
+	 */
+	eds.word = readl(priv->emi_base + MLXBF_EMI_DRAM_SYNDROM);
+
+	if ((is_single_ecc && !eds.serr) || (!is_single_ecc && !eds.derr)) {
+		edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
+				     0, 0, -1, mci->ctl_name, "");
+		return;
+	}
+
+	edai0.word = readl(priv->emi_base + MLXBF_EMI_DRAM_ADDITIONAL_INFO_0);
+
+	ecc_dimm = edai0.err_prank >= 2 && priv->dimm_ranks[0] <= 2;
+
+	edea0 = readl(priv->emi_base + MLXBF_EMI_DRAM_ERR_ADDR_0);
+	edea1 = readl(priv->emi_base + MLXBF_EMI_DRAM_ERR_ADDR_1);
+
+	ecc_dimm_addr = ((unsigned long)edea1 << 32) | edea0;
+
+	edac_mc_handle_error(ecc_type, mci, error_cnt,
+			     ecc_dimm_addr / 0x1000, ecc_dimm_addr & 0xfff,
+			     eds.syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
+}
+
+static void bluefield_edac_check(struct mem_ctl_info *mci)
+{
+	union mlxbf_emi_dram_ecc_error edee = { .word = 0 };
+	struct bluefield_edac_priv *priv = mci->pvt_info;
+	union mlxbf_emi_dram_ecc_count edec;
+
+	/*
+	 * The memory controller might not be initialized by the firmware
+	 * when there isn't memory, which may lead to bad register readings.
+	 */
+	if (mci->edac_cap == EDAC_FLAG_NONE)
+		return;
+
+	edec.word = readl(priv->emi_base + MLXBF_EMI_DRAM_ECC_COUNT);
+
+	if (edec.single_error_count) {
+		edee.dram_ecc_single = 1;
+
+		bluefield_gather_report_ecc(mci, edec.single_error_count, 1);
+	}
+
+	if (edec.double_error_count) {
+		edee.dram_ecc_double = 1;
+
+		bluefield_gather_report_ecc(mci, edec.double_error_count, 0);
+	}
+
+	/* Write to clear reported errors. */
+	if (edec.word)
+		writel(edee.word, priv->emi_base + MLXBF_EMI_DRAM_ECC_ERROR);
+}
+
+/* Initialize the DIMMs information for the given memory controller. */
+static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
+{
+	struct bluefield_edac_priv *priv = mci->pvt_info;
+	int mem_ctrl_idx = mci->mc_idx;
+	union dimm_info_smc smc_info;
+	struct dimm_info *dimm;
+	unsigned long smc_arg;
+	int is_empty = 1;
+	int i;
+
+	for (i = 0; i < MLXBF_EDAC_DIMM_PER_MC; i++) {
+		dimm = mci->dimms[i];
+
+		smc_arg = mem_ctrl_idx << 16 | i;
+		smc_info = (union dimm_info_smc) {
+			.val = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg),
+		};
+
+		if (!smc_info.size_GB) {
+			dimm->mtype = MEM_EMPTY;
+			continue;
+		}
+
+		is_empty = 0;
+
+		dimm->edac_mode = EDAC_SECDED;
+
+		if (smc_info.is_nvdimm)
+			dimm->mtype = MEM_NVDIMM;
+		else if (smc_info.is_lrdimm)
+			dimm->mtype = MEM_LRDDR4;
+		else if (smc_info.is_rdimm)
+			dimm->mtype = MEM_RDDR4;
+		else
+			dimm->mtype = MEM_DDR4;
+
+		dimm->nr_pages = smc_info.size_GB * 256 * 1024;
+		dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
+
+		/* Mem controller for BlueField only supports x4, x8 and x16 */
+		if (smc_info.package_X == 4)
+			dimm->dtype = DEV_X4;
+		else if (smc_info.package_X == 8)
+			dimm->dtype = DEV_X8;
+		else if (smc_info.package_X == 16)
+			dimm->dtype = DEV_X16;
+		else
+			dimm->dtype = DEV_UNKNOWN;
+
+		priv->dimm_ranks[i] = smc_info.ranks;
+	}
+
+	if (is_empty)
+		mci->edac_cap = EDAC_FLAG_NONE;
+	else
+		mci->edac_cap = EDAC_FLAG_SECDED;
+}
+
+static int bluefield_edac_mc_probe(struct platform_device *pdev)
+{
+	struct bluefield_edac_priv *priv;
+	struct device *dev = &pdev->dev;
+	struct edac_mc_layer layers[1];
+	struct mem_ctl_info *mci;
+	struct resource *emi_res;
+	unsigned int mc_idx;
+	int rc, ret;
+
+	/* Read the MSS (Memory SubSystem) index from ACPI table. */
+	if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
+		dev_warn(dev, "bf_edac: MSS number unknown\n");
+		return -EINVAL;
+	}
+
+	emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!emi_res)
+		return -EINVAL;
+
+	layers[0].type = EDAC_MC_LAYER_SLOT;
+	layers[0].size = MLXBF_EDAC_DIMM_PER_MC;
+	layers[0].is_virt_csrow = true;
+
+	mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
+	if (!mci)
+		return -ENOMEM;
+
+	priv = mci->pvt_info;
+
+	priv->emi_base = devm_ioremap_resource(dev, emi_res);
+	if (IS_ERR(priv->emi_base)) {
+		dev_err(dev, "failed to map EMI IO resource\n");
+		ret = PTR_ERR(priv->emi_base);
+		goto err;
+	}
+
+	mci->pdev = dev;
+	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
+			 MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+	mci->mod_name = DRIVER_NAME;
+	mci->ctl_name = "BlueField_Memory_Controller";
+	mci->dev_name = dev_name(dev);
+	mci->edac_check = bluefield_edac_check;
+
+	/* Initialize mci with the actual populated DIMM information. */
+	bluefield_edac_init_dimms(mci);
+
+	platform_set_drvdata(pdev, mci);
+
+	/* Register with EDAC core */
+	rc = edac_mc_add_mc(mci);
+	if (rc) {
+		dev_err(dev, "failed to register with EDAC core\n");
+		ret = rc;
+		goto err;
+	}
+
+	/* Only POLL mode supported so far. */
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	return 0;
+
+err:
+	edac_mc_free(mci);
+
+	return ret;
+
+}
+
+static int bluefield_edac_mc_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+
+static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
+	{"MLNXBF08", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
+
+static struct platform_driver bluefield_edac_mc_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.acpi_match_table = bluefield_mc_acpi_ids,
+	},
+	.probe = bluefield_edac_mc_probe,
+	.remove = bluefield_edac_mc_remove,
+};
+
+module_platform_driver(bluefield_edac_mc_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_LICENSE("GPL v2");

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] EDAC, mellanox: Add ECC support for BlueField DDR4
  2019-03-21 14:31 ` [v3] EDAC, mellanox: Add ECC support for BlueField DDR4 Junhan Zhou
@ 2019-05-23 17:29   ` " James Morse
  2019-05-30 18:48     ` Junhan Zhou
  0 siblings, 1 reply; 4+ messages in thread
From: James Morse @ 2019-05-23 17:29 UTC (permalink / raw)
  To: Junhan Zhou
  Cc: Borislav Petkov, Mauro Carvalho Chehab, Liming Sun, linux-edac,
	linux-kernel

Hi Junhan,

On 21/03/2019 14:31, Junhan Zhou wrote:
> Add ECC support for Mellanox BlueField SoC DDR controller.
> This requires SMC to the running Arm Trusted Firmware to report
> what is the current memory configuration.

Sorry for the delay on this, it slipped through the cracks. (Please don't reply with new
patches to the discussion of an old patch/series, this makes it look like ongoing
discussion on a v1, and v2 never arrives!)


> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 47eb4d1..404d853 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -504,4 +504,11 @@ config EDAC_ASPEED
>  	  First, ECC must be configured in the bootloader. Then, this driver
>  	  will expose error counters via the EDAC kernel framework.
>  
> +config EDAC_BLUEFIELD
> +	tristate "Mellanox BlueField Memory ECC"
> +	depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST)

What is the MELLANOX_PLATFORM needed for? Is it just to turn off a set of drivers in one
go? I can't see what other infrastructure you depend on.


> +	help
> +	  Support for error detection and correction on the
> +	  Mellanox BlueField SoCs.
> +
>  endif # EDAC


> diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
> new file mode 100644
> index 0000000..88f51f7
> --- /dev/null
> +++ b/drivers/edac/bluefield_edac.c
> @@ -0,0 +1,396 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Bluefield-specific EDAC driver.
> + *
> + * Copyright (c) 2019 Mellanox Technologies.
> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/arm-smccc.h>
> +#include <linux/edac.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +
> +#include "edac_module.h"
> +
> +#define DRIVER_NAME		"bluefield-edac"
> +
> +/*
> + * Mellanox BlueField EMI (External Memory Interface) register definitions.
> + * Registers which have individual bitfields have a union defining the
> + * bitfields following the address define.
> + */
> +
> +#define MLXBF_EMI_DRAM_ECC_COUNT 0x340
> +
> +union mlxbf_emi_dram_ecc_count {
> +	struct {
> +		u32 single_error_count : 16;
> +		u32 double_error_count : 16;
> +	};
> +
> +	u32 word;
> +};
> +
> +#define MLXBF_EMI_DRAM_ECC_ERROR 0x348
> +
> +union mlxbf_emi_dram_ecc_error {
> +	struct {
> +		u32 dram_ecc_single : 1;
> +		u32 __reserved_0    : 15;
> +		u32 dram_ecc_double : 1;
> +		u32 __reserved_1    : 15;
> +	};
> +
> +	u32 word;
> +};
> +
> +#define MLXBF_EMI_DRAM_ECC_LATCH_SELECT 0x354
> +
> +union mlxbf_emi_dram_ecc_latch_select {
> +	struct {
> +		u32 dram_ecc_first : 1;
> +		u32 __reserved_0   : 15;
> +		u32 edge_sel       : 4;
> +		u32 __reserved_1   : 4;
> +		u32 start          : 1;
> +		u32 __reserved_2   : 7;
> +	};
> +
> +	u32 word;
> +};
> +
> +#define MLXBF_EMI_DRAM_ERR_ADDR_0 0x358
> +
> +#define MLXBF_EMI_DRAM_ERR_ADDR_1 0x37c
> +
> +#define MLXBF_EMI_DRAM_SYNDROM 0x35c
> +
> +union mlxbf_emi_dram_syndrom {
> +	struct {
> +		u32 derr         : 1;
> +		u32 serr         : 1;
> +		u32 __reserved_0 : 14;
> +		/* ECC syndrome (error bit according to the Hamming code). */
> +		u32 syndrom      : 10;
> +		u32 __reserved_1 : 6;
> +	};
> +
> +	u32 word;
> +};
> +
> +#define MLXBF_EMI_DRAM_ADDITIONAL_INFO_0 0x364
> +
> +union mlxbf_emi_dram_additional_info_0 {
> +	struct {
> +		u32 err_bank     : 4;
> +		u32 err_lrank    : 2;
> +		u32 __reserved_0 : 2;
> +		u32 err_prank    : 2;
> +		u32 __reserved_1 : 6;
> +		u32 err_edge     : 8;
> +		u32 __reserved_2 : 8;
> +	};
> +
> +	u32 word;
> +};

... you're expecting the compiler to pack this bitfield in exactly the same way your
hardware did. I don't think that's guaranteed.
It evidently works for your current compiler, but another compiler may pack this structure
differently. Toggling endianness will break this, (arm64 supports both). If your platform
supports aarch32, someone may want to get 32bit arm running, which may have different
compiler behaviour.

You are also using bitfields between hardware and firmware, so its currently possible the
firmware requires the kernel to be built with a compiler that means it can't interact with
the hardware...

When this has come up in the past, the advice was not to use bitfields:
https://lore.kernel.org/lkml/1077080607.1078.109.camel@gaston/

Please use shifts and masks.


> +#define MLXBF_EDAC_DIMM_PER_MC		2
> +#define MLXBF_EDAC_ERROR_GRAIN		8

If these numbers changed, would it still be a BlueField SoC?
(if next years made-up:BlueField2 supports more Dimms per MC, it would be better to make
this a property in the firmware table).


> +/*
> + * Request MLNX_SIP_GET_DIMM_INFO
> + *
> + * Retrieve information about DIMM on a certain slot.
> + *
> + * Call register usage:
> + * a0: MLNX_SIP_GET_DIMM_INFO
> + * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
> + * a2-7: not used.
> + *
> + * Return status:
> + * a0: dimm_info_smc union defined below describing the DIMM.
> + * a1-3: not used.
> + */

Have Mellanox published these call numbers/arguments in a document somewhere? If they
differ with the firmware, it would be good to know which side needs fixing.

It is a little odd that you read the number of memory controllers from the ACPI table, but
use an SMC call to read the DIMM information.
Is it too-late to describe the DIMMs in the ACPI table too? (this would let firmware
hard-code it on platforms where it could never change, instead of having to support a
runtime call)

The DIMM information should also be in the SMBIOS table. See ghes_edac.c for some code
that uses this. SMBIOS isn't popular in the arm world: but edac already uses this, and we
want to match DIMM numbers with the text on the board's silk-screen so the user can
replace the correct DIMM.


> +#define MLNX_SIP_GET_DIMM_INFO		0x82000008
> +
> +/* Format for the SMC response about the memory information */
> +union dimm_info_smc {
> +	struct {
> +		unsigned long size_GB : 16;
> +		unsigned long is_rdimm : 1;
> +		unsigned long is_lrdimm : 1;
> +		unsigned long is_nvdimm : 1;
> +		unsigned long __reserved0 : 2;
> +		unsigned long ranks : 3;
> +		unsigned long package_X : 8;	/* Bits per memory package */
> +		unsigned long __reserved1 : 32;
> +	};
> +	unsigned long val;
> +};

If your firmware and the kernel were built with different compilers, this isn't guaranteed
to work. Please use shifts and masks.


> +struct bluefield_edac_priv {
> +	int dimm_ranks[MLXBF_EDAC_DIMM_PER_MC];
> +	void __iomem *emi_base;
> +};
> +
> +static unsigned long smc_call1(unsigned long smc_op, unsigned long smc_arg)
> +{
> +	struct arm_smccc_res res;
> +
> +	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);
> +
> +	return res.a0;
> +}
> +
> +/*
> + * Gather the ECC information from the External Memory Interface registers
> + * and report it to the edac handler.
> + */
> +static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
> +					int error_cnt,
> +					int is_single_ecc)
> +{
> +	struct bluefield_edac_priv *priv = mci->pvt_info;
> +	union mlxbf_emi_dram_additional_info_0 edai0;
> +	union mlxbf_emi_dram_ecc_latch_select edels;
> +	union mlxbf_emi_dram_syndrom eds;
> +	enum hw_event_mc_err_type ecc_type;
> +	unsigned long ecc_dimm_addr;
> +	int ecc_dimm;
> +	u32 edea0;
> +	u32 edea1;
> +
> +	ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
> +				   HW_EVENT_ERR_UNCORRECTED;
> +
> +	/*
> +	 * Tell the External Memory Interface to populate the relevant
> +	 * registers with information about the last ECC error occurrence.
> +	 */
> +	edels.word = 0;
> +	edels.start = 1;


> +	writel(edels.word, priv->emi_base + MLXBF_EMI_DRAM_ECC_LATCH_SELECT);
> +
> +	/*
> +	 * Verify that the ECC reported info in the registers is of the
> +	 * same type as the one asked to report. If not, just report the
> +	 * error without the detailed information.
> +	 */
> +	eds.word = readl(priv->emi_base + MLXBF_EMI_DRAM_SYNDROM);

Does the device need to have seen the write to MLXBF_EMI_DRAM_ECC_LATCH_SELECT before it
sees this read?

Will Deacon gave a presentation on this stuff at ELCE:
https://elinux.org/images/a/a8/Uh-oh-Its-IO-Ordering-Will-Deacon-Arm.pdf

(I don't understand this stuff, so may have it totally wrong here:)

From the arch code's definitions of these:
| #define writel(v,c)	({ __iowmb(); writel_relaxed((v),(c)); })
| #define readl(c)	({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })

This means you've got back-to-back writel_relaxed()/readl_relaxed() here, and probably
need an mb() between them.

(slides 17 and 19 of that pdf are handy).

As an example,
drivers/edac/cell.c::cell_edac_check() has this out_be64(); mb(); in_be64() sequence,
which I think is the same.


> +	if ((is_single_ecc && !eds.serr) || (!is_single_ecc && !eds.derr)) {
> +		edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
> +				     0, 0, -1, mci->ctl_name, "");
> +		return;
> +	}
> +
> +	edai0.word = readl(priv->emi_base + MLXBF_EMI_DRAM_ADDITIONAL_INFO_0);
> +
> +	ecc_dimm = edai0.err_prank >= 2 && priv->dimm_ranks[0] <= 2;
> +
> +	edea0 = readl(priv->emi_base + MLXBF_EMI_DRAM_ERR_ADDR_0);
> +	edea1 = readl(priv->emi_base + MLXBF_EMI_DRAM_ERR_ADDR_1);
> +
> +	ecc_dimm_addr = ((unsigned long)edea1 << 32) | edea0;
> +
> +	edac_mc_handle_error(ecc_type, mci, error_cnt,

> +			     ecc_dimm_addr / 0x1000,

Please use PFN_DOWN() to take account of the (configurable!) kernel PAGE_SIZE.


> 			     ecc_dimm_addr & 0xfff,
Please use something like PAGE_MASK, to take account of the PAGE_SIZE.


> +			     eds.syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
> +}
> +
> +static void bluefield_edac_check(struct mem_ctl_info *mci)
> +{
> +	union mlxbf_emi_dram_ecc_error edee = { .word = 0 };
> +	struct bluefield_edac_priv *priv = mci->pvt_info;
> +	union mlxbf_emi_dram_ecc_count edec;
> +
> +	/*
> +	 * The memory controller might not be initialized by the firmware
> +	 * when there isn't memory, which may lead to bad register readings.
> +	 */
> +	if (mci->edac_cap == EDAC_FLAG_NONE)
> +		return;
> +
> +	edec.word = readl(priv->emi_base + MLXBF_EMI_DRAM_ECC_COUNT);
> +
> +	if (edec.single_error_count) {
> +		edee.dram_ecc_single = 1;
> +
> +		bluefield_gather_report_ecc(mci, edec.single_error_count, 1);
> +	}
> +
> +	if (edec.double_error_count) {
> +		edee.dram_ecc_double = 1;
> +
> +		bluefield_gather_report_ecc(mci, edec.double_error_count, 0);
> +	}
> +
> +	/* Write to clear reported errors. */
> +	if (edec.word)
> +		writel(edee.word, priv->emi_base + MLXBF_EMI_DRAM_ECC_ERROR);
> +}
> +
> +/* Initialize the DIMMs information for the given memory controller. */
> +static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
> +{
> +	struct bluefield_edac_priv *priv = mci->pvt_info;
> +	int mem_ctrl_idx = mci->mc_idx;
> +	union dimm_info_smc smc_info;
> +	struct dimm_info *dimm;
> +	unsigned long smc_arg;
> +	int is_empty = 1;
> +	int i;
> +
> +	for (i = 0; i < MLXBF_EDAC_DIMM_PER_MC; i++) {
> +		dimm = mci->dimms[i];
> +
> +		smc_arg = mem_ctrl_idx << 16 | i;
> +		smc_info = (union dimm_info_smc) {
> +			.val = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg),
> +		};
> +
> +		if (!smc_info.size_GB) {
> +			dimm->mtype = MEM_EMPTY;
> +			continue;
> +		}
> +
> +		is_empty = 0;
> +
> +		dimm->edac_mode = EDAC_SECDED;
> +
> +		if (smc_info.is_nvdimm)
> +			dimm->mtype = MEM_NVDIMM;
> +		else if (smc_info.is_lrdimm)
> +			dimm->mtype = MEM_LRDDR4;
> +		else if (smc_info.is_rdimm)
> +			dimm->mtype = MEM_RDDR4;
> +		else
> +			dimm->mtype = MEM_DDR4;
> +
> +		dimm->nr_pages = smc_info.size_GB * 256 * 1024;

How come PAGE_SIZE doesn't appear here?
You may want to use SZ_1G and friends as part of the calculation to make it more readable.


> +		dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
> +
> +		/* Mem controller for BlueField only supports x4, x8 and x16 */
> +		if (smc_info.package_X == 4)
> +			dimm->dtype = DEV_X4;
> +		else if (smc_info.package_X == 8)
> +			dimm->dtype = DEV_X8;
> +		else if (smc_info.package_X == 16)
> +			dimm->dtype = DEV_X16;
> +		else
> +			dimm->dtype = DEV_UNKNOWN;
> +
> +		priv->dimm_ranks[i] = smc_info.ranks;
> +	}
> +
> +	if (is_empty)
> +		mci->edac_cap = EDAC_FLAG_NONE;
> +	else
> +		mci->edac_cap = EDAC_FLAG_SECDED;
> +}


Thanks,

James

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v3] EDAC, mellanox: Add ECC support for BlueField DDR4
  2019-05-23 17:29   ` [PATCH v3] " James Morse
@ 2019-05-30 18:48     ` Junhan Zhou
  2019-06-04 17:13       ` James Morse
  0 siblings, 1 reply; 4+ messages in thread
From: Junhan Zhou @ 2019-05-30 18:48 UTC (permalink / raw)
  To: James Morse
  Cc: Borislav Petkov, Mauro Carvalho Chehab, Liming Sun, linux-edac,
	linux-kernel, Shravan Ramani

> -----Original Message-----
> From: James Morse <james.morse@arm.com>
> Sent: Thursday, May 23, 2019 1:30 PM
> To: Junhan Zhou <Junhan@mellanox.com>
> Cc: Borislav Petkov <bp@alien8.de>; Mauro Carvalho Chehab
> <mchehab@kernel.org>; Liming Sun <lsun@mellanox.com>; linux-
> edac@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v3] EDAC, mellanox: Add ECC support for BlueField DDR4
> 
> Hi Junhan,
> 
> On 21/03/2019 14:31, Junhan Zhou wrote:
> > Add ECC support for Mellanox BlueField SoC DDR controller.
> > This requires SMC to the running Arm Trusted Firmware to report what
> > is the current memory configuration.
> 
> Sorry for the delay on this, it slipped through the cracks. (Please don't reply
> with new patches to the discussion of an old patch/series, this makes it look
> like ongoing discussion on a v1, and v2 never arrives!)
> 
Sorry about this. But at least you caught me before leaving. Shravan (cc'ed) would be pushing the V4 and beyond versions of this patch.

> > +config EDAC_BLUEFIELD
> > +	tristate "Mellanox BlueField Memory ECC"
> > +	depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) ||
> COMPILE_TEST)
> 
> What is the MELLANOX_PLATFORM needed for? Is it just to turn off a set of
> drivers in one go? I can't see what other infrastructure you depend on.
> 
Correct. Our memory controller is only present on the BlueField platform, so it doesn't make sense to be built on other platforms. There were discussions with other maintainers about what config option is used when upstreaming other drivers for BlueField, e.g. having a BLUEFIELD_SOC config. But in the end they settled down on simply using this flag.

> > +union mlxbf_emi_dram_additional_info_0 {
> > +	struct {
> > +		u32 err_bank     : 4;
> > +		u32 err_lrank    : 2;
> > +		u32 __reserved_0 : 2;
> > +		u32 err_prank    : 2;
> > +		u32 __reserved_1 : 6;
> > +		u32 err_edge     : 8;
> > +		u32 __reserved_2 : 8;
> > +	};
> > +
> > +	u32 word;
> > +};
> 
> ... you're expecting the compiler to pack this bitfield in exactly the same way
> your hardware did. I don't think that's guaranteed.
> It evidently works for your current compiler, but another compiler may pack
> this structure differently. Toggling endianness will break this, (arm64
> supports both). If your platform supports aarch32, someone may want to get
> 32bit arm running, which may have different compiler behaviour.
> 
> You are also using bitfields between hardware and firmware, so its currently
> possible the firmware requires the kernel to be built with a compiler that
> means it can't interact with the hardware...
> 
> When this has come up in the past, the advice was not to use bitfields:
> https://lore.kernel.org/lkml/1077080607.1078.109.camel@gaston/
> 
> Please use shifts and masks.
> 
Okay if you insist. I do see this kind of style used elsewhere, e.g. in pnd2_edac.h. And this driver is only used on the BlueField SoC which our bootloader would configure it to be only running at Aarch64 little endian.

> 
> > +#define MLXBF_EDAC_DIMM_PER_MC		2
> > +#define MLXBF_EDAC_ERROR_GRAIN		8
> 
> If these numbers changed, would it still be a BlueField SoC?
> (if next years made-up:BlueField2 supports more Dimms per MC, it would be
> better to make this a property in the firmware table).
> 
No it won't happen. The memory controller we are using doesn't have the strength to drive more than 2 DIMMs. If this were ever fixed it in the future, I would anticipate they design a completely new memory controller for which this driver code won't be compatible with.
BTW, how did you ever guess that the next gen chip would be called BlueField2? Did you search for "BlueField" in the registered PCI IDs and accidentally found it? (https://fossies.org/linux/systemd/hwdb/pci.ids)
> 
> > +/*
> > + * Request MLNX_SIP_GET_DIMM_INFO
> > + *
> > + * Retrieve information about DIMM on a certain slot.
> > + *
> > + * Call register usage:
> > + * a0: MLNX_SIP_GET_DIMM_INFO
> > + * a1: (Memory controller index) << 16 | (Dimm index in memory
> > +controller)
> > + * a2-7: not used.
> > + *
> > + * Return status:
> > + * a0: dimm_info_smc union defined below describing the DIMM.
> > + * a1-3: not used.
> > + */
> 
> Have Mellanox published these call numbers/arguments in a document
> somewhere? If they differ with the firmware, it would be good to know
> which side needs fixing.
> 
> It is a little odd that you read the number of memory controllers from the
> ACPI table, but use an SMC call to read the DIMM information.
> Is it too-late to describe the DIMMs in the ACPI table too? (this would let
> firmware hard-code it on platforms where it could never change, instead of
> having to support a runtime call)
> 
> The DIMM information should also be in the SMBIOS table. See ghes_edac.c
> for some code that uses this. SMBIOS isn't popular in the arm world: but
> edac already uses this, and we want to match DIMM numbers with the text
> on the board's silk-screen so the user can replace the correct DIMM.
> 

No we haven't. But, we publish the source code to our firmware so anybody can edit and boot their own version.
We are using ATF (Arm Trusted Firmware) as the primary firmware (BL1, BL2, BL31) which does the memory training, and EDK2 for the UEFI implementation. If you extract our release tarball (scroll down at http://www.mellanox.com/page/products_dyn?product_family=279&mtag=bluefield_software and download BlueField-2.0.1.10841.tar.xz), you would find the files atf-d48f19.patch and edk2-465be7.patch, which once executed would download the upstream code and then apply our changes on top of it. (Yes, we are also in the process of pushing those code to their respective upstream repositories).
So the issue here is that ATF does the memory training, but UEFI is in charge of publishing the ACPI tables (which is where the SMBIOS table would be located correct?). And the BlueField SoC are used both on boards with soldered DRAMs chips (which doesn’t have SPDs) and sockets to insert DIMMs on. So it would be a real mess for UEFI to figure out what is the DIMM configuration, and the most obvious way it to communicate through SMCs. So why go through this extra complicity when Linux can directly do the SMC? Or are you concerned about making this a standard so Windows can also figure out the DIMM configuration?
The number of memory controller as well as which physical address correspond to which memory controller is a fixed fact for the BlueField SoC, so this can be added to the ACPI table very easily without needing to dynamically patching it. Or do you rather this also be an SMC? This number can vary between the current BlueField and "BlueField2".


> > +/* Format for the SMC response about the memory information */ union
> > +dimm_info_smc {
> > +	struct {
> > +		unsigned long size_GB : 16;
> > +		unsigned long is_rdimm : 1;
> > +		unsigned long is_lrdimm : 1;
> > +		unsigned long is_nvdimm : 1;
> > +		unsigned long __reserved0 : 2;
> > +		unsigned long ranks : 3;
> > +		unsigned long package_X : 8;	/* Bits per memory package
> */
> > +		unsigned long __reserved1 : 32;
> > +	};
> > +	unsigned long val;
> > +};
> 
> If your firmware and the kernel were built with different compilers, this isn't
> guaranteed to work. Please use shifts and masks.
> 

Okay if you insist. I would think if one would tinker with the kernel driver code for BlueField, one might as well also build the firmware code which source we already provided.

> > +	writel(edels.word, priv->emi_base +
> > +MLXBF_EMI_DRAM_ECC_LATCH_SELECT);
> > +
> > +	/*
> > +	 * Verify that the ECC reported info in the registers is of the
> > +	 * same type as the one asked to report. If not, just report the
> > +	 * error without the detailed information.
> > +	 */
> > +	eds.word = readl(priv->emi_base + MLXBF_EMI_DRAM_SYNDROM);
> 
> Does the device need to have seen the write to
> MLXBF_EMI_DRAM_ECC_LATCH_SELECT before it sees this read?
> 
> Will Deacon gave a presentation on this stuff at ELCE:
> https://elinux.org/images/a/a8/Uh-oh-Its-IO-Ordering-Will-Deacon-Arm.pdf
> 
> (I don't understand this stuff, so may have it totally wrong here:)
> 
> From the arch code's definitions of these:
> | #define writel(v,c)	({ __iowmb(); writel_relaxed((v),(c)); })
> | #define readl(c)	({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
> 
> This means you've got back-to-back writel_relaxed()/readl_relaxed() here,
> and probably need an mb() between them.
> 
> (slides 17 and 19 of that pdf are handy).
> 
> As an example,
> drivers/edac/cell.c::cell_edac_check() has this out_be64(); mb(); in_be64()
> sequence, which I think is the same.
> 
> 
This is of no concern for the BlueField SoC. For accessing device memory, the hardware enforces a strict memory ordering. The next register access won't be issued before the previous instruction have been retired. So there won't be a case on the BlueField SoC (which is the only platform this driver is valid for) where the latter read happens before the previous write.

> > +
> > +	edac_mc_handle_error(ecc_type, mci, error_cnt,
> 
> > +			     ecc_dimm_addr / 0x1000,
> 
> Please use PFN_DOWN() to take account of the (configurable!) kernel
> PAGE_SIZE.
> 
> 
Ok

> > 			     ecc_dimm_addr & 0xfff,
> Please use something like PAGE_MASK, to take account of the PAGE_SIZE.
> 
> 
Ok

> > +			     eds.syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
> }
> > +		dimm->nr_pages = smc_info.size_GB * 256 * 1024;
> 
> How come PAGE_SIZE doesn't appear here?
> You may want to use SZ_1G and friends as part of the calculation to make it
> more readable.
> 
> 
Ok
> 
> 
> Thanks,
> 
> James



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] EDAC, mellanox: Add ECC support for BlueField DDR4
  2019-05-30 18:48     ` Junhan Zhou
@ 2019-06-04 17:13       ` James Morse
  0 siblings, 0 replies; 4+ messages in thread
From: James Morse @ 2019-06-04 17:13 UTC (permalink / raw)
  To: Junhan Zhou
  Cc: Borislav Petkov, Mauro Carvalho Chehab, Liming Sun, linux-edac,
	linux-kernel, Shravan Ramani

Hi Junhan,

On 30/05/2019 19:48, Junhan Zhou wrote:
>> -----Original Message-----
>> From: James Morse <james.morse@arm.com>
>> Sent: Thursday, May 23, 2019 1:30 PM
>> To: Junhan Zhou <Junhan@mellanox.com>

>>> +union mlxbf_emi_dram_additional_info_0 {
>>> +	struct {
>>> +		u32 err_bank     : 4;
>>> +		u32 err_lrank    : 2;
>>> +		u32 __reserved_0 : 2;
>>> +		u32 err_prank    : 2;
>>> +		u32 __reserved_1 : 6;
>>> +		u32 err_edge     : 8;
>>> +		u32 __reserved_2 : 8;
>>> +	};
>>> +
>>> +	u32 word;
>>> +};
>>
>> ... you're expecting the compiler to pack this bitfield in exactly the same way
>> your hardware did. I don't think that's guaranteed.
>> It evidently works for your current compiler, but another compiler may pack
>> this structure differently. Toggling endianness will break this, (arm64
>> supports both). If your platform supports aarch32, someone may want to get
>> 32bit arm running, which may have different compiler behaviour.
>>
>> You are also using bitfields between hardware and firmware, so its currently
>> possible the firmware requires the kernel to be built with a compiler that
>> means it can't interact with the hardware...
>>
>> When this has come up in the past, the advice was not to use bitfields:
>> https://lore.kernel.org/lkml/1077080607.1078.109.camel@gaston/
>>
>> Please use shifts and masks.
>>
> Okay if you insist. I do see this kind of style used elsewhere, e.g. in pnd2_edac.h.

Yup, and that is fragile. It can only work if the toolchain for firmware, the kernel and
hardware have only one configuration between them. I don't think we have enough control of
the platform to assert this.


> And this driver is only used on the BlueField SoC which our bootloader would
> configure it to be only running at Aarch64 little endian.

(endian-ness is something the kernel configures during boot. I can happily kexec a
big-endian kernel on my Juno, even though the bootloader chokes and splutters when its
asked to do this.)

This stuff is 'implementation defined' for the compiler, we can't complain to the compiler
people if it doesn't do what we wanted.


>>> +#define MLXBF_EDAC_DIMM_PER_MC		2
>>> +#define MLXBF_EDAC_ERROR_GRAIN		8
>>
>> If these numbers changed, would it still be a BlueField SoC?
>> (if next years made-up:BlueField2 supports more Dimms per MC, it would be

> BTW, how did you ever guess that the next gen chip would be called BlueField2?
> Did you search for "BlueField" in the registered PCI IDs and accidentally
> found it? (https://fossies.org/linux/systemd/hwdb/pci.ids)

Ha! The 'made-up:' was to indicate I really know nothing about this. I should have gone
for Bluefield++


>>> +/*
>>> + * Request MLNX_SIP_GET_DIMM_INFO
>>> + *
>>> + * Retrieve information about DIMM on a certain slot.
>>> + *
>>> + * Call register usage:
>>> + * a0: MLNX_SIP_GET_DIMM_INFO
>>> + * a1: (Memory controller index) << 16 | (Dimm index in memory
>>> +controller)
>>> + * a2-7: not used.
>>> + *
>>> + * Return status:
>>> + * a0: dimm_info_smc union defined below describing the DIMM.
>>> + * a1-3: not used.
>>> + */
>>
>> Have Mellanox published these call numbers/arguments in a document
>> somewhere? If they differ with the firmware, it would be good to know
>> which side needs fixing.
>>
>> It is a little odd that you read the number of memory controllers from the
>> ACPI table, but use an SMC call to read the DIMM information.
>> Is it too-late to describe the DIMMs in the ACPI table too? (this would let
>> firmware hard-code it on platforms where it could never change, instead of
>> having to support a runtime call)
>>
>> The DIMM information should also be in the SMBIOS table. See ghes_edac.c
>> for some code that uses this. SMBIOS isn't popular in the arm world: but
>> edac already uses this, and we want to match DIMM numbers with the text
>> on the board's silk-screen so the user can replace the correct DIMM.
>>
> 
> No we haven't. But, we publish the source code to our firmware so anybody can edit
> and boot their own version.

Cool!
(With a different compiler that does the bitfields differently? *cough* *cough*)

[..]

> So the issue here is that ATF does the memory training, but UEFI is in charge
> of publishing the ACPI tables (which is where the SMBIOS table would be located
> correct?).

Yes, edk2-platforms has a "Platform/ARM/JunoPkg/SmbiosPlatformDxe/SmbiosPlatformDxe.c"
which I assume is relevant to this.


> And the BlueField SoC are used both on boards with soldered DRAMs chips (which doesn’t
> have SPDs) and sockets to insert DIMMs on.

Do you ship the same firmware image for both variants? I bet this isn't the only
difference. The SMC is overhead on the soldered-DRAM boards as its returning a constant
you could have baked into UEFI.

This is the sort of thing that platform firmware can just know.


> So it would be a real mess for UEFI to figure
> out what is the DIMM configuration, and the most obvious way it to communicate through SMCs.

If it really is variable yes, but in your soldered-DRAM chips case, surely its all numbers
you could bake into the tables.

If you add the DIMMs to the SMBIOS tables then this information is useful to other tools
in user-space too. The whole edac-facade is only useful if someone can put their paw in
the enclosure to replace the faulty dimm. Ultimately we need to correlate the errors with
something replaceable, and what we really want to know is what is the label on the
silk-screen next to the faulty dimm. This is what the SMBIOS tables are for.

Once the information is in the smbios tables, we don't want to duplicate it.
Only firmware can generate the smbios tables.


> So why go through this extra complicity when Linux can directly do the SMC?

From my view? Because these 'firmware tables' are where we expect to find all the
information from firmware. If the number of DIMMs changed while the system was running,
then an SMC would be the only choice. Having a mix of firmware-tables and runtime calls is
a bit odd.


> Or are you
> concerned about making this a standard so Windows can also figure out the DIMM configuration?

No, I'm just curious that you have some information in static firmware tables, and other
static information via a firmware runtime call.


> The number of memory controller as well as which physical address correspond to which
> memory controller is a fixed fact for the BlueField SoC, so this can be added to the
> ACPI table very easily without needing to dynamically patching it. Or do you rather
> this also be an SMC? This number can vary between the current BlueField and "BlueField2".

Number of memory-controllers? This was just to check this hard-coded number really is
fixed. Its almost always better to shove things in the firmware table, even if you don't
think they're going to change. This way you don't need to change the driver when the
hardware people 'make it better'.

As it sounds like BlueField2 is going to be a thing, and it doesn't have the same memory
controller, this isn't a problem.


>>> +/* Format for the SMC response about the memory information */ union
>>> +dimm_info_smc {
>>> +	struct {
>>> +		unsigned long size_GB : 16;
>>> +		unsigned long is_rdimm : 1;
>>> +		unsigned long is_lrdimm : 1;
>>> +		unsigned long is_nvdimm : 1;
>>> +		unsigned long __reserved0 : 2;
>>> +		unsigned long ranks : 3;
>>> +		unsigned long package_X : 8;	/* Bits per memory package
>> */
>>> +		unsigned long __reserved1 : 32;
>>> +	};
>>> +	unsigned long val;
>>> +};
>>
>> If your firmware and the kernel were built with different compilers, this isn't
>> guaranteed to work. Please use shifts and masks.

> Okay if you insist. I would think if one would tinker with the kernel driver
> code for BlueField, one might as well also build the firmware code which
> source we already provided.

Its just a case of avoiding dependencies across exception boundaries, and with other projects.


>>> +	writel(edels.word, priv->emi_base +
>>> +MLXBF_EMI_DRAM_ECC_LATCH_SELECT);
>>> +
>>> +	/*
>>> +	 * Verify that the ECC reported info in the registers is of the
>>> +	 * same type as the one asked to report. If not, just report the
>>> +	 * error without the detailed information.
>>> +	 */
>>> +	eds.word = readl(priv->emi_base + MLXBF_EMI_DRAM_SYNDROM);
>>
>> Does the device need to have seen the write to
>> MLXBF_EMI_DRAM_ECC_LATCH_SELECT before it sees this read?
>>
>> Will Deacon gave a presentation on this stuff at ELCE:
>> https://elinux.org/images/a/a8/Uh-oh-Its-IO-Ordering-Will-Deacon-Arm.pdf
>>
>> (I don't understand this stuff, so may have it totally wrong here:)
>>
>> From the arch code's definitions of these:
>> | #define writel(v,c)	({ __iowmb(); writel_relaxed((v),(c)); })
>> | #define readl(c)	({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
>>
>> This means you've got back-to-back writel_relaxed()/readl_relaxed() here,
>> and probably need an mb() between them.
>>
>> (slides 17 and 19 of that pdf are handy).
>>
>> As an example,
>> drivers/edac/cell.c::cell_edac_check() has this out_be64(); mb(); in_be64()
>> sequence, which I think is the same.

> This is of no concern for the BlueField SoC. For accessing device memory, the hardware
> enforces a strict memory ordering. The next register access won't be issued before the
> previous instruction have been retired. So there won't be a case on the BlueField SoC
> (which is the only platform this driver is valid for) where the latter read happens
> before the previous write.

Did I mention I may have it totally wrong? I've found some more on this to read...

This was tripped by your comment:
> Verify that the ECC reported info in the registers is of the same type as the one asked
> to report.

Was this determined experimentally, or is it expected/documented behaviour? If it seems
like that write doesn't always complete we should get to the bottom of this.


Thanks,

James

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <e59276b6f3dc165703ddcb47a8a006d8a62d9c95.1551216637.git.Junhan@mellanox.com>
2019-03-21 14:31 ` [v3] EDAC, mellanox: Add ECC support for BlueField DDR4 Junhan Zhou
2019-05-23 17:29   ` [PATCH v3] " James Morse
2019-05-30 18:48     ` Junhan Zhou
2019-06-04 17:13       ` James Morse

Linux-EDAC Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-edac/0 linux-edac/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-edac linux-edac/ https://lore.kernel.org/linux-edac \
		linux-edac@vger.kernel.org linux-edac@archiver.kernel.org
	public-inbox-index linux-edac


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-edac


AGPL code for this site: git clone https://public-inbox.org/ public-inbox