All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/4] EDAC support for Marvell Armada SoCs
@ 2017-06-08  4:11 Chris Packham
  2017-06-08  4:11   ` [RFC,1/4] " Chris Packham
                   ` (3 more replies)
  0 siblings, 4 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: linux-arm-kernel

This series adds a basic EDAC driver for the memory controller and L2 cache
on the Marvell Armada SoCs. My test platform uses armada-xp-98dx3236 so I've
included some patches for that also.

I'm still chasing Marvell for some more info on how to inject errors, hence
the RFC.

Chris Packham (4):
  EDAC: mvebu: Add driver for Marvell Armada SoCs
  ARM: l2x0: support parity-enable/disable on aurora
  ARM: l2x0: add arm,ecc-enable property for aurora
  ARM: dts: enable l2c parity and ecc protection on 98dx3236

 Documentation/devicetree/bindings/arm/l2c2x0.txt |   2 +
 arch/arm/boot/dts/armada-xp-98dx3236.dtsi        |   2 +
 arch/arm/mm/cache-l2x0.c                         |  14 +
 drivers/edac/Kconfig                             |   7 +
 drivers/edac/Makefile                            |   1 +
 drivers/edac/mvebu_edac.c                        | 506 +++++++++++++++++++++++
 drivers/edac/mvebu_edac.h                        |  66 +++
 7 files changed, 598 insertions(+)
 create mode 100644 drivers/edac/mvebu_edac.c
 create mode 100644 drivers/edac/mvebu_edac.h

-- 
2.13.0

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Mauro Carvalho Chehab, linux-kernel

This adds an EDAC driver for the memory controller and L2 cache used on
a number of Marvell Armada SoCs.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: linux-arm-kernel@lists.infradead.org
---
 drivers/edac/Kconfig      |   7 +
 drivers/edac/Makefile     |   1 +
 drivers/edac/mvebu_edac.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/edac/mvebu_edac.h |  66 ++++++
 4 files changed, 580 insertions(+)
 create mode 100644 drivers/edac/mvebu_edac.c
 create mode 100644 drivers/edac/mvebu_edac.h

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 96afb2aeed18..5cc84b333949 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -266,6 +266,13 @@ config EDAC_MV64X60
 	  Support for error detection and correction on the Marvell
 	  MV64360 and MV64460 chipsets.
 
+config EDAC_MVEBU
+	tristate "Marvell Armada 370/385/XP"
+	depends on ARCH_MVEBU
+	help
+	  Support for error detection and correction on the Marvell
+	  ARM SoCs.
+
 config EDAC_PASEMI
 	tristate "PA Semi PWRficient"
 	depends on PPC_PASEMI && PCI
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 0fd9ffa63299..29de6c23d15c 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -59,6 +59,7 @@ layerscape_edac_mod-y			:= fsl_ddr_edac.o layerscape_edac.o
 obj-$(CONFIG_EDAC_LAYERSCAPE)		+= layerscape_edac_mod.o
 
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
+obj-$(CONFIG_EDAC_MVEBU)		+= mvebu_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
 obj-$(CONFIG_EDAC_AMD8111)		+= amd8111_edac.o
diff --git a/drivers/edac/mvebu_edac.c b/drivers/edac/mvebu_edac.c
new file mode 100644
index 000000000000..624cf10f821b
--- /dev/null
+++ b/drivers/edac/mvebu_edac.c
@@ -0,0 +1,506 @@
+/*
+ * EDAC driver for Marvell ARM SoCs
+ *
+ * Copyright (C) 2017 Allied Telesis Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "edac_module.h"
+#include "mvebu_edac.h"
+
+static const char *mvebu_ctl_name = "MVEBU";
+static int edac_mc_idx;
+static int edac_l2_idx;
+
+/*********************** DRAM err device **********************************/
+
+static void mvebu_mc_check(struct mem_ctl_info *mci)
+{
+	struct mvebu_mc_pdata *pdata = mci->pvt_info;
+	u32 reg;
+	u32 err_addr;
+	u32 sdram_ecc;
+	u32 comp_ecc;
+	u32 syndrome;
+
+	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	if (!reg)
+		return;
+
+	err_addr = reg & ~0x3;
+	sdram_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_RCVD);
+	comp_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CALC);
+	syndrome = sdram_ecc ^ comp_ecc;
+
+	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
+	if (!(reg & 0x1))
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+				     err_addr >> PAGE_SHIFT,
+				     err_addr & PAGE_MASK, syndrome,
+				     0, 0, -1,
+				     mci->ctl_name, "");
+	else	/* 2 bit error, UE */
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+				     err_addr >> PAGE_SHIFT,
+				     err_addr & PAGE_MASK, 0,
+				     0, 0, -1,
+				     mci->ctl_name, "");
+
+	/* clear the error */
+	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+}
+
+static irqreturn_t mvebu_mc_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct mvebu_mc_pdata *pdata = mci->pvt_info;
+	u32 reg;
+
+	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	if (!reg)
+		return IRQ_NONE;
+
+	/* writing 0's to the ECC err addr in check function clears irq */
+	mvebu_mc_check(mci);
+
+	return IRQ_HANDLED;
+}
+
+static void get_total_mem(struct mvebu_mc_pdata *pdata)
+{
+	struct device_node *np = NULL;
+	struct resource res;
+	int ret;
+	unsigned long total_mem = 0;
+
+	for_each_node_by_type(np, "memory") {
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret)
+			continue;
+
+		total_mem += resource_size(&res);
+	}
+
+	pdata->total_mem = total_mem;
+}
+
+static void mvebu_init_csrows(struct mem_ctl_info *mci,
+				struct mvebu_mc_pdata *pdata)
+{
+	struct csrow_info *csrow;
+	struct dimm_info *dimm;
+
+	u32 devtype;
+	u32 ctl;
+
+	get_total_mem(pdata);
+
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
+
+	csrow = mci->csrows[0];
+	dimm = csrow->channels[0]->dimm;
+
+	dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
+	dimm->grain = 8;
+
+	dimm->mtype = (ctl & MVEBU_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
+
+	devtype = (ctl >> 20) & 0x3;
+	switch (devtype) {
+	case 0x0:
+		dimm->dtype = DEV_X32;
+		break;
+	case 0x2:		/* could be X8 too, but no way to tell */
+		dimm->dtype = DEV_X16;
+		break;
+	case 0x3:
+		dimm->dtype = DEV_X4;
+		break;
+	default:
+		dimm->dtype = DEV_UNKNOWN;
+		break;
+	}
+
+	dimm->edac_mode = EDAC_SECDED;
+}
+
+static int mvebu_mc_err_probe(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct mvebu_mc_pdata *pdata;
+	struct resource *r;
+	u32 ctl;
+	int res = 0;
+
+	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = false;
+	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+			    sizeof(struct mvebu_mc_pdata));
+	if (!mci) {
+		pr_err("%s: No memory for CPU err\n", __func__);
+		devres_release_group(&pdev->dev, mvebu_mc_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = mci->pvt_info;
+	mci->pdev = &pdev->dev;
+	platform_set_drvdata(pdev, mci);
+	pdata->name = "mvebu_mc_err";
+	mci->dev_name = dev_name(&pdev->dev);
+	pdata->edac_idx = edac_mc_idx++;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		pr_err("%s: Unable to get resource for MC err regs\n",
+		       __func__);
+		res = -ENOENT;
+		goto err;
+	}
+
+	pdata->mc_vbase = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(pdata->mc_vbase)) {
+		pr_err("%s: Unable to setup MC err regs\n", __func__);
+		res = PTR_ERR(pdata->mc_vbase);
+		goto err;
+	}
+
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
+	if (!(ctl & MVEBU_SDRAM_ECC)) {
+		/* Non-ECC RAM? */
+		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
+		res = -ENODEV;
+		goto err;
+	}
+
+	edac_dbg(3, "init mci\n");
+	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = EDAC_MOD_STR;
+	mci->mod_ver = MVEBU_REVISION;
+	mci->ctl_name = mvebu_ctl_name;
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		mci->edac_check = mvebu_mc_check;
+
+	mci->ctl_page_to_phys = NULL;
+
+	mci->scrub_mode = SCRUB_SW_SRC;
+
+	mvebu_init_csrows(mci, pdata);
+
+	/* setup MC registers */
+	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
+	ctl = (ctl & 0xff00ffff) | 0x10000;
+	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		/* acquire interrupt that reports errors */
+		pdata->irq = platform_get_irq(pdev, 0);
+		res = devm_request_irq(&pdev->dev,
+				       pdata->irq,
+				       mvebu_mc_isr,
+				       0,
+				       "[EDAC] MC err",
+				       mci);
+		if (res < 0) {
+			pr_err("%s: Unable to request irq %d\n", __func__,
+			       pdata->irq);
+			res = -ENODEV;
+			goto err;
+		}
+
+		pr_info("acquired irq %d for MC Err\n",
+		       pdata->irq);
+	}
+
+	res = edac_mc_add_mc(mci);
+	if (res) {
+		edac_dbg(3, "failed edac_mc_add_mc()\n");
+		goto err;
+	}
+
+
+	/* get this far and it's successful */
+	edac_dbg(3, "success\n");
+
+	return 0;
+
+err:
+	devres_release_group(&pdev->dev, mvebu_mc_err_probe);
+	edac_mc_free(mci);
+	return res;
+}
+
+static int mvebu_mc_err_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	edac_dbg(0, "\n");
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_mc_err_of_match[] = {
+	{ .compatible = "marvell,armada-xp-sdram-controller", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
+
+static struct platform_driver mvebu_mc_err_driver = {
+	.probe = mvebu_mc_err_probe,
+	.remove = mvebu_mc_err_remove,
+	.driver = {
+		   .name = "mvebu_mc_err",
+		   .of_match_table = of_match_ptr(mvebu_mc_err_of_match),
+	},
+};
+
+/*********************** L2 err device **********************************/
+static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
+{
+
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	u32 val;
+
+	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+	if (!(val & 1))
+		return;
+
+	pr_err("ECC Error in CPU L2 cache\n");
+	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
+	pr_err("L2 Error Address Capture Register: 0x%08x\n",
+		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));
+
+	if (L2_ERR_TYPE(val) == 0)
+		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
+
+	if (L2_ERR_TYPE(val) == 1)
+		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
+
+	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+}
+
+static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
+{
+	struct edac_device_ctl_info *edac_dev = dev_id;
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	u32 val;
+
+	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+	if (!(val & 1))
+		return IRQ_NONE;
+
+	mvebu_l2_check(edac_dev);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
+				char *data)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+
+	return sprintf(data, "0x%08x",
+		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL));
+}
+
+static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
+				 const char *data, size_t count)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	unsigned long val;
+
+	if (!kstrtoul(data, 0, &val)) {
+		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL);
+		return count;
+	}
+
+	return 0;
+}
+
+static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
+				     char *data)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+
+	return sprintf(data, "0x%08x",
+		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK));
+}
+
+static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
+				      const char *data, size_t count)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	unsigned long val;
+
+	if (!kstrtoul(data, 0, &val)) {
+		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK);
+		return count;
+	}
+
+	return 0;
+}
+
+static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
+	__ATTR_RW(inject_ctrl),
+	__ATTR_RW(inject_mask),
+	{},
+};
+
+static int mvebu_l2_err_probe(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev;
+	struct mvebu_l2_pdata *pdata;
+	struct resource *r;
+	int res;
+
+	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
+					      "cpu", 1, "L", 1, 2, NULL, 0,
+					      edac_l2_idx);
+	if (!edac_dev) {
+		devres_release_group(&pdev->dev, mvebu_l2_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = edac_dev->pvt_info;
+	pdata->name = "mvebu_l2_err";
+	edac_dev->dev = &pdev->dev;
+	dev_set_drvdata(edac_dev->dev, edac_dev);
+	edac_dev->mod_name = EDAC_MOD_STR;
+	edac_dev->ctl_name = pdata->name;
+	edac_dev->dev_name = pdata->name;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* skip to error registers */
+	r->start += 0x600;
+
+	pdata->l2_vbase = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(pdata->l2_vbase)) {
+		res = PTR_ERR(pdata->l2_vbase);
+		goto err;
+	}
+
+	writel(L2_ERR_UE_THRESH(1) | L2_ERR_CE_THRESH(1),
+	       pdata->l2_vbase + MVEBU_L2_ERR_THRESH);
+	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		edac_dev->edac_check = mvebu_l2_check;
+
+	edac_dev->sysfs_attributes = mvebu_l2_sysfs_attributes;
+
+	pdata->edac_idx = edac_l2_idx++;
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		pdata->irq = platform_get_irq(pdev, 0);
+		res = devm_request_irq(&pdev->dev, pdata->irq,
+				       mvebu_l2_isr, IRQF_SHARED,
+				       "[EDAC] L2 err", edac_dev);
+		if (res < 0)
+			goto err;
+	}
+
+	if (edac_device_add_device(edac_dev) > 0) {
+		res = -EIO;
+		goto err;
+	}
+
+	devres_remove_group(&pdev->dev, mvebu_l2_err_probe);
+	return 0;
+
+err:
+	devres_release_group(&pdev->dev, mvebu_l2_err_probe);
+	edac_device_free_ctl_info(edac_dev);
+	return res;
+}
+
+static int mvebu_l2_err_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&pdev->dev);
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(edac_dev);
+	return 0;
+}
+
+static const struct of_device_id mvebu_l2_err_of_match[] = {
+	{ .compatible = "marvell,aurora-system-cache", },
+	{},
+};
+
+static struct platform_driver mvebu_l2_err_driver = {
+	.probe = mvebu_l2_err_probe,
+	.remove = mvebu_l2_err_remove,
+	.driver = {
+		   .name = "mvebu_l2_err",
+		   .of_match_table = of_match_ptr(mvebu_l2_err_of_match),
+	},
+};
+
+static struct platform_driver * const drivers[] = {
+	&mvebu_mc_err_driver,
+	&mvebu_l2_err_driver,
+};
+
+static int __init mvebu_edac_init(void)
+{
+	/* make sure error reporting method is sane */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_INT:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_INT;
+		break;
+	}
+
+	return  platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_init(mvebu_edac_init);
+
+static void __exit mvebu_edac_exit(void)
+{
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_exit(mvebu_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Allied Telesis Labs");
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state,
+		 "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
diff --git a/drivers/edac/mvebu_edac.h b/drivers/edac/mvebu_edac.h
new file mode 100644
index 000000000000..33f0534b87df
--- /dev/null
+++ b/drivers/edac/mvebu_edac.h
@@ -0,0 +1,66 @@
+/*
+ * EDAC defs for Marvell SoCs
+ *
+ * Copyright (C) 2017 Allied Telesis Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details
+ */
+#ifndef _MVEBU_EDAC_H_
+#define _MVEBU_EDAC_H_
+
+#define MVEBU_REVISION " Ver: 2.0.0"
+#define EDAC_MOD_STR	"MVEBU_edac"
+
+/*
+ * L2 Err Registers
+ */
+#define MVEBU_L2_ERR_COUNT		0x00	/* 0x8600 */
+#define MVEBU_L2_ERR_THRESH		0x04	/* 0x8604 */
+#define MVEBU_L2_ERR_ATTR		0x08	/* 0x8608 */
+#define MVEBU_L2_ERR_ADDR		0x0c	/* 0x860c */
+#define MVEBU_L2_ERR_CAP		0x10	/* 0x8610 */
+#define MVEBU_L2_ERR_INJ_CTRL		0x14	/* 0x8614 */
+#define MVEBU_L2_ERR_INJ_MASK		0x18	/* 0x8618 */
+
+#define L2_ERR_UE_THRESH(val)		((val & 0xff) << 16)
+#define L2_ERR_CE_THRESH(val)		(val & 0xffff)
+#define L2_ERR_TYPE(val)		((val >> 8) & 0x3)
+
+/*
+ * SDRAM Controller Registers
+ */
+#define MVEBU_SDRAM_CONFIG		0x00	/* 0x1400 */
+#define MVEBU_SDRAM_ERR_DATA_HI		0x40	/* 0x1440 */
+#define MVEBU_SDRAM_ERR_DATA_LO		0x44	/* 0x1444 */
+#define MVEBU_SDRAM_ERR_ECC_RCVD	0x48	/* 0x1448 */
+#define MVEBU_SDRAM_ERR_ECC_CALC	0x4c	/* 0x144c */
+#define MVEBU_SDRAM_ERR_ADDR		0x50	/* 0x1450 */
+#define MVEBU_SDRAM_ERR_ECC_CNTL	0x54	/* 0x1454 */
+#define MVEBU_SDRAM_ERR_ECC_ERR_CNT	0x58	/* 0x1458 */
+
+#define MVEBU_SDRAM_REGISTERED	0x20000
+#define MVEBU_SDRAM_ECC		0x40000
+
+struct mvebu_l2_pdata {
+	void __iomem *l2_vbase;
+	char *name;
+	int irq;
+	int edac_idx;
+};
+
+struct mvebu_mc_pdata {
+	void __iomem *mc_vbase;
+	int total_mem;
+	char *name;
+	int irq;
+	int edac_idx;
+};
+
+#endif
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Mauro Carvalho Chehab, linux-kernel

This adds an EDAC driver for the memory controller and L2 cache used on
a number of Marvell Armada SoCs.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: linux-arm-kernel@lists.infradead.org
---
 drivers/edac/Kconfig      |   7 +
 drivers/edac/Makefile     |   1 +
 drivers/edac/mvebu_edac.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/edac/mvebu_edac.h |  66 ++++++
 4 files changed, 580 insertions(+)
 create mode 100644 drivers/edac/mvebu_edac.c
 create mode 100644 drivers/edac/mvebu_edac.h

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 96afb2aeed18..5cc84b333949 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -266,6 +266,13 @@ config EDAC_MV64X60
 	  Support for error detection and correction on the Marvell
 	  MV64360 and MV64460 chipsets.
 
+config EDAC_MVEBU
+	tristate "Marvell Armada 370/385/XP"
+	depends on ARCH_MVEBU
+	help
+	  Support for error detection and correction on the Marvell
+	  ARM SoCs.
+
 config EDAC_PASEMI
 	tristate "PA Semi PWRficient"
 	depends on PPC_PASEMI && PCI
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 0fd9ffa63299..29de6c23d15c 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -59,6 +59,7 @@ layerscape_edac_mod-y			:= fsl_ddr_edac.o layerscape_edac.o
 obj-$(CONFIG_EDAC_LAYERSCAPE)		+= layerscape_edac_mod.o
 
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
+obj-$(CONFIG_EDAC_MVEBU)		+= mvebu_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
 obj-$(CONFIG_EDAC_AMD8111)		+= amd8111_edac.o
diff --git a/drivers/edac/mvebu_edac.c b/drivers/edac/mvebu_edac.c
new file mode 100644
index 000000000000..624cf10f821b
--- /dev/null
+++ b/drivers/edac/mvebu_edac.c
@@ -0,0 +1,506 @@
+/*
+ * EDAC driver for Marvell ARM SoCs
+ *
+ * Copyright (C) 2017 Allied Telesis Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "edac_module.h"
+#include "mvebu_edac.h"
+
+static const char *mvebu_ctl_name = "MVEBU";
+static int edac_mc_idx;
+static int edac_l2_idx;
+
+/*********************** DRAM err device **********************************/
+
+static void mvebu_mc_check(struct mem_ctl_info *mci)
+{
+	struct mvebu_mc_pdata *pdata = mci->pvt_info;
+	u32 reg;
+	u32 err_addr;
+	u32 sdram_ecc;
+	u32 comp_ecc;
+	u32 syndrome;
+
+	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	if (!reg)
+		return;
+
+	err_addr = reg & ~0x3;
+	sdram_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_RCVD);
+	comp_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CALC);
+	syndrome = sdram_ecc ^ comp_ecc;
+
+	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
+	if (!(reg & 0x1))
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+				     err_addr >> PAGE_SHIFT,
+				     err_addr & PAGE_MASK, syndrome,
+				     0, 0, -1,
+				     mci->ctl_name, "");
+	else	/* 2 bit error, UE */
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+				     err_addr >> PAGE_SHIFT,
+				     err_addr & PAGE_MASK, 0,
+				     0, 0, -1,
+				     mci->ctl_name, "");
+
+	/* clear the error */
+	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+}
+
+static irqreturn_t mvebu_mc_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct mvebu_mc_pdata *pdata = mci->pvt_info;
+	u32 reg;
+
+	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	if (!reg)
+		return IRQ_NONE;
+
+	/* writing 0's to the ECC err addr in check function clears irq */
+	mvebu_mc_check(mci);
+
+	return IRQ_HANDLED;
+}
+
+static void get_total_mem(struct mvebu_mc_pdata *pdata)
+{
+	struct device_node *np = NULL;
+	struct resource res;
+	int ret;
+	unsigned long total_mem = 0;
+
+	for_each_node_by_type(np, "memory") {
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret)
+			continue;
+
+		total_mem += resource_size(&res);
+	}
+
+	pdata->total_mem = total_mem;
+}
+
+static void mvebu_init_csrows(struct mem_ctl_info *mci,
+				struct mvebu_mc_pdata *pdata)
+{
+	struct csrow_info *csrow;
+	struct dimm_info *dimm;
+
+	u32 devtype;
+	u32 ctl;
+
+	get_total_mem(pdata);
+
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
+
+	csrow = mci->csrows[0];
+	dimm = csrow->channels[0]->dimm;
+
+	dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
+	dimm->grain = 8;
+
+	dimm->mtype = (ctl & MVEBU_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
+
+	devtype = (ctl >> 20) & 0x3;
+	switch (devtype) {
+	case 0x0:
+		dimm->dtype = DEV_X32;
+		break;
+	case 0x2:		/* could be X8 too, but no way to tell */
+		dimm->dtype = DEV_X16;
+		break;
+	case 0x3:
+		dimm->dtype = DEV_X4;
+		break;
+	default:
+		dimm->dtype = DEV_UNKNOWN;
+		break;
+	}
+
+	dimm->edac_mode = EDAC_SECDED;
+}
+
+static int mvebu_mc_err_probe(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct mvebu_mc_pdata *pdata;
+	struct resource *r;
+	u32 ctl;
+	int res = 0;
+
+	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = false;
+	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+			    sizeof(struct mvebu_mc_pdata));
+	if (!mci) {
+		pr_err("%s: No memory for CPU err\n", __func__);
+		devres_release_group(&pdev->dev, mvebu_mc_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = mci->pvt_info;
+	mci->pdev = &pdev->dev;
+	platform_set_drvdata(pdev, mci);
+	pdata->name = "mvebu_mc_err";
+	mci->dev_name = dev_name(&pdev->dev);
+	pdata->edac_idx = edac_mc_idx++;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		pr_err("%s: Unable to get resource for MC err regs\n",
+		       __func__);
+		res = -ENOENT;
+		goto err;
+	}
+
+	pdata->mc_vbase = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(pdata->mc_vbase)) {
+		pr_err("%s: Unable to setup MC err regs\n", __func__);
+		res = PTR_ERR(pdata->mc_vbase);
+		goto err;
+	}
+
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
+	if (!(ctl & MVEBU_SDRAM_ECC)) {
+		/* Non-ECC RAM? */
+		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
+		res = -ENODEV;
+		goto err;
+	}
+
+	edac_dbg(3, "init mci\n");
+	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = EDAC_MOD_STR;
+	mci->mod_ver = MVEBU_REVISION;
+	mci->ctl_name = mvebu_ctl_name;
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		mci->edac_check = mvebu_mc_check;
+
+	mci->ctl_page_to_phys = NULL;
+
+	mci->scrub_mode = SCRUB_SW_SRC;
+
+	mvebu_init_csrows(mci, pdata);
+
+	/* setup MC registers */
+	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
+	ctl = (ctl & 0xff00ffff) | 0x10000;
+	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		/* acquire interrupt that reports errors */
+		pdata->irq = platform_get_irq(pdev, 0);
+		res = devm_request_irq(&pdev->dev,
+				       pdata->irq,
+				       mvebu_mc_isr,
+				       0,
+				       "[EDAC] MC err",
+				       mci);
+		if (res < 0) {
+			pr_err("%s: Unable to request irq %d\n", __func__,
+			       pdata->irq);
+			res = -ENODEV;
+			goto err;
+		}
+
+		pr_info("acquired irq %d for MC Err\n",
+		       pdata->irq);
+	}
+
+	res = edac_mc_add_mc(mci);
+	if (res) {
+		edac_dbg(3, "failed edac_mc_add_mc()\n");
+		goto err;
+	}
+
+
+	/* get this far and it's successful */
+	edac_dbg(3, "success\n");
+
+	return 0;
+
+err:
+	devres_release_group(&pdev->dev, mvebu_mc_err_probe);
+	edac_mc_free(mci);
+	return res;
+}
+
+static int mvebu_mc_err_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	edac_dbg(0, "\n");
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_mc_err_of_match[] = {
+	{ .compatible = "marvell,armada-xp-sdram-controller", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
+
+static struct platform_driver mvebu_mc_err_driver = {
+	.probe = mvebu_mc_err_probe,
+	.remove = mvebu_mc_err_remove,
+	.driver = {
+		   .name = "mvebu_mc_err",
+		   .of_match_table = of_match_ptr(mvebu_mc_err_of_match),
+	},
+};
+
+/*********************** L2 err device **********************************/
+static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
+{
+
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	u32 val;
+
+	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+	if (!(val & 1))
+		return;
+
+	pr_err("ECC Error in CPU L2 cache\n");
+	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
+	pr_err("L2 Error Address Capture Register: 0x%08x\n",
+		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));
+
+	if (L2_ERR_TYPE(val) == 0)
+		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
+
+	if (L2_ERR_TYPE(val) == 1)
+		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
+
+	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+}
+
+static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
+{
+	struct edac_device_ctl_info *edac_dev = dev_id;
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	u32 val;
+
+	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+	if (!(val & 1))
+		return IRQ_NONE;
+
+	mvebu_l2_check(edac_dev);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
+				char *data)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+
+	return sprintf(data, "0x%08x",
+		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL));
+}
+
+static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
+				 const char *data, size_t count)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	unsigned long val;
+
+	if (!kstrtoul(data, 0, &val)) {
+		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL);
+		return count;
+	}
+
+	return 0;
+}
+
+static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
+				     char *data)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+
+	return sprintf(data, "0x%08x",
+		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK));
+}
+
+static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
+				      const char *data, size_t count)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	unsigned long val;
+
+	if (!kstrtoul(data, 0, &val)) {
+		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK);
+		return count;
+	}
+
+	return 0;
+}
+
+static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
+	__ATTR_RW(inject_ctrl),
+	__ATTR_RW(inject_mask),
+	{},
+};
+
+static int mvebu_l2_err_probe(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev;
+	struct mvebu_l2_pdata *pdata;
+	struct resource *r;
+	int res;
+
+	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
+					      "cpu", 1, "L", 1, 2, NULL, 0,
+					      edac_l2_idx);
+	if (!edac_dev) {
+		devres_release_group(&pdev->dev, mvebu_l2_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = edac_dev->pvt_info;
+	pdata->name = "mvebu_l2_err";
+	edac_dev->dev = &pdev->dev;
+	dev_set_drvdata(edac_dev->dev, edac_dev);
+	edac_dev->mod_name = EDAC_MOD_STR;
+	edac_dev->ctl_name = pdata->name;
+	edac_dev->dev_name = pdata->name;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* skip to error registers */
+	r->start += 0x600;
+
+	pdata->l2_vbase = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(pdata->l2_vbase)) {
+		res = PTR_ERR(pdata->l2_vbase);
+		goto err;
+	}
+
+	writel(L2_ERR_UE_THRESH(1) | L2_ERR_CE_THRESH(1),
+	       pdata->l2_vbase + MVEBU_L2_ERR_THRESH);
+	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		edac_dev->edac_check = mvebu_l2_check;
+
+	edac_dev->sysfs_attributes = mvebu_l2_sysfs_attributes;
+
+	pdata->edac_idx = edac_l2_idx++;
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		pdata->irq = platform_get_irq(pdev, 0);
+		res = devm_request_irq(&pdev->dev, pdata->irq,
+				       mvebu_l2_isr, IRQF_SHARED,
+				       "[EDAC] L2 err", edac_dev);
+		if (res < 0)
+			goto err;
+	}
+
+	if (edac_device_add_device(edac_dev) > 0) {
+		res = -EIO;
+		goto err;
+	}
+
+	devres_remove_group(&pdev->dev, mvebu_l2_err_probe);
+	return 0;
+
+err:
+	devres_release_group(&pdev->dev, mvebu_l2_err_probe);
+	edac_device_free_ctl_info(edac_dev);
+	return res;
+}
+
+static int mvebu_l2_err_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&pdev->dev);
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(edac_dev);
+	return 0;
+}
+
+static const struct of_device_id mvebu_l2_err_of_match[] = {
+	{ .compatible = "marvell,aurora-system-cache", },
+	{},
+};
+
+static struct platform_driver mvebu_l2_err_driver = {
+	.probe = mvebu_l2_err_probe,
+	.remove = mvebu_l2_err_remove,
+	.driver = {
+		   .name = "mvebu_l2_err",
+		   .of_match_table = of_match_ptr(mvebu_l2_err_of_match),
+	},
+};
+
+static struct platform_driver * const drivers[] = {
+	&mvebu_mc_err_driver,
+	&mvebu_l2_err_driver,
+};
+
+static int __init mvebu_edac_init(void)
+{
+	/* make sure error reporting method is sane */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_INT:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_INT;
+		break;
+	}
+
+	return  platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_init(mvebu_edac_init);
+
+static void __exit mvebu_edac_exit(void)
+{
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_exit(mvebu_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Allied Telesis Labs");
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state,
+		 "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
diff --git a/drivers/edac/mvebu_edac.h b/drivers/edac/mvebu_edac.h
new file mode 100644
index 000000000000..33f0534b87df
--- /dev/null
+++ b/drivers/edac/mvebu_edac.h
@@ -0,0 +1,66 @@
+/*
+ * EDAC defs for Marvell SoCs
+ *
+ * Copyright (C) 2017 Allied Telesis Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details
+ */
+#ifndef _MVEBU_EDAC_H_
+#define _MVEBU_EDAC_H_
+
+#define MVEBU_REVISION " Ver: 2.0.0"
+#define EDAC_MOD_STR	"MVEBU_edac"
+
+/*
+ * L2 Err Registers
+ */
+#define MVEBU_L2_ERR_COUNT		0x00	/* 0x8600 */
+#define MVEBU_L2_ERR_THRESH		0x04	/* 0x8604 */
+#define MVEBU_L2_ERR_ATTR		0x08	/* 0x8608 */
+#define MVEBU_L2_ERR_ADDR		0x0c	/* 0x860c */
+#define MVEBU_L2_ERR_CAP		0x10	/* 0x8610 */
+#define MVEBU_L2_ERR_INJ_CTRL		0x14	/* 0x8614 */
+#define MVEBU_L2_ERR_INJ_MASK		0x18	/* 0x8618 */
+
+#define L2_ERR_UE_THRESH(val)		((val & 0xff) << 16)
+#define L2_ERR_CE_THRESH(val)		(val & 0xffff)
+#define L2_ERR_TYPE(val)		((val >> 8) & 0x3)
+
+/*
+ * SDRAM Controller Registers
+ */
+#define MVEBU_SDRAM_CONFIG		0x00	/* 0x1400 */
+#define MVEBU_SDRAM_ERR_DATA_HI		0x40	/* 0x1440 */
+#define MVEBU_SDRAM_ERR_DATA_LO		0x44	/* 0x1444 */
+#define MVEBU_SDRAM_ERR_ECC_RCVD	0x48	/* 0x1448 */
+#define MVEBU_SDRAM_ERR_ECC_CALC	0x4c	/* 0x144c */
+#define MVEBU_SDRAM_ERR_ADDR		0x50	/* 0x1450 */
+#define MVEBU_SDRAM_ERR_ECC_CNTL	0x54	/* 0x1454 */
+#define MVEBU_SDRAM_ERR_ECC_ERR_CNT	0x58	/* 0x1458 */
+
+#define MVEBU_SDRAM_REGISTERED	0x20000
+#define MVEBU_SDRAM_ECC		0x40000
+
+struct mvebu_l2_pdata {
+	void __iomem *l2_vbase;
+	char *name;
+	int irq;
+	int edac_idx;
+};
+
+struct mvebu_mc_pdata {
+	void __iomem *mc_vbase;
+	int total_mem;
+	char *name;
+	int irq;
+	int edac_idx;
+};
+
+#endif

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: linux-arm-kernel

This adds an EDAC driver for the memory controller and L2 cache used on
a number of Marvell Armada SoCs.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: linux-arm-kernel at lists.infradead.org
---
 drivers/edac/Kconfig      |   7 +
 drivers/edac/Makefile     |   1 +
 drivers/edac/mvebu_edac.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/edac/mvebu_edac.h |  66 ++++++
 4 files changed, 580 insertions(+)
 create mode 100644 drivers/edac/mvebu_edac.c
 create mode 100644 drivers/edac/mvebu_edac.h

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 96afb2aeed18..5cc84b333949 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -266,6 +266,13 @@ config EDAC_MV64X60
 	  Support for error detection and correction on the Marvell
 	  MV64360 and MV64460 chipsets.
 
+config EDAC_MVEBU
+	tristate "Marvell Armada 370/385/XP"
+	depends on ARCH_MVEBU
+	help
+	  Support for error detection and correction on the Marvell
+	  ARM SoCs.
+
 config EDAC_PASEMI
 	tristate "PA Semi PWRficient"
 	depends on PPC_PASEMI && PCI
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 0fd9ffa63299..29de6c23d15c 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -59,6 +59,7 @@ layerscape_edac_mod-y			:= fsl_ddr_edac.o layerscape_edac.o
 obj-$(CONFIG_EDAC_LAYERSCAPE)		+= layerscape_edac_mod.o
 
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
+obj-$(CONFIG_EDAC_MVEBU)		+= mvebu_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
 obj-$(CONFIG_EDAC_AMD8111)		+= amd8111_edac.o
diff --git a/drivers/edac/mvebu_edac.c b/drivers/edac/mvebu_edac.c
new file mode 100644
index 000000000000..624cf10f821b
--- /dev/null
+++ b/drivers/edac/mvebu_edac.c
@@ -0,0 +1,506 @@
+/*
+ * EDAC driver for Marvell ARM SoCs
+ *
+ * Copyright (C) 2017 Allied Telesis Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "edac_module.h"
+#include "mvebu_edac.h"
+
+static const char *mvebu_ctl_name = "MVEBU";
+static int edac_mc_idx;
+static int edac_l2_idx;
+
+/*********************** DRAM err device **********************************/
+
+static void mvebu_mc_check(struct mem_ctl_info *mci)
+{
+	struct mvebu_mc_pdata *pdata = mci->pvt_info;
+	u32 reg;
+	u32 err_addr;
+	u32 sdram_ecc;
+	u32 comp_ecc;
+	u32 syndrome;
+
+	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	if (!reg)
+		return;
+
+	err_addr = reg & ~0x3;
+	sdram_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_RCVD);
+	comp_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CALC);
+	syndrome = sdram_ecc ^ comp_ecc;
+
+	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
+	if (!(reg & 0x1))
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+				     err_addr >> PAGE_SHIFT,
+				     err_addr & PAGE_MASK, syndrome,
+				     0, 0, -1,
+				     mci->ctl_name, "");
+	else	/* 2 bit error, UE */
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+				     err_addr >> PAGE_SHIFT,
+				     err_addr & PAGE_MASK, 0,
+				     0, 0, -1,
+				     mci->ctl_name, "");
+
+	/* clear the error */
+	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+}
+
+static irqreturn_t mvebu_mc_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct mvebu_mc_pdata *pdata = mci->pvt_info;
+	u32 reg;
+
+	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	if (!reg)
+		return IRQ_NONE;
+
+	/* writing 0's to the ECC err addr in check function clears irq */
+	mvebu_mc_check(mci);
+
+	return IRQ_HANDLED;
+}
+
+static void get_total_mem(struct mvebu_mc_pdata *pdata)
+{
+	struct device_node *np = NULL;
+	struct resource res;
+	int ret;
+	unsigned long total_mem = 0;
+
+	for_each_node_by_type(np, "memory") {
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret)
+			continue;
+
+		total_mem += resource_size(&res);
+	}
+
+	pdata->total_mem = total_mem;
+}
+
+static void mvebu_init_csrows(struct mem_ctl_info *mci,
+				struct mvebu_mc_pdata *pdata)
+{
+	struct csrow_info *csrow;
+	struct dimm_info *dimm;
+
+	u32 devtype;
+	u32 ctl;
+
+	get_total_mem(pdata);
+
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
+
+	csrow = mci->csrows[0];
+	dimm = csrow->channels[0]->dimm;
+
+	dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
+	dimm->grain = 8;
+
+	dimm->mtype = (ctl & MVEBU_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
+
+	devtype = (ctl >> 20) & 0x3;
+	switch (devtype) {
+	case 0x0:
+		dimm->dtype = DEV_X32;
+		break;
+	case 0x2:		/* could be X8 too, but no way to tell */
+		dimm->dtype = DEV_X16;
+		break;
+	case 0x3:
+		dimm->dtype = DEV_X4;
+		break;
+	default:
+		dimm->dtype = DEV_UNKNOWN;
+		break;
+	}
+
+	dimm->edac_mode = EDAC_SECDED;
+}
+
+static int mvebu_mc_err_probe(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct mvebu_mc_pdata *pdata;
+	struct resource *r;
+	u32 ctl;
+	int res = 0;
+
+	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = false;
+	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+			    sizeof(struct mvebu_mc_pdata));
+	if (!mci) {
+		pr_err("%s: No memory for CPU err\n", __func__);
+		devres_release_group(&pdev->dev, mvebu_mc_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = mci->pvt_info;
+	mci->pdev = &pdev->dev;
+	platform_set_drvdata(pdev, mci);
+	pdata->name = "mvebu_mc_err";
+	mci->dev_name = dev_name(&pdev->dev);
+	pdata->edac_idx = edac_mc_idx++;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		pr_err("%s: Unable to get resource for MC err regs\n",
+		       __func__);
+		res = -ENOENT;
+		goto err;
+	}
+
+	pdata->mc_vbase = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(pdata->mc_vbase)) {
+		pr_err("%s: Unable to setup MC err regs\n", __func__);
+		res = PTR_ERR(pdata->mc_vbase);
+		goto err;
+	}
+
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
+	if (!(ctl & MVEBU_SDRAM_ECC)) {
+		/* Non-ECC RAM? */
+		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
+		res = -ENODEV;
+		goto err;
+	}
+
+	edac_dbg(3, "init mci\n");
+	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = EDAC_MOD_STR;
+	mci->mod_ver = MVEBU_REVISION;
+	mci->ctl_name = mvebu_ctl_name;
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		mci->edac_check = mvebu_mc_check;
+
+	mci->ctl_page_to_phys = NULL;
+
+	mci->scrub_mode = SCRUB_SW_SRC;
+
+	mvebu_init_csrows(mci, pdata);
+
+	/* setup MC registers */
+	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
+	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
+	ctl = (ctl & 0xff00ffff) | 0x10000;
+	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		/* acquire interrupt that reports errors */
+		pdata->irq = platform_get_irq(pdev, 0);
+		res = devm_request_irq(&pdev->dev,
+				       pdata->irq,
+				       mvebu_mc_isr,
+				       0,
+				       "[EDAC] MC err",
+				       mci);
+		if (res < 0) {
+			pr_err("%s: Unable to request irq %d\n", __func__,
+			       pdata->irq);
+			res = -ENODEV;
+			goto err;
+		}
+
+		pr_info("acquired irq %d for MC Err\n",
+		       pdata->irq);
+	}
+
+	res = edac_mc_add_mc(mci);
+	if (res) {
+		edac_dbg(3, "failed edac_mc_add_mc()\n");
+		goto err;
+	}
+
+
+	/* get this far and it's successful */
+	edac_dbg(3, "success\n");
+
+	return 0;
+
+err:
+	devres_release_group(&pdev->dev, mvebu_mc_err_probe);
+	edac_mc_free(mci);
+	return res;
+}
+
+static int mvebu_mc_err_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	edac_dbg(0, "\n");
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_mc_err_of_match[] = {
+	{ .compatible = "marvell,armada-xp-sdram-controller", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
+
+static struct platform_driver mvebu_mc_err_driver = {
+	.probe = mvebu_mc_err_probe,
+	.remove = mvebu_mc_err_remove,
+	.driver = {
+		   .name = "mvebu_mc_err",
+		   .of_match_table = of_match_ptr(mvebu_mc_err_of_match),
+	},
+};
+
+/*********************** L2 err device **********************************/
+static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
+{
+
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	u32 val;
+
+	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+	if (!(val & 1))
+		return;
+
+	pr_err("ECC Error in CPU L2 cache\n");
+	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
+	pr_err("L2 Error Address Capture Register: 0x%08x\n",
+		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));
+
+	if (L2_ERR_TYPE(val) == 0)
+		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
+
+	if (L2_ERR_TYPE(val) == 1)
+		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
+
+	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+}
+
+static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
+{
+	struct edac_device_ctl_info *edac_dev = dev_id;
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	u32 val;
+
+	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+	if (!(val & 1))
+		return IRQ_NONE;
+
+	mvebu_l2_check(edac_dev);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
+				char *data)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+
+	return sprintf(data, "0x%08x",
+		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL));
+}
+
+static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
+				 const char *data, size_t count)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	unsigned long val;
+
+	if (!kstrtoul(data, 0, &val)) {
+		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL);
+		return count;
+	}
+
+	return 0;
+}
+
+static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
+				     char *data)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+
+	return sprintf(data, "0x%08x",
+		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK));
+}
+
+static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
+				      const char *data, size_t count)
+{
+	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
+	unsigned long val;
+
+	if (!kstrtoul(data, 0, &val)) {
+		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK);
+		return count;
+	}
+
+	return 0;
+}
+
+static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
+	__ATTR_RW(inject_ctrl),
+	__ATTR_RW(inject_mask),
+	{},
+};
+
+static int mvebu_l2_err_probe(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev;
+	struct mvebu_l2_pdata *pdata;
+	struct resource *r;
+	int res;
+
+	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
+					      "cpu", 1, "L", 1, 2, NULL, 0,
+					      edac_l2_idx);
+	if (!edac_dev) {
+		devres_release_group(&pdev->dev, mvebu_l2_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = edac_dev->pvt_info;
+	pdata->name = "mvebu_l2_err";
+	edac_dev->dev = &pdev->dev;
+	dev_set_drvdata(edac_dev->dev, edac_dev);
+	edac_dev->mod_name = EDAC_MOD_STR;
+	edac_dev->ctl_name = pdata->name;
+	edac_dev->dev_name = pdata->name;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* skip to error registers */
+	r->start += 0x600;
+
+	pdata->l2_vbase = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(pdata->l2_vbase)) {
+		res = PTR_ERR(pdata->l2_vbase);
+		goto err;
+	}
+
+	writel(L2_ERR_UE_THRESH(1) | L2_ERR_CE_THRESH(1),
+	       pdata->l2_vbase + MVEBU_L2_ERR_THRESH);
+	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		edac_dev->edac_check = mvebu_l2_check;
+
+	edac_dev->sysfs_attributes = mvebu_l2_sysfs_attributes;
+
+	pdata->edac_idx = edac_l2_idx++;
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		pdata->irq = platform_get_irq(pdev, 0);
+		res = devm_request_irq(&pdev->dev, pdata->irq,
+				       mvebu_l2_isr, IRQF_SHARED,
+				       "[EDAC] L2 err", edac_dev);
+		if (res < 0)
+			goto err;
+	}
+
+	if (edac_device_add_device(edac_dev) > 0) {
+		res = -EIO;
+		goto err;
+	}
+
+	devres_remove_group(&pdev->dev, mvebu_l2_err_probe);
+	return 0;
+
+err:
+	devres_release_group(&pdev->dev, mvebu_l2_err_probe);
+	edac_device_free_ctl_info(edac_dev);
+	return res;
+}
+
+static int mvebu_l2_err_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&pdev->dev);
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(edac_dev);
+	return 0;
+}
+
+static const struct of_device_id mvebu_l2_err_of_match[] = {
+	{ .compatible = "marvell,aurora-system-cache", },
+	{},
+};
+
+static struct platform_driver mvebu_l2_err_driver = {
+	.probe = mvebu_l2_err_probe,
+	.remove = mvebu_l2_err_remove,
+	.driver = {
+		   .name = "mvebu_l2_err",
+		   .of_match_table = of_match_ptr(mvebu_l2_err_of_match),
+	},
+};
+
+static struct platform_driver * const drivers[] = {
+	&mvebu_mc_err_driver,
+	&mvebu_l2_err_driver,
+};
+
+static int __init mvebu_edac_init(void)
+{
+	/* make sure error reporting method is sane */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_INT:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_INT;
+		break;
+	}
+
+	return  platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_init(mvebu_edac_init);
+
+static void __exit mvebu_edac_exit(void)
+{
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_exit(mvebu_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Allied Telesis Labs");
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state,
+		 "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
diff --git a/drivers/edac/mvebu_edac.h b/drivers/edac/mvebu_edac.h
new file mode 100644
index 000000000000..33f0534b87df
--- /dev/null
+++ b/drivers/edac/mvebu_edac.h
@@ -0,0 +1,66 @@
+/*
+ * EDAC defs for Marvell SoCs
+ *
+ * Copyright (C) 2017 Allied Telesis Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details
+ */
+#ifndef _MVEBU_EDAC_H_
+#define _MVEBU_EDAC_H_
+
+#define MVEBU_REVISION " Ver: 2.0.0"
+#define EDAC_MOD_STR	"MVEBU_edac"
+
+/*
+ * L2 Err Registers
+ */
+#define MVEBU_L2_ERR_COUNT		0x00	/* 0x8600 */
+#define MVEBU_L2_ERR_THRESH		0x04	/* 0x8604 */
+#define MVEBU_L2_ERR_ATTR		0x08	/* 0x8608 */
+#define MVEBU_L2_ERR_ADDR		0x0c	/* 0x860c */
+#define MVEBU_L2_ERR_CAP		0x10	/* 0x8610 */
+#define MVEBU_L2_ERR_INJ_CTRL		0x14	/* 0x8614 */
+#define MVEBU_L2_ERR_INJ_MASK		0x18	/* 0x8618 */
+
+#define L2_ERR_UE_THRESH(val)		((val & 0xff) << 16)
+#define L2_ERR_CE_THRESH(val)		(val & 0xffff)
+#define L2_ERR_TYPE(val)		((val >> 8) & 0x3)
+
+/*
+ * SDRAM Controller Registers
+ */
+#define MVEBU_SDRAM_CONFIG		0x00	/* 0x1400 */
+#define MVEBU_SDRAM_ERR_DATA_HI		0x40	/* 0x1440 */
+#define MVEBU_SDRAM_ERR_DATA_LO		0x44	/* 0x1444 */
+#define MVEBU_SDRAM_ERR_ECC_RCVD	0x48	/* 0x1448 */
+#define MVEBU_SDRAM_ERR_ECC_CALC	0x4c	/* 0x144c */
+#define MVEBU_SDRAM_ERR_ADDR		0x50	/* 0x1450 */
+#define MVEBU_SDRAM_ERR_ECC_CNTL	0x54	/* 0x1454 */
+#define MVEBU_SDRAM_ERR_ECC_ERR_CNT	0x58	/* 0x1458 */
+
+#define MVEBU_SDRAM_REGISTERED	0x20000
+#define MVEBU_SDRAM_ECC		0x40000
+
+struct mvebu_l2_pdata {
+	void __iomem *l2_vbase;
+	char *name;
+	int irq;
+	int edac_idx;
+};
+
+struct mvebu_mc_pdata {
+	void __iomem *mc_vbase;
+	int total_mem;
+	char *name;
+	int irq;
+	int edac_idx;
+};
+
+#endif
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 2/4] ARM: l2x0: support parity-enable/disable on aurora
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Russell King, linux-kernel

The aurora cache on the Marvell Armada-XP SoC supports the same tag
parity features as the other l2x0 cache implementations.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 arch/arm/mm/cache-l2x0.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 808efbb89b88..2cc2653b046f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1505,6 +1505,13 @@ static void __init aurora_of_parse(const struct device_node *np,
 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
 	}
 
+	if (of_property_read_bool(np, "arm,parity-enable")) {
+		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
+		val |= L2C_AUX_CTRL_PARITY_ENABLE;
+	} else if (of_property_read_bool(np, "arm,parity-disable")) {
+		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
+	}
+
 	*aux_val &= ~mask;
 	*aux_val |= val;
 	*aux_mask &= ~mask;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC,2/4] ARM: l2x0: support parity-enable/disable on aurora
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Russell King, linux-kernel

The aurora cache on the Marvell Armada-XP SoC supports the same tag
parity features as the other l2x0 cache implementations.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 arch/arm/mm/cache-l2x0.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 808efbb89b88..2cc2653b046f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1505,6 +1505,13 @@ static void __init aurora_of_parse(const struct device_node *np,
 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
 	}
 
+	if (of_property_read_bool(np, "arm,parity-enable")) {
+		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
+		val |= L2C_AUX_CTRL_PARITY_ENABLE;
+	} else if (of_property_read_bool(np, "arm,parity-disable")) {
+		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
+	}
+
 	*aux_val &= ~mask;
 	*aux_val |= val;
 	*aux_mask &= ~mask;

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 2/4] ARM: l2x0: support parity-enable/disable on aurora
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: linux-arm-kernel

The aurora cache on the Marvell Armada-XP SoC supports the same tag
parity features as the other l2x0 cache implementations.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 arch/arm/mm/cache-l2x0.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 808efbb89b88..2cc2653b046f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1505,6 +1505,13 @@ static void __init aurora_of_parse(const struct device_node *np,
 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
 	}
 
+	if (of_property_read_bool(np, "arm,parity-enable")) {
+		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
+		val |= L2C_AUX_CTRL_PARITY_ENABLE;
+	} else if (of_property_read_bool(np, "arm,parity-disable")) {
+		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
+	}
+
 	*aux_val &= ~mask;
 	*aux_val |= val;
 	*aux_mask &= ~mask;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Rob Herring, Mark Rutland, Russell King,
	devicetree, linux-kernel

The aurora cache on the Marvell Armada-XP SoC supports ECC protection
for the L2 data arrays. Add a "arm,ecc-enable" device tree property
which can be used to enable this.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
 arch/arm/mm/cache-l2x0.c                         | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt
index d9650c1788f4..6316e673307a 100644
--- a/Documentation/devicetree/bindings/arm/l2c2x0.txt
+++ b/Documentation/devicetree/bindings/arm/l2c2x0.txt
@@ -76,6 +76,8 @@ Optional properties:
   specified to indicate that such transforms are precluded.
 - arm,parity-enable : enable parity checking on the L2 cache (L220 or PL310).
 - arm,parity-disable : disable parity checking on the L2 cache (L220 or PL310).
+- arm,ecc-enable : enable ECC protection on the L2 cache
+- arm,ecc-disable : disable ECC protection on the L2 cache
 - arm,outer-sync-disable : disable the outer sync operation on the L2 cache.
   Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
   will randomly hang unless outer sync operations are disabled.
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2cc2653b046f..4f0e6d9b151d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1505,6 +1505,13 @@ static void __init aurora_of_parse(const struct device_node *np,
 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
 	}
 
+	if (of_property_read_bool(np, "arm,ecc-enable")) {
+		mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
+		val |= L2C_AUX_CTRL_EVTMON_ENABLE;
+	} else if (of_property_read_bool(np, "arm,ecc-disable")) {
+		mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
+	}
+
 	if (of_property_read_bool(np, "arm,parity-enable")) {
 		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
 		val |= L2C_AUX_CTRL_PARITY_ENABLE;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC,3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Rob Herring, Mark Rutland, Russell King,
	devicetree, linux-kernel

The aurora cache on the Marvell Armada-XP SoC supports ECC protection
for the L2 data arrays. Add a "arm,ecc-enable" device tree property
which can be used to enable this.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
 arch/arm/mm/cache-l2x0.c                         | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt
index d9650c1788f4..6316e673307a 100644
--- a/Documentation/devicetree/bindings/arm/l2c2x0.txt
+++ b/Documentation/devicetree/bindings/arm/l2c2x0.txt
@@ -76,6 +76,8 @@ Optional properties:
   specified to indicate that such transforms are precluded.
 - arm,parity-enable : enable parity checking on the L2 cache (L220 or PL310).
 - arm,parity-disable : disable parity checking on the L2 cache (L220 or PL310).
+- arm,ecc-enable : enable ECC protection on the L2 cache
+- arm,ecc-disable : disable ECC protection on the L2 cache
 - arm,outer-sync-disable : disable the outer sync operation on the L2 cache.
   Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
   will randomly hang unless outer sync operations are disabled.
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2cc2653b046f..4f0e6d9b151d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1505,6 +1505,13 @@ static void __init aurora_of_parse(const struct device_node *np,
 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
 	}
 
+	if (of_property_read_bool(np, "arm,ecc-enable")) {
+		mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
+		val |= L2C_AUX_CTRL_EVTMON_ENABLE;
+	} else if (of_property_read_bool(np, "arm,ecc-disable")) {
+		mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
+	}
+
 	if (of_property_read_bool(np, "arm,parity-enable")) {
 		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
 		val |= L2C_AUX_CTRL_PARITY_ENABLE;

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: linux-arm-kernel

The aurora cache on the Marvell Armada-XP SoC supports ECC protection
for the L2 data arrays. Add a "arm,ecc-enable" device tree property
which can be used to enable this.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
 arch/arm/mm/cache-l2x0.c                         | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt
index d9650c1788f4..6316e673307a 100644
--- a/Documentation/devicetree/bindings/arm/l2c2x0.txt
+++ b/Documentation/devicetree/bindings/arm/l2c2x0.txt
@@ -76,6 +76,8 @@ Optional properties:
   specified to indicate that such transforms are precluded.
 - arm,parity-enable : enable parity checking on the L2 cache (L220 or PL310).
 - arm,parity-disable : disable parity checking on the L2 cache (L220 or PL310).
+- arm,ecc-enable : enable ECC protection on the L2 cache
+- arm,ecc-disable : disable ECC protection on the L2 cache
 - arm,outer-sync-disable : disable the outer sync operation on the L2 cache.
   Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
   will randomly hang unless outer sync operations are disabled.
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2cc2653b046f..4f0e6d9b151d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1505,6 +1505,13 @@ static void __init aurora_of_parse(const struct device_node *np,
 		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
 	}
 
+	if (of_property_read_bool(np, "arm,ecc-enable")) {
+		mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
+		val |= L2C_AUX_CTRL_EVTMON_ENABLE;
+	} else if (of_property_read_bool(np, "arm,ecc-disable")) {
+		mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
+	}
+
 	if (of_property_read_bool(np, "arm,parity-enable")) {
 		mask |= L2C_AUX_CTRL_PARITY_ENABLE;
 		val |= L2C_AUX_CTRL_PARITY_ENABLE;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 4/4] ARM: dts: enable l2c parity and ecc protection on 98dx3236
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Jason Cooper, Andrew Lunn, Gregory Clement,
	Sebastian Hesselbarth, Rob Herring, Mark Rutland, Russell King,
	devicetree, linux-kernel

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 arch/arm/boot/dts/armada-xp-98dx3236.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 84cc232a29e9..32d05ec88ffd 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -138,6 +138,8 @@
 				cache-level = <2>;
 				cache-unified;
 				wt-override;
+				arm,parity-enable;
+				arm,ecc-enable;
 			};
 
 			gpio0: gpio@18100 {
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC,4/4] ARM: dts: enable l2c parity and ecc protection on 98dx3236
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp, linux-arm-kernel, linux-edac
  Cc: Chris Packham, Jason Cooper, Andrew Lunn, Gregory Clement,
	Sebastian Hesselbarth, Rob Herring, Mark Rutland, Russell King,
	devicetree, linux-kernel

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 arch/arm/boot/dts/armada-xp-98dx3236.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 84cc232a29e9..32d05ec88ffd 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -138,6 +138,8 @@
 				cache-level = <2>;
 				cache-unified;
 				wt-override;
+				arm,parity-enable;
+				arm,ecc-enable;
 			};
 
 			gpio0: gpio@18100 {

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 4/4] ARM: dts: enable l2c parity and ecc protection on 98dx3236
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: bp-Gina5bIWoIWzQB+pC5nmwQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-edac-u79uwXL29TY76Z2rM5mHXA
  Cc: Chris Packham, Jason Cooper, Andrew Lunn, Gregory Clement,
	Sebastian Hesselbarth, Rob Herring, Mark Rutland, Russell King,
	devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Signed-off-by: Chris Packham <chris.packham-6g8wRflRTwXFdCa3tKVlE6U/zSkkHjvu@public.gmane.org>
---
 arch/arm/boot/dts/armada-xp-98dx3236.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 84cc232a29e9..32d05ec88ffd 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -138,6 +138,8 @@
 				cache-level = <2>;
 				cache-unified;
 				wt-override;
+				arm,parity-enable;
+				arm,ecc-enable;
 			};
 
 			gpio0: gpio@18100 {
-- 
2.13.0

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [RFC PATCH 4/4] ARM: dts: enable l2c parity and ecc protection on 98dx3236
@ 2017-06-08  4:11   ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-08  4:11 UTC (permalink / raw)
  To: linux-arm-kernel

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
 arch/arm/boot/dts/armada-xp-98dx3236.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 84cc232a29e9..32d05ec88ffd 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -138,6 +138,8 @@
 				cache-level = <2>;
 				cache-unified;
 				wt-override;
+				arm,parity-enable;
+				arm,ecc-enable;
 			};
 
 			gpio0: gpio at 18100 {
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09  8:58     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09  8:58 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       }

Unless I misunderstand the code in __l2c_init(), the mask is used to
specify the bits to preserve:
        old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
        aux &= aux_mask;
        aux |= aux_val;

        if (old_aux != aux)
                pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
                        old_aux, aux);

So the arm,ecc-disable property will have no effect. This probably also
applies to patch 2/4. The existing property *-disable code removes the
corresponding bit from the mask.

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09  8:58     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09  8:58 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       }

Unless I misunderstand the code in __l2c_init(), the mask is used to
specify the bits to preserve:
        old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
        aux &= aux_mask;
        aux |= aux_val;

        if (old_aux != aux)
                pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
                        old_aux, aux);

So the arm,ecc-disable property will have no effect. This probably also
applies to patch 2/4. The existing property *-disable code removes the
corresponding bit from the mask.

Regards,
Jan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09  8:58     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09  8:58 UTC (permalink / raw)
  To: Chris Packham
  Cc: Mark Rutland, devicetree, Russell King, linux-kernel,
	Rob Herring, bp, linux-arm-kernel, linux-edac

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       }

Unless I misunderstand the code in __l2c_init(), the mask is used to
specify the bits to preserve:
        old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
        aux &= aux_mask;
        aux |= aux_val;

        if (old_aux != aux)
                pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
                        old_aux, aux);

So the arm,ecc-disable property will have no effect. This probably also
applies to patch 2/4. The existing property *-disable code removes the
corresponding bit from the mask.

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09  8:58     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09  8:58 UTC (permalink / raw)
  To: linux-arm-kernel

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> +       }

Unless I misunderstand the code in __l2c_init(), the mask is used to
specify the bits to preserve:
        old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
        aux &= aux_mask;
        aux |= aux_val;

        if (old_aux != aux)
                pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
                        old_aux, aux);

So the arm,ecc-disable property will have no effect. This probably also
applies to patch 2/4. The existing property *-disable code removes the
corresponding bit from the mask.

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 10:39     ` Borislav Petkov
  0 siblings, 0 replies; 54+ messages in thread
From: Borislav Petkov @ 2017-06-09 10:39 UTC (permalink / raw)
  To: Chris Packham
  Cc: linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

On Thu, Jun 08, 2017 at 04:11:21PM +1200, Chris Packham wrote:
> This adds an EDAC driver for the memory controller and L2 cache used on
> a number of Marvell Armada SoCs.
> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: linux-arm-kernel@lists.infradead.org
> ---
>  drivers/edac/Kconfig      |   7 +
>  drivers/edac/Makefile     |   1 +
>  drivers/edac/mvebu_edac.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/edac/mvebu_edac.h |  66 ++++++
>  4 files changed, 580 insertions(+)
>  create mode 100644 drivers/edac/mvebu_edac.c
>  create mode 100644 drivers/edac/mvebu_edac.h

...

> diff --git a/drivers/edac/mvebu_edac.c b/drivers/edac/mvebu_edac.c
> new file mode 100644
> index 000000000000..624cf10f821b
> --- /dev/null
> +++ b/drivers/edac/mvebu_edac.c
> @@ -0,0 +1,506 @@
> +/*
> + * EDAC driver for Marvell ARM SoCs
> + *
> + * Copyright (C) 2017 Allied Telesis Labs
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

We have all those fancy edac_printk() macros, no need to use pr_* ones.

...

> +static int mvebu_mc_err_probe(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci;
> +	struct edac_mc_layer layers[2];
> +	struct mvebu_mc_pdata *pdata;
> +	struct resource *r;
> +	u32 ctl;
> +	int res = 0;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = false;
> +	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
> +			    sizeof(struct mvebu_mc_pdata));
> +	if (!mci) {
> +		pr_err("%s: No memory for CPU err\n", __func__);
> +		devres_release_group(&pdev->dev, mvebu_mc_err_probe);
> +		return -ENOMEM;
> +	}

Make that call after all platform_get_resource(),
devm_ioremap_resource() so that you can save yourself the unwinding
"goto err" and return directly then.

> +
> +	pdata = mci->pvt_info;
> +	mci->pdev = &pdev->dev;
> +	platform_set_drvdata(pdev, mci);
> +	pdata->name = "mvebu_mc_err";
> +	mci->dev_name = dev_name(&pdev->dev);
> +	pdata->edac_idx = edac_mc_idx++;
> +
> +	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!r) {
> +		pr_err("%s: Unable to get resource for MC err regs\n",
> +		       __func__);
> +		res = -ENOENT;
> +		goto err;
> +	}
> +
> +	pdata->mc_vbase = devm_ioremap_resource(&pdev->dev, r);
> +	if (IS_ERR(pdata->mc_vbase)) {
> +		pr_err("%s: Unable to setup MC err regs\n", __func__);
> +		res = PTR_ERR(pdata->mc_vbase);
> +		goto err;
> +	}
> +
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
> +	if (!(ctl & MVEBU_SDRAM_ECC)) {
> +		/* Non-ECC RAM? */
> +		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
> +		res = -ENODEV;
> +		goto err;
> +	}
> +
> +	edac_dbg(3, "init mci\n");
> +	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
> +	mci->edac_cap = EDAC_FLAG_SECDED;
> +	mci->mod_name = EDAC_MOD_STR;
> +	mci->mod_ver = MVEBU_REVISION;
> +	mci->ctl_name = mvebu_ctl_name;
> +
> +	if (edac_op_state == EDAC_OPSTATE_POLL)
> +		mci->edac_check = mvebu_mc_check;
> +
> +	mci->ctl_page_to_phys = NULL;
> +
> +	mci->scrub_mode = SCRUB_SW_SRC;
> +
> +	mvebu_init_csrows(mci, pdata);
> +
> +	/* setup MC registers */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +	ctl = (ctl & 0xff00ffff) | 0x10000;
> +	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +
> +	if (edac_op_state == EDAC_OPSTATE_INT) {
> +		/* acquire interrupt that reports errors */
> +		pdata->irq = platform_get_irq(pdev, 0);
> +		res = devm_request_irq(&pdev->dev,
> +				       pdata->irq,
> +				       mvebu_mc_isr,
> +				       0,
> +				       "[EDAC] MC err",
> +				       mci);
> +		if (res < 0) {
> +			pr_err("%s: Unable to request irq %d\n", __func__,
> +			       pdata->irq);
> +			res = -ENODEV;
> +			goto err;
> +		}
> +
> +		pr_info("acquired irq %d for MC Err\n",
> +		       pdata->irq);

Really needed?

> +	}
> +
> +	res = edac_mc_add_mc(mci);
> +	if (res) {
> +		edac_dbg(3, "failed edac_mc_add_mc()\n");
> +		goto err;
> +	}
> +
> +
> +	/* get this far and it's successful */
> +	edac_dbg(3, "success\n");
> +
> +	return 0;
> +
> +err:
> +	devres_release_group(&pdev->dev, mvebu_mc_err_probe);
> +	edac_mc_free(mci);
> +	return res;
> +}
> +
> +static int mvebu_mc_err_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
> +
> +	edac_dbg(0, "\n");
> +
> +	edac_mc_del_mc(&pdev->dev);
> +	edac_mc_free(mci);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id mvebu_mc_err_of_match[] = {
> +	{ .compatible = "marvell,armada-xp-sdram-controller", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
> +
> +static struct platform_driver mvebu_mc_err_driver = {
> +	.probe = mvebu_mc_err_probe,
> +	.remove = mvebu_mc_err_remove,
> +	.driver = {
> +		   .name = "mvebu_mc_err",
> +		   .of_match_table = of_match_ptr(mvebu_mc_err_of_match),
> +	},
> +};
> +
> +/*********************** L2 err device **********************************/
> +static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
> +{
> +
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return;
> +
> +	pr_err("ECC Error in CPU L2 cache\n");
> +	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
> +	pr_err("L2 Error Address Capture Register: 0x%08x\n",
> +		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));

Ditto as above. edac_printk().

Also, I'd like to see this made more user-friendly and actually the
error decoded to human-readable strings than dumping raw registers and
then forcing users to go dig IP manuals for the definitions.

> +
> +	if (L2_ERR_TYPE(val) == 0)
> +		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	if (L2_ERR_TYPE(val) == 1)
> +		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +}
> +
> +static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *edac_dev = dev_id;
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return IRQ_NONE;
> +
> +	mvebu_l2_check(edac_dev);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
> +				char *data)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +
> +	return sprintf(data, "0x%08x",
> +		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL));
> +}
> +
> +static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
> +				 const char *data, size_t count)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	unsigned long val;
> +
> +	if (!kstrtoul(data, 0, &val)) {
> +		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL);
> +		return count;
> +	}
> +
> +	return 0;
> +}
> +
> +static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
> +				     char *data)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +
> +	return sprintf(data, "0x%08x",
> +		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK));
> +}
> +
> +static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
> +				      const char *data, size_t count)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	unsigned long val;
> +
> +	if (!kstrtoul(data, 0, &val)) {
> +		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK);
> +		return count;
> +	}
> +
> +	return 0;
> +}

Do you really want all those injection things to be present even on a
production system and people to inject stuff?

If not, you can stick them under CONFIG_EDAC_DEBUG.

> +
> +static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
> +	__ATTR_RW(inject_ctrl),
> +	__ATTR_RW(inject_mask),
> +	{},
> +};
> +
> +static int mvebu_l2_err_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct mvebu_l2_pdata *pdata;
> +	struct resource *r;
> +	int res;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
> +					      "cpu", 1, "L", 1, 2, NULL, 0,
> +					      edac_l2_idx);
> +	if (!edac_dev) {
> +		devres_release_group(&pdev->dev, mvebu_l2_err_probe);
> +		return -ENOMEM;
> +	}

Same comment here as above - consider moving that call down in the
function to simplify error handling paths.

> +
> +	pdata = edac_dev->pvt_info;
> +	pdata->name = "mvebu_l2_err";
> +	edac_dev->dev = &pdev->dev;
> +	dev_set_drvdata(edac_dev->dev, edac_dev);
> +	edac_dev->mod_name = EDAC_MOD_STR;
> +	edac_dev->ctl_name = pdata->name;
> +	edac_dev->dev_name = pdata->name;

...

> diff --git a/drivers/edac/mvebu_edac.h b/drivers/edac/mvebu_edac.h
> new file mode 100644
> index 000000000000..33f0534b87df
> --- /dev/null
> +++ b/drivers/edac/mvebu_edac.h
> @@ -0,0 +1,66 @@
> +/*
> + * EDAC defs for Marvell SoCs
> + *
> + * Copyright (C) 2017 Allied Telesis Labs
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details
> + */
> +#ifndef _MVEBU_EDAC_H_
> +#define _MVEBU_EDAC_H_
> +
> +#define MVEBU_REVISION " Ver: 2.0.0"
> +#define EDAC_MOD_STR	"MVEBU_edac"
> +
> +/*
> + * L2 Err Registers
> + */
> +#define MVEBU_L2_ERR_COUNT		0x00	/* 0x8600 */
> +#define MVEBU_L2_ERR_THRESH		0x04	/* 0x8604 */
> +#define MVEBU_L2_ERR_ATTR		0x08	/* 0x8608 */
> +#define MVEBU_L2_ERR_ADDR		0x0c	/* 0x860c */
> +#define MVEBU_L2_ERR_CAP		0x10	/* 0x8610 */
> +#define MVEBU_L2_ERR_INJ_CTRL		0x14	/* 0x8614 */
> +#define MVEBU_L2_ERR_INJ_MASK		0x18	/* 0x8618 */
> +
> +#define L2_ERR_UE_THRESH(val)		((val & 0xff) << 16)
> +#define L2_ERR_CE_THRESH(val)		(val & 0xffff)
> +#define L2_ERR_TYPE(val)		((val >> 8) & 0x3)
> +
> +/*
> + * SDRAM Controller Registers
> + */
> +#define MVEBU_SDRAM_CONFIG		0x00	/* 0x1400 */
> +#define MVEBU_SDRAM_ERR_DATA_HI		0x40	/* 0x1440 */
> +#define MVEBU_SDRAM_ERR_DATA_LO		0x44	/* 0x1444 */
> +#define MVEBU_SDRAM_ERR_ECC_RCVD	0x48	/* 0x1448 */
> +#define MVEBU_SDRAM_ERR_ECC_CALC	0x4c	/* 0x144c */
> +#define MVEBU_SDRAM_ERR_ADDR		0x50	/* 0x1450 */
> +#define MVEBU_SDRAM_ERR_ECC_CNTL	0x54	/* 0x1454 */
> +#define MVEBU_SDRAM_ERR_ECC_ERR_CNT	0x58	/* 0x1458 */
> +
> +#define MVEBU_SDRAM_REGISTERED	0x20000
> +#define MVEBU_SDRAM_ECC		0x40000

BIT()

> +
> +struct mvebu_l2_pdata {
> +	void __iomem *l2_vbase;
> +	char *name;
> +	int irq;
> +	int edac_idx;
> +};
> +
> +struct mvebu_mc_pdata {
> +	void __iomem *mc_vbase;
> +	int total_mem;
> +	char *name;
> +	int irq;
> +	int edac_idx;
> +};

Looks like you could merge those two structs into one.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 10:39     ` Borislav Petkov
  0 siblings, 0 replies; 54+ messages in thread
From: Borislav Petkov @ 2017-06-09 10:39 UTC (permalink / raw)
  To: Chris Packham
  Cc: linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

On Thu, Jun 08, 2017 at 04:11:21PM +1200, Chris Packham wrote:
> This adds an EDAC driver for the memory controller and L2 cache used on
> a number of Marvell Armada SoCs.
> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: linux-arm-kernel@lists.infradead.org
> ---
>  drivers/edac/Kconfig      |   7 +
>  drivers/edac/Makefile     |   1 +
>  drivers/edac/mvebu_edac.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/edac/mvebu_edac.h |  66 ++++++
>  4 files changed, 580 insertions(+)
>  create mode 100644 drivers/edac/mvebu_edac.c
>  create mode 100644 drivers/edac/mvebu_edac.h

...

> diff --git a/drivers/edac/mvebu_edac.c b/drivers/edac/mvebu_edac.c
> new file mode 100644
> index 000000000000..624cf10f821b
> --- /dev/null
> +++ b/drivers/edac/mvebu_edac.c
> @@ -0,0 +1,506 @@
> +/*
> + * EDAC driver for Marvell ARM SoCs
> + *
> + * Copyright (C) 2017 Allied Telesis Labs
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

We have all those fancy edac_printk() macros, no need to use pr_* ones.

...

> +static int mvebu_mc_err_probe(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci;
> +	struct edac_mc_layer layers[2];
> +	struct mvebu_mc_pdata *pdata;
> +	struct resource *r;
> +	u32 ctl;
> +	int res = 0;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = false;
> +	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
> +			    sizeof(struct mvebu_mc_pdata));
> +	if (!mci) {
> +		pr_err("%s: No memory for CPU err\n", __func__);
> +		devres_release_group(&pdev->dev, mvebu_mc_err_probe);
> +		return -ENOMEM;
> +	}

Make that call after all platform_get_resource(),
devm_ioremap_resource() so that you can save yourself the unwinding
"goto err" and return directly then.

> +
> +	pdata = mci->pvt_info;
> +	mci->pdev = &pdev->dev;
> +	platform_set_drvdata(pdev, mci);
> +	pdata->name = "mvebu_mc_err";
> +	mci->dev_name = dev_name(&pdev->dev);
> +	pdata->edac_idx = edac_mc_idx++;
> +
> +	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!r) {
> +		pr_err("%s: Unable to get resource for MC err regs\n",
> +		       __func__);
> +		res = -ENOENT;
> +		goto err;
> +	}
> +
> +	pdata->mc_vbase = devm_ioremap_resource(&pdev->dev, r);
> +	if (IS_ERR(pdata->mc_vbase)) {
> +		pr_err("%s: Unable to setup MC err regs\n", __func__);
> +		res = PTR_ERR(pdata->mc_vbase);
> +		goto err;
> +	}
> +
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
> +	if (!(ctl & MVEBU_SDRAM_ECC)) {
> +		/* Non-ECC RAM? */
> +		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
> +		res = -ENODEV;
> +		goto err;
> +	}
> +
> +	edac_dbg(3, "init mci\n");
> +	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
> +	mci->edac_cap = EDAC_FLAG_SECDED;
> +	mci->mod_name = EDAC_MOD_STR;
> +	mci->mod_ver = MVEBU_REVISION;
> +	mci->ctl_name = mvebu_ctl_name;
> +
> +	if (edac_op_state == EDAC_OPSTATE_POLL)
> +		mci->edac_check = mvebu_mc_check;
> +
> +	mci->ctl_page_to_phys = NULL;
> +
> +	mci->scrub_mode = SCRUB_SW_SRC;
> +
> +	mvebu_init_csrows(mci, pdata);
> +
> +	/* setup MC registers */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +	ctl = (ctl & 0xff00ffff) | 0x10000;
> +	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +
> +	if (edac_op_state == EDAC_OPSTATE_INT) {
> +		/* acquire interrupt that reports errors */
> +		pdata->irq = platform_get_irq(pdev, 0);
> +		res = devm_request_irq(&pdev->dev,
> +				       pdata->irq,
> +				       mvebu_mc_isr,
> +				       0,
> +				       "[EDAC] MC err",
> +				       mci);
> +		if (res < 0) {
> +			pr_err("%s: Unable to request irq %d\n", __func__,
> +			       pdata->irq);
> +			res = -ENODEV;
> +			goto err;
> +		}
> +
> +		pr_info("acquired irq %d for MC Err\n",
> +		       pdata->irq);

Really needed?

> +	}
> +
> +	res = edac_mc_add_mc(mci);
> +	if (res) {
> +		edac_dbg(3, "failed edac_mc_add_mc()\n");
> +		goto err;
> +	}
> +
> +
> +	/* get this far and it's successful */
> +	edac_dbg(3, "success\n");
> +
> +	return 0;
> +
> +err:
> +	devres_release_group(&pdev->dev, mvebu_mc_err_probe);
> +	edac_mc_free(mci);
> +	return res;
> +}
> +
> +static int mvebu_mc_err_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
> +
> +	edac_dbg(0, "\n");
> +
> +	edac_mc_del_mc(&pdev->dev);
> +	edac_mc_free(mci);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id mvebu_mc_err_of_match[] = {
> +	{ .compatible = "marvell,armada-xp-sdram-controller", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
> +
> +static struct platform_driver mvebu_mc_err_driver = {
> +	.probe = mvebu_mc_err_probe,
> +	.remove = mvebu_mc_err_remove,
> +	.driver = {
> +		   .name = "mvebu_mc_err",
> +		   .of_match_table = of_match_ptr(mvebu_mc_err_of_match),
> +	},
> +};
> +
> +/*********************** L2 err device **********************************/
> +static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
> +{
> +
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return;
> +
> +	pr_err("ECC Error in CPU L2 cache\n");
> +	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
> +	pr_err("L2 Error Address Capture Register: 0x%08x\n",
> +		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));

Ditto as above. edac_printk().

Also, I'd like to see this made more user-friendly and actually the
error decoded to human-readable strings than dumping raw registers and
then forcing users to go dig IP manuals for the definitions.

> +
> +	if (L2_ERR_TYPE(val) == 0)
> +		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	if (L2_ERR_TYPE(val) == 1)
> +		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +}
> +
> +static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *edac_dev = dev_id;
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return IRQ_NONE;
> +
> +	mvebu_l2_check(edac_dev);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
> +				char *data)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +
> +	return sprintf(data, "0x%08x",
> +		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL));
> +}
> +
> +static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
> +				 const char *data, size_t count)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	unsigned long val;
> +
> +	if (!kstrtoul(data, 0, &val)) {
> +		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL);
> +		return count;
> +	}
> +
> +	return 0;
> +}
> +
> +static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
> +				     char *data)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +
> +	return sprintf(data, "0x%08x",
> +		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK));
> +}
> +
> +static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
> +				      const char *data, size_t count)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	unsigned long val;
> +
> +	if (!kstrtoul(data, 0, &val)) {
> +		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK);
> +		return count;
> +	}
> +
> +	return 0;
> +}

Do you really want all those injection things to be present even on a
production system and people to inject stuff?

If not, you can stick them under CONFIG_EDAC_DEBUG.

> +
> +static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
> +	__ATTR_RW(inject_ctrl),
> +	__ATTR_RW(inject_mask),
> +	{},
> +};
> +
> +static int mvebu_l2_err_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct mvebu_l2_pdata *pdata;
> +	struct resource *r;
> +	int res;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
> +					      "cpu", 1, "L", 1, 2, NULL, 0,
> +					      edac_l2_idx);
> +	if (!edac_dev) {
> +		devres_release_group(&pdev->dev, mvebu_l2_err_probe);
> +		return -ENOMEM;
> +	}

Same comment here as above - consider moving that call down in the
function to simplify error handling paths.

> +
> +	pdata = edac_dev->pvt_info;
> +	pdata->name = "mvebu_l2_err";
> +	edac_dev->dev = &pdev->dev;
> +	dev_set_drvdata(edac_dev->dev, edac_dev);
> +	edac_dev->mod_name = EDAC_MOD_STR;
> +	edac_dev->ctl_name = pdata->name;
> +	edac_dev->dev_name = pdata->name;

...

> diff --git a/drivers/edac/mvebu_edac.h b/drivers/edac/mvebu_edac.h
> new file mode 100644
> index 000000000000..33f0534b87df
> --- /dev/null
> +++ b/drivers/edac/mvebu_edac.h
> @@ -0,0 +1,66 @@
> +/*
> + * EDAC defs for Marvell SoCs
> + *
> + * Copyright (C) 2017 Allied Telesis Labs
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details
> + */
> +#ifndef _MVEBU_EDAC_H_
> +#define _MVEBU_EDAC_H_
> +
> +#define MVEBU_REVISION " Ver: 2.0.0"
> +#define EDAC_MOD_STR	"MVEBU_edac"
> +
> +/*
> + * L2 Err Registers
> + */
> +#define MVEBU_L2_ERR_COUNT		0x00	/* 0x8600 */
> +#define MVEBU_L2_ERR_THRESH		0x04	/* 0x8604 */
> +#define MVEBU_L2_ERR_ATTR		0x08	/* 0x8608 */
> +#define MVEBU_L2_ERR_ADDR		0x0c	/* 0x860c */
> +#define MVEBU_L2_ERR_CAP		0x10	/* 0x8610 */
> +#define MVEBU_L2_ERR_INJ_CTRL		0x14	/* 0x8614 */
> +#define MVEBU_L2_ERR_INJ_MASK		0x18	/* 0x8618 */
> +
> +#define L2_ERR_UE_THRESH(val)		((val & 0xff) << 16)
> +#define L2_ERR_CE_THRESH(val)		(val & 0xffff)
> +#define L2_ERR_TYPE(val)		((val >> 8) & 0x3)
> +
> +/*
> + * SDRAM Controller Registers
> + */
> +#define MVEBU_SDRAM_CONFIG		0x00	/* 0x1400 */
> +#define MVEBU_SDRAM_ERR_DATA_HI		0x40	/* 0x1440 */
> +#define MVEBU_SDRAM_ERR_DATA_LO		0x44	/* 0x1444 */
> +#define MVEBU_SDRAM_ERR_ECC_RCVD	0x48	/* 0x1448 */
> +#define MVEBU_SDRAM_ERR_ECC_CALC	0x4c	/* 0x144c */
> +#define MVEBU_SDRAM_ERR_ADDR		0x50	/* 0x1450 */
> +#define MVEBU_SDRAM_ERR_ECC_CNTL	0x54	/* 0x1454 */
> +#define MVEBU_SDRAM_ERR_ECC_ERR_CNT	0x58	/* 0x1458 */
> +
> +#define MVEBU_SDRAM_REGISTERED	0x20000
> +#define MVEBU_SDRAM_ECC		0x40000

BIT()

> +
> +struct mvebu_l2_pdata {
> +	void __iomem *l2_vbase;
> +	char *name;
> +	int irq;
> +	int edac_idx;
> +};
> +
> +struct mvebu_mc_pdata {
> +	void __iomem *mc_vbase;
> +	int total_mem;
> +	char *name;
> +	int irq;
> +	int edac_idx;
> +};

Looks like you could merge those two structs into one.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 10:39     ` Borislav Petkov
  0 siblings, 0 replies; 54+ messages in thread
From: Borislav Petkov @ 2017-06-09 10:39 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jun 08, 2017 at 04:11:21PM +1200, Chris Packham wrote:
> This adds an EDAC driver for the memory controller and L2 cache used on
> a number of Marvell Armada SoCs.
> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: linux-arm-kernel at lists.infradead.org
> ---
>  drivers/edac/Kconfig      |   7 +
>  drivers/edac/Makefile     |   1 +
>  drivers/edac/mvebu_edac.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/edac/mvebu_edac.h |  66 ++++++
>  4 files changed, 580 insertions(+)
>  create mode 100644 drivers/edac/mvebu_edac.c
>  create mode 100644 drivers/edac/mvebu_edac.h

...

> diff --git a/drivers/edac/mvebu_edac.c b/drivers/edac/mvebu_edac.c
> new file mode 100644
> index 000000000000..624cf10f821b
> --- /dev/null
> +++ b/drivers/edac/mvebu_edac.c
> @@ -0,0 +1,506 @@
> +/*
> + * EDAC driver for Marvell ARM SoCs
> + *
> + * Copyright (C) 2017 Allied Telesis Labs
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

We have all those fancy edac_printk() macros, no need to use pr_* ones.

...

> +static int mvebu_mc_err_probe(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci;
> +	struct edac_mc_layer layers[2];
> +	struct mvebu_mc_pdata *pdata;
> +	struct resource *r;
> +	u32 ctl;
> +	int res = 0;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = false;
> +	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
> +			    sizeof(struct mvebu_mc_pdata));
> +	if (!mci) {
> +		pr_err("%s: No memory for CPU err\n", __func__);
> +		devres_release_group(&pdev->dev, mvebu_mc_err_probe);
> +		return -ENOMEM;
> +	}

Make that call after all platform_get_resource(),
devm_ioremap_resource() so that you can save yourself the unwinding
"goto err" and return directly then.

> +
> +	pdata = mci->pvt_info;
> +	mci->pdev = &pdev->dev;
> +	platform_set_drvdata(pdev, mci);
> +	pdata->name = "mvebu_mc_err";
> +	mci->dev_name = dev_name(&pdev->dev);
> +	pdata->edac_idx = edac_mc_idx++;
> +
> +	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!r) {
> +		pr_err("%s: Unable to get resource for MC err regs\n",
> +		       __func__);
> +		res = -ENOENT;
> +		goto err;
> +	}
> +
> +	pdata->mc_vbase = devm_ioremap_resource(&pdev->dev, r);
> +	if (IS_ERR(pdata->mc_vbase)) {
> +		pr_err("%s: Unable to setup MC err regs\n", __func__);
> +		res = PTR_ERR(pdata->mc_vbase);
> +		goto err;
> +	}
> +
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_CONFIG);
> +	if (!(ctl & MVEBU_SDRAM_ECC)) {
> +		/* Non-ECC RAM? */
> +		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
> +		res = -ENODEV;
> +		goto err;
> +	}
> +
> +	edac_dbg(3, "init mci\n");
> +	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
> +	mci->edac_cap = EDAC_FLAG_SECDED;
> +	mci->mod_name = EDAC_MOD_STR;
> +	mci->mod_ver = MVEBU_REVISION;
> +	mci->ctl_name = mvebu_ctl_name;
> +
> +	if (edac_op_state == EDAC_OPSTATE_POLL)
> +		mci->edac_check = mvebu_mc_check;
> +
> +	mci->ctl_page_to_phys = NULL;
> +
> +	mci->scrub_mode = SCRUB_SW_SRC;
> +
> +	mvebu_init_csrows(mci, pdata);
> +
> +	/* setup MC registers */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +	ctl = (ctl & 0xff00ffff) | 0x10000;
> +	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +
> +	if (edac_op_state == EDAC_OPSTATE_INT) {
> +		/* acquire interrupt that reports errors */
> +		pdata->irq = platform_get_irq(pdev, 0);
> +		res = devm_request_irq(&pdev->dev,
> +				       pdata->irq,
> +				       mvebu_mc_isr,
> +				       0,
> +				       "[EDAC] MC err",
> +				       mci);
> +		if (res < 0) {
> +			pr_err("%s: Unable to request irq %d\n", __func__,
> +			       pdata->irq);
> +			res = -ENODEV;
> +			goto err;
> +		}
> +
> +		pr_info("acquired irq %d for MC Err\n",
> +		       pdata->irq);

Really needed?

> +	}
> +
> +	res = edac_mc_add_mc(mci);
> +	if (res) {
> +		edac_dbg(3, "failed edac_mc_add_mc()\n");
> +		goto err;
> +	}
> +
> +
> +	/* get this far and it's successful */
> +	edac_dbg(3, "success\n");
> +
> +	return 0;
> +
> +err:
> +	devres_release_group(&pdev->dev, mvebu_mc_err_probe);
> +	edac_mc_free(mci);
> +	return res;
> +}
> +
> +static int mvebu_mc_err_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
> +
> +	edac_dbg(0, "\n");
> +
> +	edac_mc_del_mc(&pdev->dev);
> +	edac_mc_free(mci);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id mvebu_mc_err_of_match[] = {
> +	{ .compatible = "marvell,armada-xp-sdram-controller", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
> +
> +static struct platform_driver mvebu_mc_err_driver = {
> +	.probe = mvebu_mc_err_probe,
> +	.remove = mvebu_mc_err_remove,
> +	.driver = {
> +		   .name = "mvebu_mc_err",
> +		   .of_match_table = of_match_ptr(mvebu_mc_err_of_match),
> +	},
> +};
> +
> +/*********************** L2 err device **********************************/
> +static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
> +{
> +
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return;
> +
> +	pr_err("ECC Error in CPU L2 cache\n");
> +	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
> +	pr_err("L2 Error Address Capture Register: 0x%08x\n",
> +		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));

Ditto as above. edac_printk().

Also, I'd like to see this made more user-friendly and actually the
error decoded to human-readable strings than dumping raw registers and
then forcing users to go dig IP manuals for the definitions.

> +
> +	if (L2_ERR_TYPE(val) == 0)
> +		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	if (L2_ERR_TYPE(val) == 1)
> +		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +}
> +
> +static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *edac_dev = dev_id;
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return IRQ_NONE;
> +
> +	mvebu_l2_check(edac_dev);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
> +				char *data)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +
> +	return sprintf(data, "0x%08x",
> +		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL));
> +}
> +
> +static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
> +				 const char *data, size_t count)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	unsigned long val;
> +
> +	if (!kstrtoul(data, 0, &val)) {
> +		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_CTRL);
> +		return count;
> +	}
> +
> +	return 0;
> +}
> +
> +static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
> +				     char *data)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +
> +	return sprintf(data, "0x%08x",
> +		       readl(pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK));
> +}
> +
> +static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
> +				      const char *data, size_t count)
> +{
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	unsigned long val;
> +
> +	if (!kstrtoul(data, 0, &val)) {
> +		writel(val, pdata->l2_vbase + MVEBU_L2_ERR_INJ_MASK);
> +		return count;
> +	}
> +
> +	return 0;
> +}

Do you really want all those injection things to be present even on a
production system and people to inject stuff?

If not, you can stick them under CONFIG_EDAC_DEBUG.

> +
> +static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
> +	__ATTR_RW(inject_ctrl),
> +	__ATTR_RW(inject_mask),
> +	{},
> +};
> +
> +static int mvebu_l2_err_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct mvebu_l2_pdata *pdata;
> +	struct resource *r;
> +	int res;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
> +					      "cpu", 1, "L", 1, 2, NULL, 0,
> +					      edac_l2_idx);
> +	if (!edac_dev) {
> +		devres_release_group(&pdev->dev, mvebu_l2_err_probe);
> +		return -ENOMEM;
> +	}

Same comment here as above - consider moving that call down in the
function to simplify error handling paths.

> +
> +	pdata = edac_dev->pvt_info;
> +	pdata->name = "mvebu_l2_err";
> +	edac_dev->dev = &pdev->dev;
> +	dev_set_drvdata(edac_dev->dev, edac_dev);
> +	edac_dev->mod_name = EDAC_MOD_STR;
> +	edac_dev->ctl_name = pdata->name;
> +	edac_dev->dev_name = pdata->name;

...

> diff --git a/drivers/edac/mvebu_edac.h b/drivers/edac/mvebu_edac.h
> new file mode 100644
> index 000000000000..33f0534b87df
> --- /dev/null
> +++ b/drivers/edac/mvebu_edac.h
> @@ -0,0 +1,66 @@
> +/*
> + * EDAC defs for Marvell SoCs
> + *
> + * Copyright (C) 2017 Allied Telesis Labs
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details
> + */
> +#ifndef _MVEBU_EDAC_H_
> +#define _MVEBU_EDAC_H_
> +
> +#define MVEBU_REVISION " Ver: 2.0.0"
> +#define EDAC_MOD_STR	"MVEBU_edac"
> +
> +/*
> + * L2 Err Registers
> + */
> +#define MVEBU_L2_ERR_COUNT		0x00	/* 0x8600 */
> +#define MVEBU_L2_ERR_THRESH		0x04	/* 0x8604 */
> +#define MVEBU_L2_ERR_ATTR		0x08	/* 0x8608 */
> +#define MVEBU_L2_ERR_ADDR		0x0c	/* 0x860c */
> +#define MVEBU_L2_ERR_CAP		0x10	/* 0x8610 */
> +#define MVEBU_L2_ERR_INJ_CTRL		0x14	/* 0x8614 */
> +#define MVEBU_L2_ERR_INJ_MASK		0x18	/* 0x8618 */
> +
> +#define L2_ERR_UE_THRESH(val)		((val & 0xff) << 16)
> +#define L2_ERR_CE_THRESH(val)		(val & 0xffff)
> +#define L2_ERR_TYPE(val)		((val >> 8) & 0x3)
> +
> +/*
> + * SDRAM Controller Registers
> + */
> +#define MVEBU_SDRAM_CONFIG		0x00	/* 0x1400 */
> +#define MVEBU_SDRAM_ERR_DATA_HI		0x40	/* 0x1440 */
> +#define MVEBU_SDRAM_ERR_DATA_LO		0x44	/* 0x1444 */
> +#define MVEBU_SDRAM_ERR_ECC_RCVD	0x48	/* 0x1448 */
> +#define MVEBU_SDRAM_ERR_ECC_CALC	0x4c	/* 0x144c */
> +#define MVEBU_SDRAM_ERR_ADDR		0x50	/* 0x1450 */
> +#define MVEBU_SDRAM_ERR_ECC_CNTL	0x54	/* 0x1454 */
> +#define MVEBU_SDRAM_ERR_ECC_ERR_CNT	0x58	/* 0x1458 */
> +
> +#define MVEBU_SDRAM_REGISTERED	0x20000
> +#define MVEBU_SDRAM_ECC		0x40000

BIT()

> +
> +struct mvebu_l2_pdata {
> +	void __iomem *l2_vbase;
> +	char *name;
> +	int irq;
> +	int edac_idx;
> +};
> +
> +struct mvebu_mc_pdata {
> +	void __iomem *mc_vbase;
> +	int total_mem;
> +	char *name;
> +	int irq;
> +	int edac_idx;
> +};

Looks like you could merge those two structs into one.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 13:14     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09 13:14 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

Hi Chris!

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> This adds an EDAC driver for the memory controller and L2 cache used on
> a number of Marvell Armada SoCs.
Why have two separate drivers in the same file and enabled with the same
Kconfig option?

[...]
> +static void mvebu_mc_check(struct mem_ctl_info *mci)
> +{
> +	struct mvebu_mc_pdata *pdata = mci->pvt_info;
> +	u32 reg;
> +	u32 err_addr;
> +	u32 sdram_ecc;
> +	u32 comp_ecc;
> +	u32 syndrome;
> +
> +	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	if (!reg)
> +		return;
> +
> +	err_addr = reg & ~0x3;
The ERR_ADDR register is not a physical address but contains multiple
fields (bank, col, chip select and error type).

> +	sdram_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_RCVD);
> +	comp_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CALC);
> +	syndrome = sdram_ecc ^ comp_ecc;
> +
> +	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
> +	if (!(reg & 0x1))
> +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
> +				     err_addr >> PAGE_SHIFT,
> +				     err_addr & PAGE_MASK, syndrome,
> +				     0, 0, -1,
> +				     mci->ctl_name, "");
> +	else	/* 2 bit error, UE */
> +		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
> +				     err_addr >> PAGE_SHIFT,
> +				     err_addr & PAGE_MASK, 0,
> +				     0, 0, -1,
> +				     mci->ctl_name, "");
> +
> +	/* clear the error */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
This field is documented to be read-only. I had to write the Error
Interrupt Cause Register (0x14d0) or Message Interrupt Cause Register
(0x14d8) to allow the capture of new error details.

[...]
> +static void get_total_mem(struct mvebu_mc_pdata *pdata)
> +{
> +	struct device_node *np = NULL;
> +	struct resource res;
> +	int ret;
> +	unsigned long total_mem = 0;
> +
> +	for_each_node_by_type(np, "memory") {
> +		ret = of_address_to_resource(np, 0, &res);
> +		if (ret)
> +			continue;
> +
> +		total_mem += resource_size(&res);
> +	}
> +
> +	pdata->total_mem = total_mem;
> +}
I think we can calculate the sizes by reading back the individual CS
configuration registers.

> +static void mvebu_init_csrows(struct mem_ctl_info *mci,
> +				struct mvebu_mc_pdata *pdata)
[...]
> +	devtype = (ctl >> 20) & 0x3;
> +	switch (devtype) {
> +	case 0x0:
> +		dimm->dtype = DEV_X32;
> +		break;
> +	case 0x2:		/* could be X8 too, but no way to tell */
> +		dimm->dtype = DEV_X16;
> +		break;
> +	case 0x3:
> +		dimm->dtype = DEV_X4;
> +		break;
> +	default:
> +		dimm->dtype = DEV_UNKNOWN;
> +		break;
> +	}
This register is documented as reserved? I pulled the X8/X16 information
from the Address Control Register (CSnStruct bits).

> +	dimm->edac_mode = EDAC_SECDED;
> +}

[...]
> +	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
devres automatically opens a group per device, so this shouln't be
needed (although other EADC drivers do the same).

> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = false;
I'm not sure if this is needed. In my driver, only one CHIP_SELECT layer
with size 4 (max number of chip selects) is configured and seems to work
fine.

[...]
> +	mci->scrub_mode = SCRUB_SW_SRC;
I'm not sure if this works as expected ARM as it is currently
implemented, but that's a topic for a different mail.

[...]
> +	/* setup MC registers */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +	ctl = (ctl & 0xff00ffff) | 0x10000;
> +	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
This configures the single bit error threshold to 1. My driver does the
same.

> +
> +	if (edac_op_state == EDAC_OPSTATE_INT) {
> +		/* acquire interrupt that reports errors */
> +		pdata->irq = platform_get_irq(pdev, 0);
> +		res = devm_request_irq(&pdev->dev,
> +				       pdata->irq,
> +				       mvebu_mc_isr,
> +				       0,
> +				       "[EDAC] MC err",
> +				       mci);
Which IRQ do you use? The current DT doesn't configure interrupts. Also
it seems that the events are passed through additional layers of
mask/status registers which are not yet represented in the Armada-XP IRQ
hierarchy. So my driver currently uses polling.

[...]
> +static const struct of_device_id mvebu_mc_err_of_match[] = {
> +	{ .compatible = "marvell,armada-xp-sdram-controller", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
I currently do the same, but that's not really correct:
In arch/arm/mach-mvebu/pm.c mvebu_pm_suspend_init already uses that
compatible to call request_mem_region(). So need some coordination
between that and the new EDAC driver.

> +/*********************** L2 err device **********************************/
> +static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
> +{
> +
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return;
> +
> +	pr_err("ECC Error in CPU L2 cache\n");
> +	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
> +	pr_err("L2 Error Address Capture Register: 0x%08x\n",
> +		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));
> +
> +	if (L2_ERR_TYPE(val) == 0)
> +		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	if (L2_ERR_TYPE(val) == 1)
> +		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
We can use the address and attribute registers to collect more
information. Also, the counter registers need to be handled.

> +	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
OK, I do the same.

> +}
> +
> +static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *edac_dev = dev_id;
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return IRQ_NONE;
> +
> +	mvebu_l2_check(edac_dev);
> +
> +	return IRQ_HANDLED;
> +}
This interrupt seems to be shared with the PMU? Does this work without
implementing support for the additional IRQ status register?

> +static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
[...]
> +static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
> +				 const char *data, size_t count)
[...]
> +static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
> +				     char *data)
[...]
> +static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
> +				      const char *data, size_t count)
[...]
> +static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
> +	__ATTR_RW(inject_ctrl),
> +	__ATTR_RW(inject_mask),
> +	{},
> +};
I'd prefer to use debugfs for that.

> +static int mvebu_l2_err_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct mvebu_l2_pdata *pdata;
> +	struct resource *r;
> +	int res;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
This is is not needed.

[...]

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 13:14     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09 13:14 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

Hi Chris!

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> This adds an EDAC driver for the memory controller and L2 cache used on
> a number of Marvell Armada SoCs.
Why have two separate drivers in the same file and enabled with the same
Kconfig option?

[...]
> +static void mvebu_mc_check(struct mem_ctl_info *mci)
> +{
> +	struct mvebu_mc_pdata *pdata = mci->pvt_info;
> +	u32 reg;
> +	u32 err_addr;
> +	u32 sdram_ecc;
> +	u32 comp_ecc;
> +	u32 syndrome;
> +
> +	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	if (!reg)
> +		return;
> +
> +	err_addr = reg & ~0x3;
The ERR_ADDR register is not a physical address but contains multiple
fields (bank, col, chip select and error type).

> +	sdram_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_RCVD);
> +	comp_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CALC);
> +	syndrome = sdram_ecc ^ comp_ecc;
> +
> +	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
> +	if (!(reg & 0x1))
> +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
> +				     err_addr >> PAGE_SHIFT,
> +				     err_addr & PAGE_MASK, syndrome,
> +				     0, 0, -1,
> +				     mci->ctl_name, "");
> +	else	/* 2 bit error, UE */
> +		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
> +				     err_addr >> PAGE_SHIFT,
> +				     err_addr & PAGE_MASK, 0,
> +				     0, 0, -1,
> +				     mci->ctl_name, "");
> +
> +	/* clear the error */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
This field is documented to be read-only. I had to write the Error
Interrupt Cause Register (0x14d0) or Message Interrupt Cause Register
(0x14d8) to allow the capture of new error details.

[...]
> +static void get_total_mem(struct mvebu_mc_pdata *pdata)
> +{
> +	struct device_node *np = NULL;
> +	struct resource res;
> +	int ret;
> +	unsigned long total_mem = 0;
> +
> +	for_each_node_by_type(np, "memory") {
> +		ret = of_address_to_resource(np, 0, &res);
> +		if (ret)
> +			continue;
> +
> +		total_mem += resource_size(&res);
> +	}
> +
> +	pdata->total_mem = total_mem;
> +}
I think we can calculate the sizes by reading back the individual CS
configuration registers.

> +static void mvebu_init_csrows(struct mem_ctl_info *mci,
> +				struct mvebu_mc_pdata *pdata)
[...]
> +	devtype = (ctl >> 20) & 0x3;
> +	switch (devtype) {
> +	case 0x0:
> +		dimm->dtype = DEV_X32;
> +		break;
> +	case 0x2:		/* could be X8 too, but no way to tell */
> +		dimm->dtype = DEV_X16;
> +		break;
> +	case 0x3:
> +		dimm->dtype = DEV_X4;
> +		break;
> +	default:
> +		dimm->dtype = DEV_UNKNOWN;
> +		break;
> +	}
This register is documented as reserved? I pulled the X8/X16 information
from the Address Control Register (CSnStruct bits).

> +	dimm->edac_mode = EDAC_SECDED;
> +}

[...]
> +	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
devres automatically opens a group per device, so this shouln't be
needed (although other EADC drivers do the same).

> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = false;
I'm not sure if this is needed. In my driver, only one CHIP_SELECT layer
with size 4 (max number of chip selects) is configured and seems to work
fine.

[...]
> +	mci->scrub_mode = SCRUB_SW_SRC;
I'm not sure if this works as expected ARM as it is currently
implemented, but that's a topic for a different mail.

[...]
> +	/* setup MC registers */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +	ctl = (ctl & 0xff00ffff) | 0x10000;
> +	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
This configures the single bit error threshold to 1. My driver does the
same.

> +
> +	if (edac_op_state == EDAC_OPSTATE_INT) {
> +		/* acquire interrupt that reports errors */
> +		pdata->irq = platform_get_irq(pdev, 0);
> +		res = devm_request_irq(&pdev->dev,
> +				       pdata->irq,
> +				       mvebu_mc_isr,
> +				       0,
> +				       "[EDAC] MC err",
> +				       mci);
Which IRQ do you use? The current DT doesn't configure interrupts. Also
it seems that the events are passed through additional layers of
mask/status registers which are not yet represented in the Armada-XP IRQ
hierarchy. So my driver currently uses polling.

[...]
> +static const struct of_device_id mvebu_mc_err_of_match[] = {
> +	{ .compatible = "marvell,armada-xp-sdram-controller", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
I currently do the same, but that's not really correct:
In arch/arm/mach-mvebu/pm.c mvebu_pm_suspend_init already uses that
compatible to call request_mem_region(). So need some coordination
between that and the new EDAC driver.

> +/*********************** L2 err device **********************************/
> +static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
> +{
> +
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return;
> +
> +	pr_err("ECC Error in CPU L2 cache\n");
> +	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
> +	pr_err("L2 Error Address Capture Register: 0x%08x\n",
> +		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));
> +
> +	if (L2_ERR_TYPE(val) == 0)
> +		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	if (L2_ERR_TYPE(val) == 1)
> +		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
We can use the address and attribute registers to collect more
information. Also, the counter registers need to be handled.

> +	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
OK, I do the same.

> +}
> +
> +static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *edac_dev = dev_id;
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return IRQ_NONE;
> +
> +	mvebu_l2_check(edac_dev);
> +
> +	return IRQ_HANDLED;
> +}
This interrupt seems to be shared with the PMU? Does this work without
implementing support for the additional IRQ status register?

> +static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
[...]
> +static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
> +				 const char *data, size_t count)
[...]
> +static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
> +				     char *data)
[...]
> +static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
> +				      const char *data, size_t count)
[...]
> +static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
> +	__ATTR_RW(inject_ctrl),
> +	__ATTR_RW(inject_mask),
> +	{},
> +};
I'd prefer to use debugfs for that.

> +static int mvebu_l2_err_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct mvebu_l2_pdata *pdata;
> +	struct resource *r;
> +	int res;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
This is is not needed.

[...]

Regards,
Jan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 13:14     ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09 13:14 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Chris!

On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> This adds an EDAC driver for the memory controller and L2 cache used on
> a number of Marvell Armada SoCs.
Why have two separate drivers in the same file and enabled with the same
Kconfig option?

[...]
> +static void mvebu_mc_check(struct mem_ctl_info *mci)
> +{
> +	struct mvebu_mc_pdata *pdata = mci->pvt_info;
> +	u32 reg;
> +	u32 err_addr;
> +	u32 sdram_ecc;
> +	u32 comp_ecc;
> +	u32 syndrome;
> +
> +	reg = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	if (!reg)
> +		return;
> +
> +	err_addr = reg & ~0x3;
The ERR_ADDR register is not a physical address but contains multiple
fields (bank, col, chip select and error type).

> +	sdram_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_RCVD);
> +	comp_ecc = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CALC);
> +	syndrome = sdram_ecc ^ comp_ecc;
> +
> +	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
> +	if (!(reg & 0x1))
> +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
> +				     err_addr >> PAGE_SHIFT,
> +				     err_addr & PAGE_MASK, syndrome,
> +				     0, 0, -1,
> +				     mci->ctl_name, "");
> +	else	/* 2 bit error, UE */
> +		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
> +				     err_addr >> PAGE_SHIFT,
> +				     err_addr & PAGE_MASK, 0,
> +				     0, 0, -1,
> +				     mci->ctl_name, "");
> +
> +	/* clear the error */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
This field is documented to be read-only. I had to write the Error
Interrupt Cause Register (0x14d0) or Message Interrupt Cause Register
(0x14d8) to allow the capture of new error details.

[...]
> +static void get_total_mem(struct mvebu_mc_pdata *pdata)
> +{
> +	struct device_node *np = NULL;
> +	struct resource res;
> +	int ret;
> +	unsigned long total_mem = 0;
> +
> +	for_each_node_by_type(np, "memory") {
> +		ret = of_address_to_resource(np, 0, &res);
> +		if (ret)
> +			continue;
> +
> +		total_mem += resource_size(&res);
> +	}
> +
> +	pdata->total_mem = total_mem;
> +}
I think we can calculate the sizes by reading back the individual CS
configuration registers.

> +static void mvebu_init_csrows(struct mem_ctl_info *mci,
> +				struct mvebu_mc_pdata *pdata)
[...]
> +	devtype = (ctl >> 20) & 0x3;
> +	switch (devtype) {
> +	case 0x0:
> +		dimm->dtype = DEV_X32;
> +		break;
> +	case 0x2:		/* could be X8 too, but no way to tell */
> +		dimm->dtype = DEV_X16;
> +		break;
> +	case 0x3:
> +		dimm->dtype = DEV_X4;
> +		break;
> +	default:
> +		dimm->dtype = DEV_UNKNOWN;
> +		break;
> +	}
This register is documented as reserved? I pulled the X8/X16 information
from the Address Control Register (CSnStruct bits).

> +	dimm->edac_mode = EDAC_SECDED;
> +}

[...]
> +	if (!devres_open_group(&pdev->dev, mvebu_mc_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
devres automatically opens a group per device, so this shouln't be
needed (although other EADC drivers do the same).

> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = false;
I'm not sure if this is needed. In my driver, only one CHIP_SELECT layer
with size 4 (max number of chip selects) is configured and seems to work
fine.

[...]
> +	mci->scrub_mode = SCRUB_SW_SRC;
I'm not sure if this works as expected ARM as it is currently
implemented, but that's a topic for a different mail.

[...]
> +	/* setup MC registers */
> +	writel(0, pdata->mc_vbase + MVEBU_SDRAM_ERR_ADDR);
> +	ctl = readl(pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
> +	ctl = (ctl & 0xff00ffff) | 0x10000;
> +	writel(ctl, pdata->mc_vbase + MVEBU_SDRAM_ERR_ECC_CNTL);
This configures the single bit error threshold to 1. My driver does the
same.

> +
> +	if (edac_op_state == EDAC_OPSTATE_INT) {
> +		/* acquire interrupt that reports errors */
> +		pdata->irq = platform_get_irq(pdev, 0);
> +		res = devm_request_irq(&pdev->dev,
> +				       pdata->irq,
> +				       mvebu_mc_isr,
> +				       0,
> +				       "[EDAC] MC err",
> +				       mci);
Which IRQ do you use? The current DT doesn't configure interrupts. Also
it seems that the events are passed through additional layers of
mask/status registers which are not yet represented in the Armada-XP IRQ
hierarchy. So my driver currently uses polling.

[...]
> +static const struct of_device_id mvebu_mc_err_of_match[] = {
> +	{ .compatible = "marvell,armada-xp-sdram-controller", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_mc_err_of_match);
I currently do the same, but that's not really correct:
In arch/arm/mach-mvebu/pm.c mvebu_pm_suspend_init already uses that
compatible to call request_mem_region(). So need some coordination
between that and the new EDAC driver.

> +/*********************** L2 err device **********************************/
> +static void mvebu_l2_check(struct edac_device_ctl_info *edac_dev)
> +{
> +
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return;
> +
> +	pr_err("ECC Error in CPU L2 cache\n");
> +	pr_err("L2 Error Attributes Capture Register: 0x%08x\n", val);
> +	pr_err("L2 Error Address Capture Register: 0x%08x\n",
> +		readl(pdata->l2_vbase + MVEBU_L2_ERR_ADDR));
> +
> +	if (L2_ERR_TYPE(val) == 0)
> +		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +
> +	if (L2_ERR_TYPE(val) == 1)
> +		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
We can use the address and attribute registers to collect more
information. Also, the counter registers need to be handled.

> +	writel(BIT(0), pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
OK, I do the same.

> +}
> +
> +static irqreturn_t mvebu_l2_isr(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *edac_dev = dev_id;
> +	struct mvebu_l2_pdata *pdata = edac_dev->pvt_info;
> +	u32 val;
> +
> +	val = readl(pdata->l2_vbase + MVEBU_L2_ERR_ATTR);
> +	if (!(val & 1))
> +		return IRQ_NONE;
> +
> +	mvebu_l2_check(edac_dev);
> +
> +	return IRQ_HANDLED;
> +}
This interrupt seems to be shared with the PMU? Does this work without
implementing support for the additional IRQ status register?

> +static ssize_t inject_ctrl_show(struct edac_device_ctl_info *edac_dev,
[...]
> +static ssize_t inject_ctrl_store(struct edac_device_ctl_info *edac_dev,
> +				 const char *data, size_t count)
[...]
> +static ssize_t inject_mask_show(struct edac_device_ctl_info *edac_dev,
> +				     char *data)
[...]
> +static ssize_t inject_mask_store(struct edac_device_ctl_info *edac_dev,
> +				      const char *data, size_t count)
[...]
> +static struct edac_dev_sysfs_attribute mvebu_l2_sysfs_attributes[] = {
> +	__ATTR_RW(inject_ctrl),
> +	__ATTR_RW(inject_mask),
> +	{},
> +};
I'd prefer to use debugfs for that.

> +static int mvebu_l2_err_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct mvebu_l2_pdata *pdata;
> +	struct resource *r;
> +	int res;
> +
> +	if (!devres_open_group(&pdev->dev, mvebu_l2_err_probe, GFP_KERNEL))
> +		return -ENOMEM;
This is is not needed.

[...]

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* DMA-safety for edac_atomic_scrub on ARM
@ 2017-06-09 13:56       ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09 13:56 UTC (permalink / raw)
  To: bp, Rob Herring
  Cc: linux-arm-kernel, linux-edac, Mauro Carvalho Chehab,
	linux-kernel, Chris Packham, kernel

Hi,

I've CCed Rob as the original author of the ARM EDAC scrub function.

On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
> [...]
> > +     mci->scrub_mode = SCRUB_SW_SRC;
> I'm not sure if this works as expected ARM as it is currently
> implemented, but that's a topic for a different mail.

Some more background as I understand it so far:

Configuring a EDAC MC to scrub_mode = SCRUB_SW_SRC causes the common
handler code to run a per-arch software scrub function. It is used by
several EADC drivers on ARM.

drivers/edac/edac_mc.c:
>         if (mci->scrub_mode == SCRUB_SW_SRC) {
>                 /*
>                         * Some memory controllers (called MCs below) can remap
>                         * memory so that it is still available at a different
>                         * address when PCI devices map into memory.
>                         * MC's that can't do this, lose the memory where PCI
>                         * devices are mapped. This mapping is MC-dependent
>                         * and so we call back into the MC driver for it to
>                         * map the MC page to a physical (CPU) page which can
>                         * then be mapped to a virtual page - which can then
>                         * be scrubbed.
>                         */
>                 remapped_page = mci->ctl_page_to_phys ?
>                         mci->ctl_page_to_phys(mci, page_frame_number) :
>                         page_frame_number;
> 
>                 edac_mc_scrub_block(remapped_page,
>                                         offset_in_page, grain);
>         }

edac_mc_scrub_block() then basically checks if it actually hit a valid
PFN, maps the page with kmap_atomic and runs edac_atomic_scrub().
For ARM this is implemented in arch/arm/include/asm/edac.h:
> /*
>  * ECC atomic, DMA, SMP and interrupt safe scrub function.
>  * Implements the per arch edac_atomic_scrub() that EDAC use for software
>  * ECC scrubbing.  It reads memory and then writes back the original
>  * value, allowing the hardware to detect and correct memory errors.
>  */
> 
> static inline void edac_atomic_scrub(void *va, u32 size)
> {
> #if __LINUX_ARM_ARCH__ >= 6
>         unsigned int *virt_addr = va;
>         unsigned int temp, temp2;
>         unsigned int i;
> 
>         for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>                 /* Very carefully read and write to memory atomically
>                  * so we are interrupt, DMA and SMP safe.
>                  */
>                 __asm__ __volatile__("\n"
>                         "1:     ldrex   %0, [%2]\n"
>                         "       strex   %1, %0, [%2]\n"
>                         "       teq     %1, #0\n"
>                         "       bne     1b\n"
>                         : "=&r"(temp), "=&r"(temp2)
>                         : "r"(virt_addr)
>                         : "cc");
>         }
> #endif
> }

The comment "ECC atomic, DMA, SMP and interrupt safe scrub function"
seems to be copied from the initial implementation on x86, first to
powerpc, to mips and later to arm.

On ARM, other bus masters are usually not coherent to the CPUs. This
means that exclusive loads/stores only affect (at most) the sharable
domain, which is usually a subset of the SoC.

Consequently, when a bus master outside of the shareable domain (such as
a ethernet controller) writes to the same memory after the ldrex, the
strex will simply succeed and write stale data to the CPU cache, which
can later overwrite the data written by the ethernet controller.

I'm not sure if it is actually possible to implement this in a DMA-safe
way on ARM without stopping all other bus masters.

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 13:56       ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-09 13:56 UTC (permalink / raw)
  To: bp, Rob Herring
  Cc: linux-arm-kernel, linux-edac, Mauro Carvalho Chehab,
	linux-kernel, Chris Packham, kernel

Hi,

I've CCed Rob as the original author of the ARM EDAC scrub function.

On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
> [...]
> > +     mci->scrub_mode = SCRUB_SW_SRC;
> I'm not sure if this works as expected ARM as it is currently
> implemented, but that's a topic for a different mail.

Some more background as I understand it so far:

Configuring a EDAC MC to scrub_mode = SCRUB_SW_SRC causes the common
handler code to run a per-arch software scrub function. It is used by
several EADC drivers on ARM.

drivers/edac/edac_mc.c:
>         if (mci->scrub_mode == SCRUB_SW_SRC) {
>                 /*
>                         * Some memory controllers (called MCs below) can remap
>                         * memory so that it is still available at a different
>                         * address when PCI devices map into memory.
>                         * MC's that can't do this, lose the memory where PCI
>                         * devices are mapped. This mapping is MC-dependent
>                         * and so we call back into the MC driver for it to
>                         * map the MC page to a physical (CPU) page which can
>                         * then be mapped to a virtual page - which can then
>                         * be scrubbed.
>                         */
>                 remapped_page = mci->ctl_page_to_phys ?
>                         mci->ctl_page_to_phys(mci, page_frame_number) :
>                         page_frame_number;
> 
>                 edac_mc_scrub_block(remapped_page,
>                                         offset_in_page, grain);
>         }

edac_mc_scrub_block() then basically checks if it actually hit a valid
PFN, maps the page with kmap_atomic and runs edac_atomic_scrub().
For ARM this is implemented in arch/arm/include/asm/edac.h:
> /*
>  * ECC atomic, DMA, SMP and interrupt safe scrub function.
>  * Implements the per arch edac_atomic_scrub() that EDAC use for software
>  * ECC scrubbing.  It reads memory and then writes back the original
>  * value, allowing the hardware to detect and correct memory errors.
>  */
> 
> static inline void edac_atomic_scrub(void *va, u32 size)
> {
> #if __LINUX_ARM_ARCH__ >= 6
>         unsigned int *virt_addr = va;
>         unsigned int temp, temp2;
>         unsigned int i;
> 
>         for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>                 /* Very carefully read and write to memory atomically
>                  * so we are interrupt, DMA and SMP safe.
>                  */
>                 __asm__ __volatile__("\n"
>                         "1:     ldrex   %0, [%2]\n"
>                         "       strex   %1, %0, [%2]\n"
>                         "       teq     %1, #0\n"
>                         "       bne     1b\n"
>                         : "=&r"(temp), "=&r"(temp2)
>                         : "r"(virt_addr)
>                         : "cc");
>         }
> #endif
> }

The comment "ECC atomic, DMA, SMP and interrupt safe scrub function"
seems to be copied from the initial implementation on x86, first to
powerpc, to mips and later to arm.

On ARM, other bus masters are usually not coherent to the CPUs. This
means that exclusive loads/stores only affect (at most) the sharable
domain, which is usually a subset of the SoC.

Consequently, when a bus master outside of the shareable domain (such as
a ethernet controller) writes to the same memory after the ldrex, the
strex will simply succeed and write stale data to the CPU cache, which
can later overwrite the data written by the ethernet controller.

I'm not sure if it is actually possible to implement this in a DMA-safe
way on ARM without stopping all other bus masters.

Regards,
Jan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: DMA-safety for edac_atomic_scrub on ARM
@ 2017-06-09 16:13         ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:13 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: Borislav Petkov, linux-arm-kernel, linux-edac,
	Mauro Carvalho Chehab, linux-kernel, Chris Packham, kernel

On Fri, Jun 9, 2017 at 8:56 AM, Jan Lübbe <jlu@pengutronix.de> wrote:
> Hi,
>
> I've CCed Rob as the original author of the ARM EDAC scrub function.
>
> On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
>> [...]
>> > +     mci->scrub_mode = SCRUB_SW_SRC;
>> I'm not sure if this works as expected ARM as it is currently
>> implemented, but that's a topic for a different mail.
>
> Some more background as I understand it so far:
>
> Configuring a EDAC MC to scrub_mode = SCRUB_SW_SRC causes the common
> handler code to run a per-arch software scrub function. It is used by
> several EADC drivers on ARM.
>
> drivers/edac/edac_mc.c:
>>         if (mci->scrub_mode == SCRUB_SW_SRC) {
>>                 /*
>>                         * Some memory controllers (called MCs below) can remap
>>                         * memory so that it is still available at a different
>>                         * address when PCI devices map into memory.
>>                         * MC's that can't do this, lose the memory where PCI
>>                         * devices are mapped. This mapping is MC-dependent
>>                         * and so we call back into the MC driver for it to
>>                         * map the MC page to a physical (CPU) page which can
>>                         * then be mapped to a virtual page - which can then
>>                         * be scrubbed.
>>                         */
>>                 remapped_page = mci->ctl_page_to_phys ?
>>                         mci->ctl_page_to_phys(mci, page_frame_number) :
>>                         page_frame_number;
>>
>>                 edac_mc_scrub_block(remapped_page,
>>                                         offset_in_page, grain);
>>         }
>
> edac_mc_scrub_block() then basically checks if it actually hit a valid
> PFN, maps the page with kmap_atomic and runs edac_atomic_scrub().
> For ARM this is implemented in arch/arm/include/asm/edac.h:
>> /*
>>  * ECC atomic, DMA, SMP and interrupt safe scrub function.
>>  * Implements the per arch edac_atomic_scrub() that EDAC use for software
>>  * ECC scrubbing.  It reads memory and then writes back the original
>>  * value, allowing the hardware to detect and correct memory errors.
>>  */
>>
>> static inline void edac_atomic_scrub(void *va, u32 size)
>> {
>> #if __LINUX_ARM_ARCH__ >= 6
>>         unsigned int *virt_addr = va;
>>         unsigned int temp, temp2;
>>         unsigned int i;
>>
>>         for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>>                 /* Very carefully read and write to memory atomically
>>                  * so we are interrupt, DMA and SMP safe.
>>                  */
>>                 __asm__ __volatile__("\n"
>>                         "1:     ldrex   %0, [%2]\n"
>>                         "       strex   %1, %0, [%2]\n"
>>                         "       teq     %1, #0\n"
>>                         "       bne     1b\n"
>>                         : "=&r"(temp), "=&r"(temp2)
>>                         : "r"(virt_addr)
>>                         : "cc");
>>         }
>> #endif
>> }
>
> The comment "ECC atomic, DMA, SMP and interrupt safe scrub function"
> seems to be copied from the initial implementation on x86, first to
> powerpc, to mips and later to arm.

Yes.

> On ARM, other bus masters are usually not coherent to the CPUs. This
> means that exclusive loads/stores only affect (at most) the sharable
> domain, which is usually a subset of the SoC.

Maybe usually, but that's entirely system dependent. For the system
that needed this (and maybe still the only 32-bit one), it was
coherent i/o. Good luck doing 10G networking with a non-coherent
master.

> Consequently, when a bus master outside of the shareable domain (such as
> a ethernet controller) writes to the same memory after the ldrex, the
> strex will simply succeed and write stale data to the CPU cache, which
> can later overwrite the data written by the ethernet controller.
>
> I'm not sure if it is actually possible to implement this in a DMA-safe
> way on ARM without stopping all other bus masters.

Generically, probably not. And stopping bus masters is probably not
going to work. Bottom line is systems have to be designed for this or
bus masters and drivers audited that they could deal with this. Many
bus masters are not going to both repeatedly read the same location
and write to it. It's a rare problem on an already rare occurrence.

Rob

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-09 16:13         ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:13 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: Borislav Petkov, linux-arm-kernel, linux-edac,
	Mauro Carvalho Chehab, linux-kernel, Chris Packham, kernel

On Fri, Jun 9, 2017 at 8:56 AM, Jan Lübbe <jlu@pengutronix.de> wrote:
> Hi,
>
> I've CCed Rob as the original author of the ARM EDAC scrub function.
>
> On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
>> [...]
>> > +     mci->scrub_mode = SCRUB_SW_SRC;
>> I'm not sure if this works as expected ARM as it is currently
>> implemented, but that's a topic for a different mail.
>
> Some more background as I understand it so far:
>
> Configuring a EDAC MC to scrub_mode = SCRUB_SW_SRC causes the common
> handler code to run a per-arch software scrub function. It is used by
> several EADC drivers on ARM.
>
> drivers/edac/edac_mc.c:
>>         if (mci->scrub_mode == SCRUB_SW_SRC) {
>>                 /*
>>                         * Some memory controllers (called MCs below) can remap
>>                         * memory so that it is still available at a different
>>                         * address when PCI devices map into memory.
>>                         * MC's that can't do this, lose the memory where PCI
>>                         * devices are mapped. This mapping is MC-dependent
>>                         * and so we call back into the MC driver for it to
>>                         * map the MC page to a physical (CPU) page which can
>>                         * then be mapped to a virtual page - which can then
>>                         * be scrubbed.
>>                         */
>>                 remapped_page = mci->ctl_page_to_phys ?
>>                         mci->ctl_page_to_phys(mci, page_frame_number) :
>>                         page_frame_number;
>>
>>                 edac_mc_scrub_block(remapped_page,
>>                                         offset_in_page, grain);
>>         }
>
> edac_mc_scrub_block() then basically checks if it actually hit a valid
> PFN, maps the page with kmap_atomic and runs edac_atomic_scrub().
> For ARM this is implemented in arch/arm/include/asm/edac.h:
>> /*
>>  * ECC atomic, DMA, SMP and interrupt safe scrub function.
>>  * Implements the per arch edac_atomic_scrub() that EDAC use for software
>>  * ECC scrubbing.  It reads memory and then writes back the original
>>  * value, allowing the hardware to detect and correct memory errors.
>>  */
>>
>> static inline void edac_atomic_scrub(void *va, u32 size)
>> {
>> #if __LINUX_ARM_ARCH__ >= 6
>>         unsigned int *virt_addr = va;
>>         unsigned int temp, temp2;
>>         unsigned int i;
>>
>>         for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>>                 /* Very carefully read and write to memory atomically
>>                  * so we are interrupt, DMA and SMP safe.
>>                  */
>>                 __asm__ __volatile__("\n"
>>                         "1:     ldrex   %0, [%2]\n"
>>                         "       strex   %1, %0, [%2]\n"
>>                         "       teq     %1, #0\n"
>>                         "       bne     1b\n"
>>                         : "=&r"(temp), "=&r"(temp2)
>>                         : "r"(virt_addr)
>>                         : "cc");
>>         }
>> #endif
>> }
>
> The comment "ECC atomic, DMA, SMP and interrupt safe scrub function"
> seems to be copied from the initial implementation on x86, first to
> powerpc, to mips and later to arm.

Yes.

> On ARM, other bus masters are usually not coherent to the CPUs. This
> means that exclusive loads/stores only affect (at most) the sharable
> domain, which is usually a subset of the SoC.

Maybe usually, but that's entirely system dependent. For the system
that needed this (and maybe still the only 32-bit one), it was
coherent i/o. Good luck doing 10G networking with a non-coherent
master.

> Consequently, when a bus master outside of the shareable domain (such as
> a ethernet controller) writes to the same memory after the ldrex, the
> strex will simply succeed and write stale data to the CPU cache, which
> can later overwrite the data written by the ethernet controller.
>
> I'm not sure if it is actually possible to implement this in a DMA-safe
> way on ARM without stopping all other bus masters.

Generically, probably not. And stopping bus masters is probably not
going to work. Bottom line is systems have to be designed for this or
bus masters and drivers audited that they could deal with this. Many
bus masters are not going to both repeatedly read the same location
and write to it. It's a rare problem on an already rare occurrence.

Rob
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* DMA-safety for edac_atomic_scrub on ARM
@ 2017-06-09 16:13         ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:13 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jun 9, 2017 at 8:56 AM, Jan L?bbe <jlu@pengutronix.de> wrote:
> Hi,
>
> I've CCed Rob as the original author of the ARM EDAC scrub function.
>
> On Fr, 2017-06-09 at 15:14 +0200, Jan L?bbe wrote:
>> [...]
>> > +     mci->scrub_mode = SCRUB_SW_SRC;
>> I'm not sure if this works as expected ARM as it is currently
>> implemented, but that's a topic for a different mail.
>
> Some more background as I understand it so far:
>
> Configuring a EDAC MC to scrub_mode = SCRUB_SW_SRC causes the common
> handler code to run a per-arch software scrub function. It is used by
> several EADC drivers on ARM.
>
> drivers/edac/edac_mc.c:
>>         if (mci->scrub_mode == SCRUB_SW_SRC) {
>>                 /*
>>                         * Some memory controllers (called MCs below) can remap
>>                         * memory so that it is still available at a different
>>                         * address when PCI devices map into memory.
>>                         * MC's that can't do this, lose the memory where PCI
>>                         * devices are mapped. This mapping is MC-dependent
>>                         * and so we call back into the MC driver for it to
>>                         * map the MC page to a physical (CPU) page which can
>>                         * then be mapped to a virtual page - which can then
>>                         * be scrubbed.
>>                         */
>>                 remapped_page = mci->ctl_page_to_phys ?
>>                         mci->ctl_page_to_phys(mci, page_frame_number) :
>>                         page_frame_number;
>>
>>                 edac_mc_scrub_block(remapped_page,
>>                                         offset_in_page, grain);
>>         }
>
> edac_mc_scrub_block() then basically checks if it actually hit a valid
> PFN, maps the page with kmap_atomic and runs edac_atomic_scrub().
> For ARM this is implemented in arch/arm/include/asm/edac.h:
>> /*
>>  * ECC atomic, DMA, SMP and interrupt safe scrub function.
>>  * Implements the per arch edac_atomic_scrub() that EDAC use for software
>>  * ECC scrubbing.  It reads memory and then writes back the original
>>  * value, allowing the hardware to detect and correct memory errors.
>>  */
>>
>> static inline void edac_atomic_scrub(void *va, u32 size)
>> {
>> #if __LINUX_ARM_ARCH__ >= 6
>>         unsigned int *virt_addr = va;
>>         unsigned int temp, temp2;
>>         unsigned int i;
>>
>>         for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
>>                 /* Very carefully read and write to memory atomically
>>                  * so we are interrupt, DMA and SMP safe.
>>                  */
>>                 __asm__ __volatile__("\n"
>>                         "1:     ldrex   %0, [%2]\n"
>>                         "       strex   %1, %0, [%2]\n"
>>                         "       teq     %1, #0\n"
>>                         "       bne     1b\n"
>>                         : "=&r"(temp), "=&r"(temp2)
>>                         : "r"(virt_addr)
>>                         : "cc");
>>         }
>> #endif
>> }
>
> The comment "ECC atomic, DMA, SMP and interrupt safe scrub function"
> seems to be copied from the initial implementation on x86, first to
> powerpc, to mips and later to arm.

Yes.

> On ARM, other bus masters are usually not coherent to the CPUs. This
> means that exclusive loads/stores only affect (at most) the sharable
> domain, which is usually a subset of the SoC.

Maybe usually, but that's entirely system dependent. For the system
that needed this (and maybe still the only 32-bit one), it was
coherent i/o. Good luck doing 10G networking with a non-coherent
master.

> Consequently, when a bus master outside of the shareable domain (such as
> a ethernet controller) writes to the same memory after the ldrex, the
> strex will simply succeed and write stale data to the CPU cache, which
> can later overwrite the data written by the ethernet controller.
>
> I'm not sure if it is actually possible to implement this in a DMA-safe
> way on ARM without stopping all other bus masters.

Generically, probably not. And stopping bus masters is probably not
going to work. Bottom line is systems have to be designed for this or
bus masters and drivers audited that they could deal with this. Many
bus masters are not going to both repeatedly read the same location
and write to it. It's a rare problem on an already rare occurrence.

Rob

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09 16:29     ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:29 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Mark Rutland, Russell King,
	devicetree, linux-kernel

On Thu, Jun 08, 2017 at 04:11:23PM +1200, Chris Packham wrote:
> The aurora cache on the Marvell Armada-XP SoC supports ECC protection
> for the L2 data arrays. Add a "arm,ecc-enable" device tree property
> which can be used to enable this.
> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> ---
>  Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
>  arch/arm/mm/cache-l2x0.c                         | 7 +++++++
>  2 files changed, 9 insertions(+)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09 16:29     ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:29 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Mark Rutland, Russell King,
	devicetree, linux-kernel

On Thu, Jun 08, 2017 at 04:11:23PM +1200, Chris Packham wrote:
> The aurora cache on the Marvell Armada-XP SoC supports ECC protection
> for the L2 data arrays. Add a "arm,ecc-enable" device tree property
> which can be used to enable this.
> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> ---
>  Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
>  arch/arm/mm/cache-l2x0.c                         | 7 +++++++
>  2 files changed, 9 insertions(+)

Acked-by: Rob Herring <robh@kernel.org>
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-09 16:29     ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:29 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp-Gina5bIWoIWzQB+pC5nmwQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-edac-u79uwXL29TY76Z2rM5mHXA, Mark Rutland, Russell King,
	devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Thu, Jun 08, 2017 at 04:11:23PM +1200, Chris Packham wrote:
> The aurora cache on the Marvell Armada-XP SoC supports ECC protection
> for the L2 data arrays. Add a "arm,ecc-enable" device tree property
> which can be used to enable this.
> 
> Signed-off-by: Chris Packham <chris.packham-6g8wRflRTwXFdCa3tKVlE6U/zSkkHjvu@public.gmane.org>
> ---
>  Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
>  arch/arm/mm/cache-l2x0.c                         | 7 +++++++
>  2 files changed, 9 insertions(+)

Acked-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm, ecc-enable property for aurora
@ 2017-06-09 16:29     ` Rob Herring
  0 siblings, 0 replies; 54+ messages in thread
From: Rob Herring @ 2017-06-09 16:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jun 08, 2017 at 04:11:23PM +1200, Chris Packham wrote:
> The aurora cache on the Marvell Armada-XP SoC supports ECC protection
> for the L2 data arrays. Add a "arm,ecc-enable" device tree property
> which can be used to enable this.
> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> ---
>  Documentation/devicetree/bindings/arm/l2c2x0.txt | 2 ++
>  arch/arm/mm/cache-l2x0.c                         | 7 +++++++
>  2 files changed, 9 insertions(+)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-11 22:37       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:37 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

On 10/06/17 01:19, Jan Lübbe wrote:
>> +
>> +	if (edac_op_state == EDAC_OPSTATE_INT) {
>> +		/* acquire interrupt that reports errors */
>> +		pdata->irq = platform_get_irq(pdev, 0);
>> +		res = devm_request_irq(&pdev->dev,
>> +				       pdata->irq,
>> +				       mvebu_mc_isr,
>> +				       0,
>> +				       "[EDAC] MC err",
>> +				       mci);
> Which IRQ do you use? The current DT doesn't configure interrupts. Also
> it seems that the events are passed through additional layers of
> mask/status registers which are not yet represented in the Armada-XP IRQ
> hierarchy. So my driver currently uses polling.

Yes I'd been forcing polling too. To get this working properly I think 
we'd need to add another irqchip driver for the SoC Err interrupts. 
Which is kind of where I'd got stuck, the datasheet is a little 
confusing in that area. I think I'd figured out that the root interrupt 
comes through INT 4 which could be cascaded to the as yet unwritten SoC 
Err irqchip.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-11 22:37       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:37 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

On 10/06/17 01:19, Jan Lübbe wrote:
>> +
>> +	if (edac_op_state == EDAC_OPSTATE_INT) {
>> +		/* acquire interrupt that reports errors */
>> +		pdata->irq = platform_get_irq(pdev, 0);
>> +		res = devm_request_irq(&pdev->dev,
>> +				       pdata->irq,
>> +				       mvebu_mc_isr,
>> +				       0,
>> +				       "[EDAC] MC err",
>> +				       mci);
> Which IRQ do you use? The current DT doesn't configure interrupts. Also
> it seems that the events are passed through additional layers of
> mask/status registers which are not yet represented in the Armada-XP IRQ
> hierarchy. So my driver currently uses polling.

Yes I'd been forcing polling too. To get this working properly I think 
we'd need to add another irqchip driver for the SoC Err interrupts. 
Which is kind of where I'd got stuck, the datasheet is a little 
confusing in that area. I think I'd figured out that the root interrupt 
comes through INT 4 which could be cascaded to the as yet unwritten SoC 
Err irqchip.
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-11 22:37       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:37 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/06/17 01:19, Jan L?bbe wrote:
>> +
>> +	if (edac_op_state == EDAC_OPSTATE_INT) {
>> +		/* acquire interrupt that reports errors */
>> +		pdata->irq = platform_get_irq(pdev, 0);
>> +		res = devm_request_irq(&pdev->dev,
>> +				       pdata->irq,
>> +				       mvebu_mc_isr,
>> +				       0,
>> +				       "[EDAC] MC err",
>> +				       mci);
> Which IRQ do you use? The current DT doesn't configure interrupts. Also
> it seems that the events are passed through additional layers of
> mask/status registers which are not yet represented in the Armada-XP IRQ
> hierarchy. So my driver currently uses polling.

Yes I'd been forcing polling too. To get this working properly I think 
we'd need to add another irqchip driver for the SoC Err interrupts. 
Which is kind of where I'd got stuck, the datasheet is a little 
confusing in that area. I think I'd figured out that the root interrupt 
comes through INT 4 which could be cascaded to the as yet unwritten SoC 
Err irqchip.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-11 22:55       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:55 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

On 09/06/17 20:58, Jan Lübbe wrote:
> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       }
> 
> Unless I misunderstand the code in __l2c_init(), the mask is used to
> specify the bits to preserve:
>          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>          aux &= aux_mask;
>          aux |= aux_val;
> 
>          if (old_aux != aux)
>                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>                          old_aux, aux);
> 
> So the arm,ecc-disable property will have no effect. This probably also
> applies to patch 2/4. The existing property *-disable code removes the
> corresponding bit from the mask.

Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
and I was probably a little lazy to have used the L2C EVTMON instead of 
adding AURORA specific ones like you have in your series.

I'll rebase my series on top of yours and send it direct to you so you 
can include it in the overall submission.

> 
> Regards,
> Jan
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-11 22:55       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:55 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

On 09/06/17 20:58, Jan Lübbe wrote:
> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       }
> 
> Unless I misunderstand the code in __l2c_init(), the mask is used to
> specify the bits to preserve:
>          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>          aux &= aux_mask;
>          aux |= aux_val;
> 
>          if (old_aux != aux)
>                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>                          old_aux, aux);
> 
> So the arm,ecc-disable property will have no effect. This probably also
> applies to patch 2/4. The existing property *-disable code removes the
> corresponding bit from the mask.

Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
and I was probably a little lazy to have used the L2C EVTMON instead of 
adding AURORA specific ones like you have in your series.

I'll rebase my series on top of yours and send it direct to you so you 
can include it in the overall submission.

> 
> Regards,
> Jan
>
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-11 22:55       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:55 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp-Gina5bIWoIWzQB+pC5nmwQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-edac-u79uwXL29TY76Z2rM5mHXA, Rob Herring, Mark Rutland,
	Russell King, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 09/06/17 20:58, Jan Lübbe wrote:
> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       }
> 
> Unless I misunderstand the code in __l2c_init(), the mask is used to
> specify the bits to preserve:
>          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>          aux &= aux_mask;
>          aux |= aux_val;
> 
>          if (old_aux != aux)
>                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>                          old_aux, aux);
> 
> So the arm,ecc-disable property will have no effect. This probably also
> applies to patch 2/4. The existing property *-disable code removes the
> corresponding bit from the mask.

Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
and I was probably a little lazy to have used the L2C EVTMON instead of 
adding AURORA specific ones like you have in your series.

I'll rebase my series on top of yours and send it direct to you so you 
can include it in the overall submission.

> 
> Regards,
> Jan
> 

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm, ecc-enable property for aurora
@ 2017-06-11 22:55       ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-11 22:55 UTC (permalink / raw)
  To: linux-arm-kernel

On 09/06/17 20:58, Jan L?bbe wrote:
> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>> +       }
> 
> Unless I misunderstand the code in __l2c_init(), the mask is used to
> specify the bits to preserve:
>          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>          aux &= aux_mask;
>          aux |= aux_val;
> 
>          if (old_aux != aux)
>                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>                          old_aux, aux);
> 
> So the arm,ecc-disable property will have no effect. This probably also
> applies to patch 2/4. The existing property *-disable code removes the
> corresponding bit from the mask.

Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
and I was probably a little lazy to have used the L2C EVTMON instead of 
adding AURORA specific ones like you have in your series.

I'll rebase my series on top of yours and send it direct to you so you 
can include it in the overall submission.

> 
> Regards,
> Jan
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-22 14:11       ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:11 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

Hi Chris,

On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
> > +static void mvebu_init_csrows(struct mem_ctl_info *mci,
> > +                             struct mvebu_mc_pdata *pdata)
> [...]
> > +     devtype = (ctl >> 20) & 0x3;
> > +     switch (devtype) {
> > +     case 0x0:
> > +             dimm->dtype = DEV_X32;
> > +             break;
> > +     case 0x2:               /* could be X8 too, but no way to tell
> */
> > +             dimm->dtype = DEV_X16;
> > +             break;
> > +     case 0x3:
> > +             dimm->dtype = DEV_X4;
> > +             break;
> > +     default:
> > +             dimm->dtype = DEV_UNKNOWN;
> > +             break;
> > +     }
> This register is documented as reserved? I pulled the X8/X16
> information from the Address Control Register (CSnStruct bits).

Do you have more information on how to decode the Bus width?

It's not clear from the MV78230/78x60 docs:
- The SDRAM Configuration Register, offset 15 bit is:
  0 = Half (32 bit data bus)
  1 = Full (64 bit data bus)
- The SDRAM Address Control Register, offsets 0-1, 4-5, 8-9 and 12-13
(for CS 0, 1, 3 and 4):
  0 = X8
  1 = X16
  2 and 3 are not documented

Is this clearer in your documentation, so that we can have the same code
handle both variants? Otherwise, we'd probably need separate DT
compatibles.

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-22 14:11       ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:11 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

Hi Chris,

On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
> > +static void mvebu_init_csrows(struct mem_ctl_info *mci,
> > +                             struct mvebu_mc_pdata *pdata)
> [...]
> > +     devtype = (ctl >> 20) & 0x3;
> > +     switch (devtype) {
> > +     case 0x0:
> > +             dimm->dtype = DEV_X32;
> > +             break;
> > +     case 0x2:               /* could be X8 too, but no way to tell
> */
> > +             dimm->dtype = DEV_X16;
> > +             break;
> > +     case 0x3:
> > +             dimm->dtype = DEV_X4;
> > +             break;
> > +     default:
> > +             dimm->dtype = DEV_UNKNOWN;
> > +             break;
> > +     }
> This register is documented as reserved? I pulled the X8/X16
> information from the Address Control Register (CSnStruct bits).

Do you have more information on how to decode the Bus width?

It's not clear from the MV78230/78x60 docs:
- The SDRAM Configuration Register, offset 15 bit is:
  0 = Half (32 bit data bus)
  1 = Full (64 bit data bus)
- The SDRAM Address Control Register, offsets 0-1, 4-5, 8-9 and 12-13
(for CS 0, 1, 3 and 4):
  0 = X8
  1 = X16
  2 and 3 are not documented

Is this clearer in your documentation, so that we can have the same code
handle both variants? Otherwise, we'd probably need separate DT
compatibles.

Regards,
Jan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-22 14:11       ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:11 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Chris,

On Fr, 2017-06-09 at 15:14 +0200, Jan L?bbe wrote:
> > +static void mvebu_init_csrows(struct mem_ctl_info *mci,
> > +                             struct mvebu_mc_pdata *pdata)
> [...]
> > +     devtype = (ctl >> 20) & 0x3;
> > +     switch (devtype) {
> > +     case 0x0:
> > +             dimm->dtype = DEV_X32;
> > +             break;
> > +     case 0x2:               /* could be X8 too, but no way to tell
> */
> > +             dimm->dtype = DEV_X16;
> > +             break;
> > +     case 0x3:
> > +             dimm->dtype = DEV_X4;
> > +             break;
> > +     default:
> > +             dimm->dtype = DEV_UNKNOWN;
> > +             break;
> > +     }
> This register is documented as reserved? I pulled the X8/X16
> information from the Address Control Register (CSnStruct bits).

Do you have more information on how to decode the Bus width?

It's not clear from the MV78230/78x60 docs:
- The SDRAM Configuration Register, offset 15 bit is:
  0 = Half (32 bit data bus)
  1 = Full (64 bit data bus)
- The SDRAM Address Control Register, offsets 0-1, 4-5, 8-9 and 12-13
(for CS 0, 1, 3 and 4):
  0 = X8
  1 = X16
  2 and 3 are not documented

Is this clearer in your documentation, so that we can have the same code
handle both variants? Otherwise, we'd probably need separate DT
compatibles.

Regards,
Jan
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 14:46         ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:46 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

Chris,

On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
> On 09/06/17 20:58, Jan Lübbe wrote:
> > On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> >> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       }
> > 
> > Unless I misunderstand the code in __l2c_init(), the mask is used to
> > specify the bits to preserve:
> >          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
> >          aux &= aux_mask;
> >          aux |= aux_val;
> > 
> >          if (old_aux != aux)
> >                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
> >                          old_aux, aux);
> > 
> > So the arm,ecc-disable property will have no effect. This probably also
> > applies to patch 2/4. The existing property *-disable code removes the
> > corresponding bit from the mask.
> 
> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
> and I was probably a little lazy to have used the L2C EVTMON instead of 
> adding AURORA specific ones like you have in your series.
> 
> I'll rebase my series on top of yours and send it direct to you so you 
> can include it in the overall submission.

While picking up your arm,parity-enable and arm,ecc-enable patches for
my series, I noticed that the mask variable is inverted when merging the
local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
now believe your code is correct. ;) I'd appreciate a close look,
nevertheless.

Regards,
Jan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 14:46         ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:46 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

Chris,

On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
> On 09/06/17 20:58, Jan Lübbe wrote:
> > On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> >> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       }
> > 
> > Unless I misunderstand the code in __l2c_init(), the mask is used to
> > specify the bits to preserve:
> >          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
> >          aux &= aux_mask;
> >          aux |= aux_val;
> > 
> >          if (old_aux != aux)
> >                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
> >                          old_aux, aux);
> > 
> > So the arm,ecc-disable property will have no effect. This probably also
> > applies to patch 2/4. The existing property *-disable code removes the
> > corresponding bit from the mask.
> 
> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
> and I was probably a little lazy to have used the L2C EVTMON instead of 
> adding AURORA specific ones like you have in your series.
> 
> I'll rebase my series on top of yours and send it direct to you so you 
> can include it in the overall submission.

While picking up your arm,parity-enable and arm,ecc-enable patches for
my series, I noticed that the mask variable is inverted when merging the
local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
now believe your code is correct. ;) I'd appreciate a close look,
nevertheless.

Regards,
Jan
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 14:46         ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:46 UTC (permalink / raw)
  To: Chris Packham
  Cc: bp-Gina5bIWoIWzQB+pC5nmwQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-edac-u79uwXL29TY76Z2rM5mHXA, Rob Herring, Mark Rutland,
	Russell King, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Chris,

On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
> On 09/06/17 20:58, Jan Lübbe wrote:
> > On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> >> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       }
> > 
> > Unless I misunderstand the code in __l2c_init(), the mask is used to
> > specify the bits to preserve:
> >          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
> >          aux &= aux_mask;
> >          aux |= aux_val;
> > 
> >          if (old_aux != aux)
> >                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
> >                          old_aux, aux);
> > 
> > So the arm,ecc-disable property will have no effect. This probably also
> > applies to patch 2/4. The existing property *-disable code removes the
> > corresponding bit from the mask.
> 
> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
> and I was probably a little lazy to have used the L2C EVTMON instead of 
> adding AURORA specific ones like you have in your series.
> 
> I'll rebase my series on top of yours and send it direct to you so you 
> can include it in the overall submission.

While picking up your arm,parity-enable and arm,ecc-enable patches for
my series, I noticed that the mask variable is inverted when merging the
local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
now believe your code is correct. ;) I'd appreciate a close look,
nevertheless.

Regards,
Jan
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 14:46         ` Jan Lübbe
  0 siblings, 0 replies; 54+ messages in thread
From: Jan Lübbe @ 2017-06-22 14:46 UTC (permalink / raw)
  To: linux-arm-kernel

Chris,

On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
> On 09/06/17 20:58, Jan L?bbe wrote:
> > On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
> >> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
> >> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
> >> +       }
> > 
> > Unless I misunderstand the code in __l2c_init(), the mask is used to
> > specify the bits to preserve:
> >          old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
> >          aux &= aux_mask;
> >          aux |= aux_val;
> > 
> >          if (old_aux != aux)
> >                  pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
> >                          old_aux, aux);
> > 
> > So the arm,ecc-disable property will have no effect. This probably also
> > applies to patch 2/4. The existing property *-disable code removes the
> > corresponding bit from the mask.
> 
> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE 
> and I was probably a little lazy to have used the L2C EVTMON instead of 
> adding AURORA specific ones like you have in your series.
> 
> I'll rebase my series on top of yours and send it direct to you so you 
> can include it in the overall submission.

While picking up your arm,parity-enable and arm,ecc-enable patches for
my series, I noticed that the mask variable is inverted when merging the
local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
now believe your code is correct. ;) I'd appreciate a close look,
nevertheless.

Regards,
Jan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-22 21:43         ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:43 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

On 23/06/17 02:37, Jan Lübbe wrote:
> Hi Chris,
> 
> On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
>>> +static void mvebu_init_csrows(struct mem_ctl_info *mci,
>>> +                             struct mvebu_mc_pdata *pdata)
>> [...]
>>> +     devtype = (ctl >> 20) & 0x3;
>>> +     switch (devtype) {
>>> +     case 0x0:
>>> +             dimm->dtype = DEV_X32;
>>> +             break;
>>> +     case 0x2:               /* could be X8 too, but no way to tell
>> */
>>> +             dimm->dtype = DEV_X16;
>>> +             break;
>>> +     case 0x3:
>>> +             dimm->dtype = DEV_X4;
>>> +             break;
>>> +     default:
>>> +             dimm->dtype = DEV_UNKNOWN;
>>> +             break;
>>> +     }
>> This register is documented as reserved? I pulled the X8/X16
>> information from the Address Control Register (CSnStruct bits).
> 
> Do you have more information on how to decode the Bus width?
> 
> It's not clear from the MV78230/78x60 docs:
> - The SDRAM Configuration Register, offset 15 bit is:
>    0 = Half (32 bit data bus)
>    1 = Full (64 bit data bus)

For the integrated version it still is just half and full but half == 16 
and full == 32.

> - The SDRAM Address Control Register, offsets 0-1, 4-5, 8-9 and 12-13
> (for CS 0, 1, 3 and 4):
>    0 = X8
>    1 = X16
>    2 and 3 are not documented

This definition is the same for the integrated version.

> Is this clearer in your documentation, so that we can have the same code
> handle both variants? Otherwise, we'd probably need separate DT
> compatibles.

I think we do need to use the compatible string to decide how to 
interpret full/half.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-22 21:43         ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:43 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Mauro Carvalho Chehab, linux-kernel

On 23/06/17 02:37, Jan Lübbe wrote:
> Hi Chris,
> 
> On Fr, 2017-06-09 at 15:14 +0200, Jan Lübbe wrote:
>>> +static void mvebu_init_csrows(struct mem_ctl_info *mci,
>>> +                             struct mvebu_mc_pdata *pdata)
>> [...]
>>> +     devtype = (ctl >> 20) & 0x3;
>>> +     switch (devtype) {
>>> +     case 0x0:
>>> +             dimm->dtype = DEV_X32;
>>> +             break;
>>> +     case 0x2:               /* could be X8 too, but no way to tell
>> */
>>> +             dimm->dtype = DEV_X16;
>>> +             break;
>>> +     case 0x3:
>>> +             dimm->dtype = DEV_X4;
>>> +             break;
>>> +     default:
>>> +             dimm->dtype = DEV_UNKNOWN;
>>> +             break;
>>> +     }
>> This register is documented as reserved? I pulled the X8/X16
>> information from the Address Control Register (CSnStruct bits).
> 
> Do you have more information on how to decode the Bus width?
> 
> It's not clear from the MV78230/78x60 docs:
> - The SDRAM Configuration Register, offset 15 bit is:
>    0 = Half (32 bit data bus)
>    1 = Full (64 bit data bus)

For the integrated version it still is just half and full but half == 16 
and full == 32.

> - The SDRAM Address Control Register, offsets 0-1, 4-5, 8-9 and 12-13
> (for CS 0, 1, 3 and 4):
>    0 = X8
>    1 = X16
>    2 and 3 are not documented

This definition is the same for the integrated version.

> Is this clearer in your documentation, so that we can have the same code
> handle both variants? Otherwise, we'd probably need separate DT
> compatibles.

I think we do need to use the compatible string to decide how to 
interpret full/half.
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs
@ 2017-06-22 21:43         ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:43 UTC (permalink / raw)
  To: linux-arm-kernel

On 23/06/17 02:37, Jan L?bbe wrote:
> Hi Chris,
> 
> On Fr, 2017-06-09 at 15:14 +0200, Jan L?bbe wrote:
>>> +static void mvebu_init_csrows(struct mem_ctl_info *mci,
>>> +                             struct mvebu_mc_pdata *pdata)
>> [...]
>>> +     devtype = (ctl >> 20) & 0x3;
>>> +     switch (devtype) {
>>> +     case 0x0:
>>> +             dimm->dtype = DEV_X32;
>>> +             break;
>>> +     case 0x2:               /* could be X8 too, but no way to tell
>> */
>>> +             dimm->dtype = DEV_X16;
>>> +             break;
>>> +     case 0x3:
>>> +             dimm->dtype = DEV_X4;
>>> +             break;
>>> +     default:
>>> +             dimm->dtype = DEV_UNKNOWN;
>>> +             break;
>>> +     }
>> This register is documented as reserved? I pulled the X8/X16
>> information from the Address Control Register (CSnStruct bits).
> 
> Do you have more information on how to decode the Bus width?
> 
> It's not clear from the MV78230/78x60 docs:
> - The SDRAM Configuration Register, offset 15 bit is:
>    0 = Half (32 bit data bus)
>    1 = Full (64 bit data bus)

For the integrated version it still is just half and full but half == 16 
and full == 32.

> - The SDRAM Address Control Register, offsets 0-1, 4-5, 8-9 and 12-13
> (for CS 0, 1, 3 and 4):
>    0 = X8
>    1 = X16
>    2 and 3 are not documented

This definition is the same for the integrated version.

> Is this clearer in your documentation, so that we can have the same code
> handle both variants? Otherwise, we'd probably need separate DT
> compatibles.

I think we do need to use the compatible string to decide how to 
interpret full/half.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 21:50           ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:50 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

On 23/06/17 02:46, Jan Lübbe wrote:
> Chris,
> 
> On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
>> On 09/06/17 20:58, Jan Lübbe wrote:
>>> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>>>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       }
>>>
>>> Unless I misunderstand the code in __l2c_init(), the mask is used to
>>> specify the bits to preserve:
>>>           old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>>>           aux &= aux_mask;
>>>           aux |= aux_val;
>>>
>>>           if (old_aux != aux)
>>>                   pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>>>                           old_aux, aux);
>>>
>>> So the arm,ecc-disable property will have no effect. This probably also
>>> applies to patch 2/4. The existing property *-disable code removes the
>>> corresponding bit from the mask.
>>
>> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE
>> and I was probably a little lazy to have used the L2C EVTMON instead of
>> adding AURORA specific ones like you have in your series.
>>
>> I'll rebase my series on top of yours and send it direct to you so you
>> can include it in the overall submission.
> 
> While picking up your arm,parity-enable and arm,ecc-enable patches for
> my series, I noticed that the mask variable is inverted when merging the
> local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
> now believe your code is correct. ;) I'd appreciate a close look,
> nevertheless.

For my part I tested the enable case but I never tested the disable case 
(because u-boot doesn't enable it there's nothing to disable). I can 
probably just poke the register before starting the kernel to confirm 
the disable case works with whatever masking logic we end up with.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC,3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 21:50           ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:50 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp, linux-arm-kernel, linux-edac, Rob Herring, Mark Rutland,
	Russell King, devicetree, linux-kernel

On 23/06/17 02:46, Jan Lübbe wrote:
> Chris,
> 
> On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
>> On 09/06/17 20:58, Jan Lübbe wrote:
>>> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>>>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       }
>>>
>>> Unless I misunderstand the code in __l2c_init(), the mask is used to
>>> specify the bits to preserve:
>>>           old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>>>           aux &= aux_mask;
>>>           aux |= aux_val;
>>>
>>>           if (old_aux != aux)
>>>                   pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>>>                           old_aux, aux);
>>>
>>> So the arm,ecc-disable property will have no effect. This probably also
>>> applies to patch 2/4. The existing property *-disable code removes the
>>> corresponding bit from the mask.
>>
>> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE
>> and I was probably a little lazy to have used the L2C EVTMON instead of
>> adding AURORA specific ones like you have in your series.
>>
>> I'll rebase my series on top of yours and send it direct to you so you
>> can include it in the overall submission.
> 
> While picking up your arm,parity-enable and arm,ecc-enable patches for
> my series, I noticed that the mask variable is inverted when merging the
> local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
> now believe your code is correct. ;) I'd appreciate a close look,
> nevertheless.

For my part I tested the enable case but I never tested the disable case 
(because u-boot doesn't enable it there's nothing to disable). I can 
probably just poke the register before starting the kernel to confirm 
the disable case works with whatever masking logic we end up with.
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora
@ 2017-06-22 21:50           ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:50 UTC (permalink / raw)
  To: Jan Lübbe
  Cc: bp-Gina5bIWoIWzQB+pC5nmwQ,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-edac-u79uwXL29TY76Z2rM5mHXA, Rob Herring, Mark Rutland,
	Russell King, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 23/06/17 02:46, Jan Lübbe wrote:
> Chris,
> 
> On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
>> On 09/06/17 20:58, Jan Lübbe wrote:
>>> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>>>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       }
>>>
>>> Unless I misunderstand the code in __l2c_init(), the mask is used to
>>> specify the bits to preserve:
>>>           old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>>>           aux &= aux_mask;
>>>           aux |= aux_val;
>>>
>>>           if (old_aux != aux)
>>>                   pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>>>                           old_aux, aux);
>>>
>>> So the arm,ecc-disable property will have no effect. This probably also
>>> applies to patch 2/4. The existing property *-disable code removes the
>>> corresponding bit from the mask.
>>
>> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE
>> and I was probably a little lazy to have used the L2C EVTMON instead of
>> adding AURORA specific ones like you have in your series.
>>
>> I'll rebase my series on top of yours and send it direct to you so you
>> can include it in the overall submission.
> 
> While picking up your arm,parity-enable and arm,ecc-enable patches for
> my series, I noticed that the mask variable is inverted when merging the
> local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
> now believe your code is correct. ;) I'd appreciate a close look,
> nevertheless.

For my part I tested the enable case but I never tested the disable case 
(because u-boot doesn't enable it there's nothing to disable). I can 
probably just poke the register before starting the kernel to confirm 
the disable case works with whatever masking logic we end up with.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [RFC PATCH 3/4] ARM: l2x0: add arm, ecc-enable property for aurora
@ 2017-06-22 21:50           ` Chris Packham
  0 siblings, 0 replies; 54+ messages in thread
From: Chris Packham @ 2017-06-22 21:50 UTC (permalink / raw)
  To: linux-arm-kernel

On 23/06/17 02:46, Jan L?bbe wrote:
> Chris,
> 
> On So, 2017-06-11 at 22:55 +0000, Chris Packham wrote:
>> On 09/06/17 20:58, Jan L?bbe wrote:
>>> On Do, 2017-06-08 at 16:11 +1200, Chris Packham wrote:
>>>> +       if (of_property_read_bool(np, "arm,ecc-enable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +               val |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       } else if (of_property_read_bool(np, "arm,ecc-disable")) {
>>>> +               mask |= L2C_AUX_CTRL_EVTMON_ENABLE;
>>>> +       }
>>>
>>> Unless I misunderstand the code in __l2c_init(), the mask is used to
>>> specify the bits to preserve:
>>>           old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
>>>           aux &= aux_mask;
>>>           aux |= aux_val;
>>>
>>>           if (old_aux != aux)
>>>                   pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
>>>                           old_aux, aux);
>>>
>>> So the arm,ecc-disable property will have no effect. This probably also
>>> applies to patch 2/4. The existing property *-disable code removes the
>>> corresponding bit from the mask.
>>
>> Indeed the disable version should be mask &= ~L2C_AUX_CTRL_EVTMON_ENABLE
>> and I was probably a little lazy to have used the L2C EVTMON instead of
>> adding AURORA specific ones like you have in your series.
>>
>> I'll rebase my series on top of yours and send it direct to you so you
>> can include it in the overall submission.
> 
> While picking up your arm,parity-enable and arm,ecc-enable patches for
> my series, I noticed that the mask variable is inverted when merging the
> local changes into aux_val/aux_mask at the end of aurora_of_parse. So I
> now believe your code is correct. ;) I'd appreciate a close look,
> nevertheless.

For my part I tested the enable case but I never tested the disable case 
(because u-boot doesn't enable it there's nothing to disable). I can 
probably just poke the register before starting the kernel to confirm 
the disable case works with whatever masking logic we end up with.

^ permalink raw reply	[flat|nested] 54+ messages in thread

end of thread, other threads:[~2017-06-22 21:50 UTC | newest]

Thread overview: 54+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-08  4:11 [RFC PATCH 0/4] EDAC support for Marvell Armada SoCs Chris Packham
2017-06-08  4:11 ` [RFC PATCH 1/4] EDAC: mvebu: Add driver " Chris Packham
2017-06-08  4:11   ` Chris Packham
2017-06-08  4:11   ` [RFC,1/4] " Chris Packham
2017-06-09 10:39   ` [RFC PATCH 1/4] " Borislav Petkov
2017-06-09 10:39     ` Borislav Petkov
2017-06-09 10:39     ` [RFC,1/4] " Borislav Petkov
2017-06-09 13:14   ` [RFC PATCH 1/4] " Jan Lübbe
2017-06-09 13:14     ` Jan Lübbe
2017-06-09 13:14     ` [RFC,1/4] " Jan Lübbe
2017-06-09 13:56     ` DMA-safety for edac_atomic_scrub on ARM Jan Lübbe
2017-06-09 13:56       ` [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs Jan Lübbe
2017-06-09 16:13       ` DMA-safety for edac_atomic_scrub on ARM Rob Herring
2017-06-09 16:13         ` Rob Herring
2017-06-09 16:13         ` [RFC,1/4] EDAC: mvebu: Add driver for Marvell Armada SoCs Rob Herring
2017-06-11 22:37     ` [RFC PATCH 1/4] " Chris Packham
2017-06-11 22:37       ` Chris Packham
2017-06-11 22:37       ` [RFC,1/4] " Chris Packham
2017-06-22 14:11     ` [RFC PATCH 1/4] " Jan Lübbe
2017-06-22 14:11       ` Jan Lübbe
2017-06-22 14:11       ` [RFC,1/4] " Jan Lübbe
2017-06-22 21:43       ` [RFC PATCH 1/4] " Chris Packham
2017-06-22 21:43         ` Chris Packham
2017-06-22 21:43         ` [RFC,1/4] " Chris Packham
2017-06-08  4:11 ` [RFC PATCH 2/4] ARM: l2x0: support parity-enable/disable on aurora Chris Packham
2017-06-08  4:11   ` Chris Packham
2017-06-08  4:11   ` [RFC,2/4] " Chris Packham
2017-06-08  4:11 ` [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable property for aurora Chris Packham
2017-06-08  4:11   ` Chris Packham
2017-06-08  4:11   ` [RFC,3/4] " Chris Packham
2017-06-09  8:58   ` [RFC PATCH 3/4] " Jan Lübbe
2017-06-09  8:58     ` Jan Lübbe
2017-06-09  8:58     ` Jan Lübbe
2017-06-09  8:58     ` [RFC,3/4] " Jan Lübbe
2017-06-11 22:55     ` [RFC PATCH 3/4] " Chris Packham
2017-06-11 22:55       ` [RFC PATCH 3/4] ARM: l2x0: add arm, ecc-enable " Chris Packham
2017-06-11 22:55       ` [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable " Chris Packham
2017-06-11 22:55       ` [RFC,3/4] " Chris Packham
2017-06-22 14:46       ` [RFC PATCH 3/4] " Jan Lübbe
2017-06-22 14:46         ` Jan Lübbe
2017-06-22 14:46         ` Jan Lübbe
2017-06-22 14:46         ` [RFC,3/4] " Jan Lübbe
2017-06-22 21:50         ` [RFC PATCH 3/4] " Chris Packham
2017-06-22 21:50           ` [RFC PATCH 3/4] ARM: l2x0: add arm, ecc-enable " Chris Packham
2017-06-22 21:50           ` [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable " Chris Packham
2017-06-22 21:50           ` [RFC,3/4] " Chris Packham
2017-06-09 16:29   ` [RFC PATCH 3/4] " Rob Herring
2017-06-09 16:29     ` [RFC PATCH 3/4] ARM: l2x0: add arm, ecc-enable " Rob Herring
2017-06-09 16:29     ` [RFC PATCH 3/4] ARM: l2x0: add arm,ecc-enable " Rob Herring
2017-06-09 16:29     ` [RFC,3/4] " Rob Herring
2017-06-08  4:11 ` [RFC PATCH 4/4] ARM: dts: enable l2c parity and ecc protection on 98dx3236 Chris Packham
2017-06-08  4:11   ` Chris Packham
2017-06-08  4:11   ` Chris Packham
2017-06-08  4:11   ` [RFC,4/4] " Chris Packham

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.