All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
@ 2015-10-27 20:38 dinguyen
  2015-11-19 18:34 ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: dinguyen @ 2015-10-27 20:38 UTC (permalink / raw)
  To: tthayer
  Cc: dinh.linux, dougthompson, bp, mchehab, linux, linux-edac,
	linux-kernel, Dinh Nguyen

From: Thor Thayer <tthayer@opensource.altera.com>

Adding L2 Cache and On-Chip RAM EDAC support for the
Altera SoCs using the EDAC device  model. The SDRAM
controller is using the Memory Controller model.

Each type of ECC is individually configurable.

The SDRAM ECC is a separate Kconfig option because:
1) the SDRAM preparation can take almost 2 seconds on boot and some
customers need a faster boot time.
2) the SDRAM has an ECC initialization dependency on the preloader
which is outside the kernel. It is desirable to be able to turn the
SDRAM on & off separately.

Signed-off-by: Thor Thayer <tthayer@opensource.altera.com>
Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
---
v7: s/of_get_named_gen_pool/of_gen_pool_get
    Remove #ifdef for EDAC_DEBUG
    Use -ENODEV instead of EPROBE_DEFER

v6: Convert to nested EDAC in device tree. Force L2 cache
    on for L2Cache ECC & remove L2 cache syscon for checking
    enable bit. Update year in header.

v5: No Change

v4: Change mask defines to use BIT().
    Fix comment style to agree with kernel coding style.
    Better printk description for read != write in trigger.
    Remove SysFS debugging message.
    Better dci->mod_name
    Move gen_pool pointer assignment to end of function.
    Invert logic to reduce indent in ocram depenency check.
    Change from dev_err() to edac_printk()
    Replace magic numbers with defines & comments.
    Improve error injection test.
    Change Makefile intermediary name to altr (from alt)

v3: Move OCRAM and L2 cache EDAC functions into altera_edac.c
    instead of separate files.

v2: Fix L2 dependency comments.
---
 drivers/edac/Kconfig       |  16 ++
 drivers/edac/Makefile      |   5 +-
 drivers/edac/altera_edac.c | 488 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 507 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index ef25000..b80b4ad 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -376,6 +376,22 @@ config EDAC_ALTERA_MC
 	  preloader must initialize the SDRAM before loading
 	  the kernel.
 
+config EDAC_ALTERA_L2C
+	bool "Altera L2 Cache EDAC"
+	depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
+	select CACHE_L2X0
+	help
+	  Support for error detection and correction on the
+	  Altera L2 cache Memory for Altera SoCs. This option
+          requires L2 cache so it will force that selection.
+
+config EDAC_ALTERA_OCRAM
+	bool "Altera On-Chip RAM EDAC"
+	depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA && SRAM && GENERIC_ALLOCATOR
+	help
+	  Support for error detection and correction on the
+	  Altera On-Chip RAM Memory for Altera SoCs.
+
 config EDAC_SYNOPSYS
 	tristate "Synopsys DDR Memory Controller"
 	depends on EDAC_MM_EDAC && ARCH_ZYNQ
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index dbf53e0..8f1c6fc 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -67,6 +67,9 @@ obj-$(CONFIG_EDAC_OCTEON_L2C)		+= octeon_edac-l2c.o
 obj-$(CONFIG_EDAC_OCTEON_LMC)		+= octeon_edac-lmc.o
 obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
 
-obj-$(CONFIG_EDAC_ALTERA_MC)		+= altera_edac.o
+altr_edac-y				:= altera_edac.o
+obj-$(CONFIG_EDAC_ALTERA_MC)		+= altr_edac.o
+obj-$(CONFIG_EDAC_ALTERA_L2C)		+= altr_edac.o
+obj-$(CONFIG_EDAC_ALTERA_OCRAM)		+= altr_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
 obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 9296409..154ac8c 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -17,8 +17,10 @@
  * Adapted from the highbank_mc_edac driver.
  */
 
+#include <asm/cacheflush.h>
 #include <linux/ctype.h>
 #include <linux/edac.h>
+#include <linux/genalloc.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
@@ -34,6 +36,7 @@
 
 #define EDAC_MOD_STR		"altera_edac"
 #define EDAC_VERSION		"1"
+#define EDAC_DEVICE		"ALTR_MEM"
 
 static const struct altr_sdram_prv_data c5_data = {
 	.ecc_ctrl_offset    = CV_CTLCFG_OFST,
@@ -75,6 +78,33 @@ static const struct altr_sdram_prv_data a10_data = {
 	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
 };
 
+/************************** EDAC Device Defines **************************/
+
+/* OCRAM ECC Management Group Defines */
+#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
+#define ALTR_OCR_ECC_EN_MASK            BIT(0)
+#define ALTR_OCR_ECC_INJS_MASK          BIT(1)
+#define ALTR_OCR_ECC_INJD_MASK          BIT(2)
+#define ALTR_OCR_ECC_SERR_MASK          BIT(3)
+#define ALTR_OCR_ECC_DERR_MASK          BIT(4)
+
+/* L2 ECC Management Group Defines */
+#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
+#define ALTR_L2_ECC_EN_MASK             BIT(0)
+#define ALTR_L2_ECC_INJS_MASK           BIT(1)
+#define ALTR_L2_ECC_INJD_MASK           BIT(2)
+
+#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
+#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
+#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
+#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
+
+/*********************** EDAC Memory Controller Functions ****************/
+
+/* The SDRAM controller uses the EDAC Memory Controller framework.       */
+
+#ifdef CONFIG_EDAC_ALTERA_MC
+
 static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
 {
 	struct mem_ctl_info *mci = dev_id;
@@ -504,6 +534,462 @@ static struct platform_driver altr_sdram_edac_driver = {
 
 module_platform_driver(altr_sdram_edac_driver);
 
+#endif		/* #ifdef CONFIG_EDAC_ALTERA_MC */
+/************************* EDAC Parent Probe *************************/
+
+static const struct of_device_id altr_edac_device_of_match[];
+
+static const struct of_device_id altr_edac_of_match[] = {
+	{ .compatible = "altr,edac" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_of_match);
+
+static int altr_edac_probe(struct platform_device *pdev)
+{
+	of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match,
+			     NULL, &pdev->dev);
+	return 0;
+}
+
+static struct platform_driver altr_edac_driver = {
+	.probe =  altr_edac_probe,
+	.driver = {
+		.name = "altr_edac",
+		.of_match_table = altr_edac_of_match,
+	},
+};
+module_platform_driver(altr_edac_driver);
+
+/************************* EDAC Device Functions *************************/
+
+/*
+ * EDAC Device Functions (shared between various IPs).
+ * The discrete memories use the EDAC Device framework. The probe
+ * and error handling functions are very similar between memories
+ * so they are shared. The memory allocation and free for EDAC trigger
+ * testing are different for each memory.
+ */
+
+const struct edac_device_prv_data ocramecc_data;
+const struct edac_device_prv_data l2ecc_data;
+
+struct edac_device_prv_data {
+	int (*setup)(struct platform_device *pdev, void __iomem *base);
+	int ce_clear_mask;
+	int ue_clear_mask;
+	struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr;
+	void * (*alloc_mem)(size_t size, void **other);
+	void (*free_mem)(void *p, size_t size, void *other);
+	int ecc_enable_mask;
+	int ce_set_mask;
+	int ue_set_mask;
+	int trig_alloc_sz;
+};
+
+struct altr_edac_device_dev {
+	void __iomem *base;
+	int sb_irq;
+	int db_irq;
+	const struct edac_device_prv_data *data;
+	char *edac_dev_name;
+};
+
+static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
+{
+	struct edac_device_ctl_info *dci = dev_id;
+	struct altr_edac_device_dev *drvdata = dci->pvt_info;
+	const struct edac_device_prv_data *priv = drvdata->data;
+
+	if (irq == drvdata->sb_irq) {
+		if (priv->ce_clear_mask)
+			writel(priv->ce_clear_mask, drvdata->base);
+		edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
+	}
+	if (irq == drvdata->db_irq) {
+		if (priv->ue_clear_mask)
+			writel(priv->ue_clear_mask, drvdata->base);
+		edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
+		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+	}
+
+	return IRQ_HANDLED;
+}
+
+ssize_t altr_edac_device_trig(struct edac_device_ctl_info *edac_dci,
+			      const char *buffer, size_t count)
+{
+	u32 *ptemp, i, error_mask;
+	int result = 0;
+	unsigned long flags;
+	struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+	const struct edac_device_prv_data *priv = drvdata->data;
+	void *generic_ptr = edac_dci->dev;
+
+	if (!priv->alloc_mem)
+		return -ENOMEM;
+
+	/*
+	 * Note that generic_ptr is initialized to the device * but in
+	 * some alloc_functions, this is overridden and returns data.
+	 */
+	ptemp = priv->alloc_mem(priv->trig_alloc_sz, &generic_ptr);
+	if (!ptemp) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "Inject: Buffer Allocation error\n");
+		return -ENOMEM;
+	}
+
+	if (buffer[0] == ALTR_UE_TRIGGER_CHAR)
+		error_mask = priv->ue_set_mask;
+	else
+		error_mask = priv->ce_set_mask;
+
+	edac_printk(KERN_ALERT, EDAC_DEVICE,
+		    "Trigger Error Mask (0x%X)\n", error_mask);
+
+	local_irq_save(flags);
+	/* write ECC corrupted data out. */
+	for (i = 0; i < (priv->trig_alloc_sz / sizeof(*ptemp)); i++) {
+		/* Read data so we're in the correct state */
+		rmb();
+		if (ACCESS_ONCE(ptemp[i]))
+			result = -1;
+		/* Toggle Error bit (it is latched), leave ECC enabled */
+		writel(error_mask, drvdata->base);
+		writel(priv->ecc_enable_mask, drvdata->base);
+		ptemp[i] = i;
+	}
+	/* Ensure it has been written out */
+	wmb();
+	local_irq_restore(flags);
+
+	if (result)
+		edac_printk(KERN_ERR, EDAC_DEVICE, "Mem Not Cleared\n");
+
+	/* Read out written data. ECC error caused here */
+	for (i = 0; i < ALTR_TRIGGER_READ_WRD_CNT; i++)
+		if (ACCESS_ONCE(ptemp[i]) != i)
+			edac_printk(KERN_ERR, EDAC_DEVICE,
+				    "Read doesn't match written data\n");
+
+	if (priv->free_mem)
+		priv->free_mem(ptemp, priv->trig_alloc_sz, generic_ptr);
+
+	return count;
+}
+
+static void altr_set_sysfs_attr(struct edac_device_ctl_info *edac_dci,
+				const struct edac_device_prv_data *priv)
+{
+	struct edac_dev_sysfs_attribute *ecc_attr = priv->eccmgr_sysfs_attr;
+
+	if (ecc_attr)
+		edac_dci->sysfs_attributes = ecc_attr;
+}
+
+static const struct of_device_id altr_edac_device_of_match[] = {
+#ifdef CONFIG_EDAC_ALTERA_L2C
+	{ .compatible = "altr,l2-edac", .data = (void *)&l2ecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
+	{ .compatible = "altr,ocram-edac", .data = (void *)&ocramecc_data },
+#endif
+	{},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_device_of_match);
+
+/*
+ * altr_edac_device_probe()
+ *	This is a generic EDAC device driver that will support
+ *	various Altera memory devices such as the L2 cache ECC and
+ *	OCRAM ECC as well as the memories for other peripherals.
+ *	Module specific initialization is done by passing the
+ *	function index in the device tree.
+ */
+static int altr_edac_device_probe(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *dci;
+	struct altr_edac_device_dev *drvdata;
+	struct resource *r;
+	int res = 0;
+	struct device_node *np = pdev->dev.of_node;
+	char *ecc_name = (char *)np->name;
+	static int dev_instance;
+
+	if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "Unable to open devm\n");
+		return -ENOMEM;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "Unable to get mem resource\n");
+		return -ENODEV;
+	}
+
+	if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r),
+				     dev_name(&pdev->dev))) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s:Error requesting mem region\n", ecc_name);
+		return -EBUSY;
+	}
+
+	dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name,
+					 1, ecc_name, 1, 0, NULL, 0,
+					 dev_instance++);
+
+	if (!dci) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s: Unable to allocate EDAC device\n", ecc_name);
+		return -ENOMEM;
+	}
+
+	drvdata = dci->pvt_info;
+	dci->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dci);
+	drvdata->edac_dev_name = ecc_name;
+
+	drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+	if (!drvdata->base)
+		goto err;
+
+	/* Get driver specific data for this EDAC device */
+	drvdata->data = of_match_node(altr_edac_device_of_match, np)->data;
+
+	/* Check specific dependencies for the module */
+	if (drvdata->data->setup) {
+		res = drvdata->data->setup(pdev, drvdata->base);
+		if (res < 0)
+			goto err;
+	}
+
+	drvdata->sb_irq = platform_get_irq(pdev, 0);
+	res = devm_request_irq(&pdev->dev, drvdata->sb_irq,
+			       altr_edac_device_handler,
+			       0, dev_name(&pdev->dev), dci);
+	if (res < 0)
+		goto err;
+
+	drvdata->db_irq = platform_get_irq(pdev, 1);
+	res = devm_request_irq(&pdev->dev, drvdata->db_irq,
+			       altr_edac_device_handler,
+			       0, dev_name(&pdev->dev), dci);
+	if (res < 0)
+		goto err;
+
+	dci->mod_name = "Altera EDAC Memory";
+	dci->dev_name = drvdata->edac_dev_name;
+
+	altr_set_sysfs_attr(dci, drvdata->data);
+
+	if (edac_device_add_device(dci))
+		goto err;
+
+	devres_close_group(&pdev->dev, NULL);
+
+	return 0;
+err:
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s:Error setting up EDAC device: %d\n", ecc_name, res);
+	devres_release_group(&pdev->dev, NULL);
+	edac_device_free_ctl_info(dci);
+
+	return res;
+}
+
+static int altr_edac_device_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(dci);
+
+	return 0;
+}
+
+static struct platform_driver altr_edac_device_driver = {
+	.probe =  altr_edac_device_probe,
+	.remove = altr_edac_device_remove,
+	.driver = {
+		.name = "altr_edac_device",
+		.of_match_table = altr_edac_device_of_match,
+	},
+};
+module_platform_driver(altr_edac_device_driver);
+
+/*********************** OCRAM EDAC Device Functions *********************/
+
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
+
+static void *ocram_alloc_mem(size_t size, void **other)
+{
+	struct device_node *np;
+	struct gen_pool *gp;
+	void *sram_addr;
+
+	np = of_find_compatible_node(NULL, NULL, "altr,ocram-edac");
+	if (!np)
+		return NULL;
+
+	gp = of_gen_pool_get(np, "iram", 0);
+	if (!gp) {
+		of_node_put(np);
+		return NULL;
+	}
+	of_node_put(np);
+
+	sram_addr = (void *)gen_pool_alloc(gp, size / sizeof(size_t));
+	if (!sram_addr)
+		return NULL;
+
+	memset(sram_addr, 0, size);
+	wmb();	/* Ensure data is written out */
+
+	*other = gp;	/* Remember this handle for freeing  later */
+
+	return sram_addr;
+}
+
+static void ocram_free_mem(void *p, size_t size, void *other)
+{
+	gen_pool_free((struct gen_pool *)other, (u32)p, size / sizeof(size_t));
+}
+
+static struct edac_dev_sysfs_attribute altr_ocr_sysfs_attributes[] = {
+	{
+		.attr = { .name = "altr_ocram_trigger",
+			  .mode = (S_IRUGO | S_IWUSR) },
+		.show = NULL,
+		.store = altr_edac_device_trig
+	},
+	{
+		.attr = {.name = NULL }
+	}
+};
+
+/*
+ * altr_ocram_dependencies()
+ *	Test for OCRAM cache ECC dependencies upon entry because
+ *	platform specific startup should have initialized the
+ *	On-Chip RAM memory and enabled the ECC.
+ *	Can't turn on ECC here because accessing un-initialized
+ *	memory will cause CE/UE errors possibly causing an ABORT.
+ */
+static int altr_ocram_dependencies(struct platform_device *pdev,
+				   void __iomem *base)
+{
+	u32 control;
+
+	control = readl(base) & ALTR_OCR_ECC_EN_MASK;
+	if (control)
+		return 0;
+
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "OCRAM: No ECC present or ECC disabled.\n");
+	return -ENODEV;
+}
+
+const struct edac_device_prv_data ocramecc_data = {
+	.setup = altr_ocram_dependencies,
+	.ce_clear_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_SERR_MASK),
+	.ue_clear_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_DERR_MASK),
+	.eccmgr_sysfs_attr = altr_ocr_sysfs_attributes,
+	.alloc_mem = ocram_alloc_mem,
+	.free_mem = ocram_free_mem,
+	.ecc_enable_mask = ALTR_OCR_ECC_EN_MASK,
+	.ce_set_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_INJS_MASK),
+	.ue_set_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_INJD_MASK),
+	.trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE,
+};
+
+#endif	/* #ifdef CONFIG_EDAC_ALTERA_OCRAM */
+
+/********************* L2 Cache EDAC Device Functions ********************/
+
+#ifdef CONFIG_EDAC_ALTERA_L2C
+
+static void *l2_alloc_mem(size_t size, void **other)
+{
+	struct device *dev = *other;
+	void *ptemp = devm_kzalloc(dev, size, GFP_KERNEL);
+
+	if (!ptemp)
+		return NULL;
+
+	/* Make sure everything is written out */
+	wmb();
+
+	/*
+	 * Clean all cache levels up to LoC (includes L2)
+	 * This ensures the corrupted data is written into
+	 * L2 cache for readback test (which causes ECC error).
+	 */
+	flush_cache_all();
+
+	return ptemp;
+}
+
+static void l2_free_mem(void *p, size_t size, void *other)
+{
+	struct device *dev = other;
+
+	if (dev && p)
+		devm_kfree(dev, p);
+}
+
+static struct edac_dev_sysfs_attribute altr_l2_sysfs_attributes[] = {
+	{
+		.attr = { .name = "altr_l2_trigger",
+			  .mode = (S_IRUGO | S_IWUSR) },
+		.show = NULL,
+		.store = altr_edac_device_trig
+	},
+	{
+		.attr = {.name = NULL }
+	}
+};
+
+/*
+ * altr_l2_dependencies()
+ *	Test for L2 cache ECC dependencies upon entry because
+ *	platform specific startup should have initialized the L2
+ *	memory and enabled the ECC.
+ *	Bail if ECC is not enabled.
+ *	Note that L2 Cache Enable is forced at build time.
+ */
+static int altr_l2_dependencies(struct platform_device *pdev,
+				void __iomem *base)
+{
+	u32 control;
+
+	control = readl(base) & ALTR_L2_ECC_EN_MASK;
+	if (!control) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "L2: No ECC present, or ECC disabled\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+const struct edac_device_prv_data l2ecc_data = {
+	.setup = altr_l2_dependencies,
+	.ce_clear_mask = 0,
+	.ue_clear_mask = 0,
+	.eccmgr_sysfs_attr = altr_l2_sysfs_attributes,
+	.alloc_mem = l2_alloc_mem,
+	.free_mem = l2_free_mem,
+	.ecc_enable_mask = ALTR_L2_ECC_EN_MASK,
+	.ce_set_mask = (ALTR_L2_ECC_EN_MASK | ALTR_L2_ECC_INJS_MASK),
+	.ue_set_mask = (ALTR_L2_ECC_EN_MASK | ALTR_L2_ECC_INJD_MASK),
+	.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+};
+
+#endif	/* #ifdef CONFIG_EDAC_ALTERA_L2C */
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Thor Thayer");
-MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller");
+MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
-- 
2.4.5


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2015-10-27 20:38 [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support dinguyen
@ 2015-11-19 18:34 ` Borislav Petkov
  2016-01-04 17:17   ` Dinh Nguyen
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2015-11-19 18:34 UTC (permalink / raw)
  To: dinguyen
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On Tue, Oct 27, 2015 at 03:38:12PM -0500, dinguyen@opensource.altera.com wrote:
> From: Thor Thayer <tthayer@opensource.altera.com>
> 
> Adding L2 Cache and On-Chip RAM EDAC support for the
> Altera SoCs using the EDAC device  model. The SDRAM
> controller is using the Memory Controller model.
> 
> Each type of ECC is individually configurable.
> 
> The SDRAM ECC is a separate Kconfig option because:
> 1) the SDRAM preparation can take almost 2 seconds on boot and some
> customers need a faster boot time.
> 2) the SDRAM has an ECC initialization dependency on the preloader
> which is outside the kernel. It is desirable to be able to turn the
> SDRAM on & off separately.
> 
> Signed-off-by: Thor Thayer <tthayer@opensource.altera.com>
> Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
> ---
> v7: s/of_get_named_gen_pool/of_gen_pool_get
>     Remove #ifdef for EDAC_DEBUG
>     Use -ENODEV instead of EPROBE_DEFER
> 
> v6: Convert to nested EDAC in device tree. Force L2 cache
>     on for L2Cache ECC & remove L2 cache syscon for checking
>     enable bit. Update year in header.
> 
> v5: No Change
> 
> v4: Change mask defines to use BIT().
>     Fix comment style to agree with kernel coding style.
>     Better printk description for read != write in trigger.
>     Remove SysFS debugging message.
>     Better dci->mod_name
>     Move gen_pool pointer assignment to end of function.
>     Invert logic to reduce indent in ocram depenency check.
>     Change from dev_err() to edac_printk()
>     Replace magic numbers with defines & comments.
>     Improve error injection test.
>     Change Makefile intermediary name to altr (from alt)
> 
> v3: Move OCRAM and L2 cache EDAC functions into altera_edac.c
>     instead of separate files.
> 
> v2: Fix L2 dependency comments.
> ---
>  drivers/edac/Kconfig       |  16 ++
>  drivers/edac/Makefile      |   5 +-
>  drivers/edac/altera_edac.c | 488 ++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 507 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index ef25000..b80b4ad 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -376,6 +376,22 @@ config EDAC_ALTERA_MC
>  	  preloader must initialize the SDRAM before loading
>  	  the kernel.
>  
> +config EDAC_ALTERA_L2C
> +	bool "Altera L2 Cache EDAC"
> +	depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
> +	select CACHE_L2X0
> +	help
> +	  Support for error detection and correction on the
> +	  Altera L2 cache Memory for Altera SoCs. This option
> +          requires L2 cache so it will force that selection.
> +
> +config EDAC_ALTERA_OCRAM
> +	bool "Altera On-Chip RAM EDAC"
> +	depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA && SRAM && GENERIC_ALLOCATOR
> +	help
> +	  Support for error detection and correction on the
> +	  Altera On-Chip RAM Memory for Altera SoCs.
> +
>  config EDAC_SYNOPSYS
>  	tristate "Synopsys DDR Memory Controller"
>  	depends on EDAC_MM_EDAC && ARCH_ZYNQ
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index dbf53e0..8f1c6fc 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -67,6 +67,9 @@ obj-$(CONFIG_EDAC_OCTEON_L2C)		+= octeon_edac-l2c.o
>  obj-$(CONFIG_EDAC_OCTEON_LMC)		+= octeon_edac-lmc.o
>  obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
>  
> -obj-$(CONFIG_EDAC_ALTERA_MC)		+= altera_edac.o
> +altr_edac-y				:= altera_edac.o
> +obj-$(CONFIG_EDAC_ALTERA_MC)		+= altr_edac.o
> +obj-$(CONFIG_EDAC_ALTERA_L2C)		+= altr_edac.o
> +obj-$(CONFIG_EDAC_ALTERA_OCRAM)		+= altr_edac.o

What are those supposed to accomplish?

>  obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
>  obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
> index 9296409..154ac8c 100644
> --- a/drivers/edac/altera_edac.c
> +++ b/drivers/edac/altera_edac.c
> @@ -17,8 +17,10 @@
>   * Adapted from the highbank_mc_edac driver.
>   */
>  
> +#include <asm/cacheflush.h>
>  #include <linux/ctype.h>
>  #include <linux/edac.h>
> +#include <linux/genalloc.h>
>  #include <linux/interrupt.h>
>  #include <linux/kernel.h>
>  #include <linux/mfd/syscon.h>
> @@ -34,6 +36,7 @@
>  
>  #define EDAC_MOD_STR		"altera_edac"
>  #define EDAC_VERSION		"1"
> +#define EDAC_DEVICE		"ALTR_MEM"

Let's simply call it "Altera" - it is more human-friendly.

>  static const struct altr_sdram_prv_data c5_data = {
>  	.ecc_ctrl_offset    = CV_CTLCFG_OFST,
> @@ -75,6 +78,33 @@ static const struct altr_sdram_prv_data a10_data = {
>  	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
>  };
>  
> +/************************** EDAC Device Defines **************************/
> +
> +/* OCRAM ECC Management Group Defines */
> +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
> +#define ALTR_OCR_ECC_EN_MASK            BIT(0)
> +#define ALTR_OCR_ECC_INJS_MASK          BIT(1)
> +#define ALTR_OCR_ECC_INJD_MASK          BIT(2)
> +#define ALTR_OCR_ECC_SERR_MASK          BIT(3)
> +#define ALTR_OCR_ECC_DERR_MASK          BIT(4)
> +
> +/* L2 ECC Management Group Defines */
> +#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
> +#define ALTR_L2_ECC_EN_MASK             BIT(0)
> +#define ALTR_L2_ECC_INJS_MASK           BIT(1)
> +#define ALTR_L2_ECC_INJD_MASK           BIT(2)

Single bit masks? You don't need to call them "_MASK" - simply remove
it.

> +#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
> +#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
> +#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
> +#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
> +
> +/*********************** EDAC Memory Controller Functions ****************/
> +
> +/* The SDRAM controller uses the EDAC Memory Controller framework.       */
> +
> +#ifdef CONFIG_EDAC_ALTERA_MC
> +
>  static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
>  {
>  	struct mem_ctl_info *mci = dev_id;
> @@ -504,6 +534,462 @@ static struct platform_driver altr_sdram_edac_driver = {
>  
>  module_platform_driver(altr_sdram_edac_driver);
>  
> +#endif		/* #ifdef CONFIG_EDAC_ALTERA_MC */

#endif /* CONFIG_EDAC_ALTERA_MC */

is the usual syntax for those.

> +/************************* EDAC Parent Probe *************************/
> +
> +static const struct of_device_id altr_edac_device_of_match[];
> +
> +static const struct of_device_id altr_edac_of_match[] = {
> +	{ .compatible = "altr,edac" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, altr_edac_of_match);
> +
> +static int altr_edac_probe(struct platform_device *pdev)
> +{
> +	of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match,
> +			     NULL, &pdev->dev);
> +	return 0;
> +}
> +
> +static struct platform_driver altr_edac_driver = {
> +	.probe =  altr_edac_probe,
> +	.driver = {
> +		.name = "altr_edac",
> +		.of_match_table = altr_edac_of_match,
> +	},
> +};
> +module_platform_driver(altr_edac_driver);

WARNING: DT compatible string "altr,edac" appears un-documented -- check ./Documentation/devicetree/bindings/
#176: FILE: drivers/edac/altera_edac.c:543:
+       { .compatible = "altr,edac" },

WARNING: DT compatible string "altr,l2-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
#326: FILE: drivers/edac/altera_edac.c:693:
+       { .compatible = "altr,l2-edac", .data = (void *)&l2ecc_data },

WARNING: DT compatible string "altr,ocram-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
#329: FILE: drivers/edac/altera_edac.c:696:
+       { .compatible = "altr,ocram-edac", .data = (void *)&ocramecc_data },

> +
> +/************************* EDAC Device Functions *************************/
> +
> +/*
> + * EDAC Device Functions (shared between various IPs).
> + * The discrete memories use the EDAC Device framework. The probe
> + * and error handling functions are very similar between memories
> + * so they are shared. The memory allocation and free for EDAC trigger

						and freeing

> + * testing are different for each memory.
> + */
> +
> +const struct edac_device_prv_data ocramecc_data;
> +const struct edac_device_prv_data l2ecc_data;
> +
> +struct edac_device_prv_data {
> +	int (*setup)(struct platform_device *pdev, void __iomem *base);
> +	int ce_clear_mask;
> +	int ue_clear_mask;
> +	struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr;
> +	void * (*alloc_mem)(size_t size, void **other);
> +	void (*free_mem)(void *p, size_t size, void *other);
> +	int ecc_enable_mask;
> +	int ce_set_mask;
> +	int ue_set_mask;
> +	int trig_alloc_sz;
> +};
> +
> +struct altr_edac_device_dev {
> +	void __iomem *base;
> +	int sb_irq;
> +	int db_irq;
> +	const struct edac_device_prv_data *data;
> +	char *edac_dev_name;
> +};
> +
> +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
> +{
> +	struct edac_device_ctl_info *dci = dev_id;
> +	struct altr_edac_device_dev *drvdata = dci->pvt_info;
> +	const struct edac_device_prv_data *priv = drvdata->data;
> +
> +	if (irq == drvdata->sb_irq) {
> +		if (priv->ce_clear_mask)
> +			writel(priv->ce_clear_mask, drvdata->base);
> +		edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
> +	}
> +	if (irq == drvdata->db_irq) {
> +		if (priv->ue_clear_mask)
> +			writel(priv->ue_clear_mask, drvdata->base);
> +		edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
> +		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
> +ssize_t altr_edac_device_trig(struct edac_device_ctl_info *edac_dci,
> +			      const char *buffer, size_t count)

Is that an error injection function? If so, it should be behind
CONFIG_EDAC_DEBUG or an altera-specific Kconfig item which people can
enable - you don't want people to be able to inject errors on production
systems.

Also, those injection facilities should be in debugfs and not sysfs -
look at xgene_edac for an example.

I'll stop here - that's enough TODO for now :-)

Thanks.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2015-11-19 18:34 ` Borislav Petkov
@ 2016-01-04 17:17   ` Dinh Nguyen
  2016-01-04 19:46     ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Dinh Nguyen @ 2016-01-04 17:17 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On 11/19/2015 12:34 PM, Borislav Petkov wrote:
> On Tue, Oct 27, 2015 at 03:38:12PM -0500, dinguyen@opensource.altera.com wrote:
>> From: Thor Thayer <tthayer@opensource.altera.com>
>>
>> Adding L2 Cache and On-Chip RAM EDAC support for the
>> Altera SoCs using the EDAC device  model. The SDRAM
>> controller is using the Memory Controller model.
>>
>> Each type of ECC is individually configurable.
>>
>> The SDRAM ECC is a separate Kconfig option because:
>> 1) the SDRAM preparation can take almost 2 seconds on boot and some
>> customers need a faster boot time.
>> 2) the SDRAM has an ECC initialization dependency on the preloader
>> which is outside the kernel. It is desirable to be able to turn the
>> SDRAM on & off separately.
>>
>> Signed-off-by: Thor Thayer <tthayer@opensource.altera.com>
>> Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
>> ---
>> v7: s/of_get_named_gen_pool/of_gen_pool_get
>>     Remove #ifdef for EDAC_DEBUG
>>     Use -ENODEV instead of EPROBE_DEFER
>>
>> v6: Convert to nested EDAC in device tree. Force L2 cache
>>     on for L2Cache ECC & remove L2 cache syscon for checking
>>     enable bit. Update year in header.
>>
>> v5: No Change
>>
>> v4: Change mask defines to use BIT().
>>     Fix comment style to agree with kernel coding style.
>>     Better printk description for read != write in trigger.
>>     Remove SysFS debugging message.
>>     Better dci->mod_name
>>     Move gen_pool pointer assignment to end of function.
>>     Invert logic to reduce indent in ocram depenency check.
>>     Change from dev_err() to edac_printk()
>>     Replace magic numbers with defines & comments.
>>     Improve error injection test.
>>     Change Makefile intermediary name to altr (from alt)
>>
>> v3: Move OCRAM and L2 cache EDAC functions into altera_edac.c
>>     instead of separate files.
>>
>> v2: Fix L2 dependency comments.
>> ---
>>  drivers/edac/Kconfig       |  16 ++
>>  drivers/edac/Makefile      |   5 +-
>>  drivers/edac/altera_edac.c | 488 ++++++++++++++++++++++++++++++++++++++++++++-
>>  3 files changed, 507 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
>> index ef25000..b80b4ad 100644
>> --- a/drivers/edac/Kconfig
>> +++ b/drivers/edac/Kconfig

[snip]

>> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
>> index dbf53e0..8f1c6fc 100644
>> --- a/drivers/edac/Makefile
>> +++ b/drivers/edac/Makefile
>> @@ -67,6 +67,9 @@ obj-$(CONFIG_EDAC_OCTEON_L2C)		+= octeon_edac-l2c.o
>>  obj-$(CONFIG_EDAC_OCTEON_LMC)		+= octeon_edac-lmc.o
>>  obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
>>  
>> -obj-$(CONFIG_EDAC_ALTERA_MC)		+= altera_edac.o
>> +altr_edac-y				:= altera_edac.o
>> +obj-$(CONFIG_EDAC_ALTERA_MC)		+= altr_edac.o
>> +obj-$(CONFIG_EDAC_ALTERA_L2C)		+= altr_edac.o
>> +obj-$(CONFIG_EDAC_ALTERA_OCRAM)		+= altr_edac.o
> 
> What are those supposed to accomplish?
> 

We tried to jam the L2 and OCRAM EDAC functionality in the same
altr_edac.c file. It looks like it might be clean if we split out the L2
and OCRAM functions into their appropriate files(altr_edac_l2.c and
altr_edac_ocram.c). Do you agree?

>>  obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
>>  obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
>> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
>> index 9296409..154ac8c 100644
>> --- a/drivers/edac/altera_edac.c
>> +++ b/drivers/edac/altera_edac.c
>> @@ -17,8 +17,10 @@
>>   * Adapted from the highbank_mc_edac driver.
>>   */
>>  
>> +#include <asm/cacheflush.h>
>>  #include <linux/ctype.h>
>>  #include <linux/edac.h>
>> +#include <linux/genalloc.h>
>>  #include <linux/interrupt.h>
>>  #include <linux/kernel.h>
>>  #include <linux/mfd/syscon.h>
>> @@ -34,6 +36,7 @@
>>  
>>  #define EDAC_MOD_STR		"altera_edac"
>>  #define EDAC_VERSION		"1"
>> +#define EDAC_DEVICE		"ALTR_MEM"
> 
> Let's simply call it "Altera" - it is more human-friendly.
> 

Ok.

>>  static const struct altr_sdram_prv_data c5_data = {
>>  	.ecc_ctrl_offset    = CV_CTLCFG_OFST,
>> @@ -75,6 +78,33 @@ static const struct altr_sdram_prv_data a10_data = {
>>  	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
>>  };
>>  
>> +/************************** EDAC Device Defines **************************/
>> +
>> +/* OCRAM ECC Management Group Defines */
>> +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
>> +#define ALTR_OCR_ECC_EN_MASK            BIT(0)
>> +#define ALTR_OCR_ECC_INJS_MASK          BIT(1)
>> +#define ALTR_OCR_ECC_INJD_MASK          BIT(2)
>> +#define ALTR_OCR_ECC_SERR_MASK          BIT(3)
>> +#define ALTR_OCR_ECC_DERR_MASK          BIT(4)
>> +
>> +/* L2 ECC Management Group Defines */
>> +#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
>> +#define ALTR_L2_ECC_EN_MASK             BIT(0)
>> +#define ALTR_L2_ECC_INJS_MASK           BIT(1)
>> +#define ALTR_L2_ECC_INJD_MASK           BIT(2)
> 
> Single bit masks? You don't need to call them "_MASK" - simply remove
> it.
> 

Ok.

>> +#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
>> +#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
>> +#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
>> +#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
>> +
>> +/*********************** EDAC Memory Controller Functions ****************/
>> +
>> +/* The SDRAM controller uses the EDAC Memory Controller framework.       */
>> +
>> +#ifdef CONFIG_EDAC_ALTERA_MC
>> +
>>  static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
>>  {
>>  	struct mem_ctl_info *mci = dev_id;
>> @@ -504,6 +534,462 @@ static struct platform_driver altr_sdram_edac_driver = {
>>  
>>  module_platform_driver(altr_sdram_edac_driver);
>>  
>> +#endif		/* #ifdef CONFIG_EDAC_ALTERA_MC */
> 
> #endif /* CONFIG_EDAC_ALTERA_MC */
> 
> is the usual syntax for those.
> 

Ok..

>> +/************************* EDAC Parent Probe *************************/
>> +
>> +static const struct of_device_id altr_edac_device_of_match[];
>> +
>> +static const struct of_device_id altr_edac_of_match[] = {
>> +	{ .compatible = "altr,edac" },
>> +	{},
>> +};
>> +MODULE_DEVICE_TABLE(of, altr_edac_of_match);
>> +
>> +static int altr_edac_probe(struct platform_device *pdev)
>> +{
>> +	of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match,
>> +			     NULL, &pdev->dev);
>> +	return 0;
>> +}
>> +
>> +static struct platform_driver altr_edac_driver = {
>> +	.probe =  altr_edac_probe,
>> +	.driver = {
>> +		.name = "altr_edac",
>> +		.of_match_table = altr_edac_of_match,
>> +	},
>> +};
>> +module_platform_driver(altr_edac_driver);
> 
> WARNING: DT compatible string "altr,edac" appears un-documented -- check ./Documentation/devicetree/bindings/
> #176: FILE: drivers/edac/altera_edac.c:543:
> +       { .compatible = "altr,edac" },
> 
> WARNING: DT compatible string "altr,l2-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
> #326: FILE: drivers/edac/altera_edac.c:693:
> +       { .compatible = "altr,l2-edac", .data = (void *)&l2ecc_data },
> 
> WARNING: DT compatible string "altr,ocram-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
> #329: FILE: drivers/edac/altera_edac.c:696:
> +       { .compatible = "altr,ocram-edac", .data = (void *)&ocramecc_data },
> 

These bindings were in a separate patch that you were not CC'd. I'll
keep you on the entire patch set in the future.


>> +
>> +/************************* EDAC Device Functions *************************/
>> +
>> +/*
>> + * EDAC Device Functions (shared between various IPs).
>> + * The discrete memories use the EDAC Device framework. The probe
>> + * and error handling functions are very similar between memories
>> + * so they are shared. The memory allocation and free for EDAC trigger
> 
> 						and freeing
> 
>> + * testing are different for each memory.
>> + */
>> +
>> +const struct edac_device_prv_data ocramecc_data;
>> +const struct edac_device_prv_data l2ecc_data;
>> +
>> +struct edac_device_prv_data {
>> +	int (*setup)(struct platform_device *pdev, void __iomem *base);
>> +	int ce_clear_mask;
>> +	int ue_clear_mask;
>> +	struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr;
>> +	void * (*alloc_mem)(size_t size, void **other);
>> +	void (*free_mem)(void *p, size_t size, void *other);
>> +	int ecc_enable_mask;
>> +	int ce_set_mask;
>> +	int ue_set_mask;
>> +	int trig_alloc_sz;
>> +};
>> +
>> +struct altr_edac_device_dev {
>> +	void __iomem *base;
>> +	int sb_irq;
>> +	int db_irq;
>> +	const struct edac_device_prv_data *data;
>> +	char *edac_dev_name;
>> +};
>> +
>> +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
>> +{
>> +	struct edac_device_ctl_info *dci = dev_id;
>> +	struct altr_edac_device_dev *drvdata = dci->pvt_info;
>> +	const struct edac_device_prv_data *priv = drvdata->data;
>> +
>> +	if (irq == drvdata->sb_irq) {
>> +		if (priv->ce_clear_mask)
>> +			writel(priv->ce_clear_mask, drvdata->base);
>> +		edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
>> +	}
>> +	if (irq == drvdata->db_irq) {
>> +		if (priv->ue_clear_mask)
>> +			writel(priv->ue_clear_mask, drvdata->base);
>> +		edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
>> +		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>> +	}
>> +
>> +	return IRQ_HANDLED;
>> +}
>> +
>> +ssize_t altr_edac_device_trig(struct edac_device_ctl_info *edac_dci,
>> +			      const char *buffer, size_t count)
> 
> Is that an error injection function? If so, it should be behind
> CONFIG_EDAC_DEBUG or an altera-specific Kconfig item which people can
> enable - you don't want people to be able to inject errors on production
> systems.
> 
> Also, those injection facilities should be in debugfs and not sysfs -
> look at xgene_edac for an example.

Yes, there is an injection, and it is using sysfs. Will have to update.

> 
> I'll stop here - that's enough TODO for now :-)
> 

Yes, I'll address all of the above comments in v9.

Thanks for the review.

Dinh


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 17:17   ` Dinh Nguyen
@ 2016-01-04 19:46     ` Borislav Petkov
  2016-01-04 20:04       ` Dinh Nguyen
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2016-01-04 19:46 UTC (permalink / raw)
  To: Dinh Nguyen
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On Mon, Jan 04, 2016 at 11:17:29AM -0600, Dinh Nguyen wrote:
> We tried to jam the L2 and OCRAM EDAC functionality in the same
> altr_edac.c file. It looks like it might be clean if we split out the L2
> and OCRAM functions into their appropriate files(altr_edac_l2.c and
> altr_edac_ocram.c). Do you agree?

"Clean" in what sense? To me clean is when there's a single compilation
unit altera_edac.c which contains all Altera-specific code.

> These bindings were in a separate patch that you were not CC'd. I'll
> keep you on the entire patch set in the future.

Yes, I believe the devicetree definitions need to go hand-in-hand with
its user(s).

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 19:46     ` Borislav Petkov
@ 2016-01-04 20:04       ` Dinh Nguyen
  2016-01-04 20:30         ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Dinh Nguyen @ 2016-01-04 20:04 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On 01/04/2016 01:46 PM, Borislav Petkov wrote:
> On Mon, Jan 04, 2016 at 11:17:29AM -0600, Dinh Nguyen wrote:
>> We tried to jam the L2 and OCRAM EDAC functionality in the same
>> altr_edac.c file. It looks like it might be clean if we split out the L2
>> and OCRAM functions into their appropriate files(altr_edac_l2.c and
>> altr_edac_ocram.c). Do you agree?
> 
> "Clean" in what sense? To me clean is when there's a single compilation
> unit altera_edac.c which contains all Altera-specific code.
> 

altr_edac.c originally added support for SDRAM. Now we're adding support
for L2 and OCRAM into the same file by using #ifdef
CONFIG_EDAC_ALTERA_OCRAM and CONFIG_EDAC_ALTERA_L2C. So "clean" was to
move the l2 and ocram implementation into separate files.

Dinh

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 20:04       ` Dinh Nguyen
@ 2016-01-04 20:30         ` Borislav Petkov
  2016-01-04 20:46           ` Dinh Nguyen
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2016-01-04 20:30 UTC (permalink / raw)
  To: Dinh Nguyen
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On Mon, Jan 04, 2016 at 02:04:08PM -0600, Dinh Nguyen wrote:
> altr_edac.c originally added support for SDRAM. Now we're adding support
> for L2 and OCRAM into the same file by using #ifdef
> CONFIG_EDAC_ALTERA_OCRAM and CONFIG_EDAC_ALTERA_L2C. So "clean" was to
> move the l2 and ocram implementation into separate files.

So what's wrong with adding those to the same file and using different
registration functions? Like xgene_edac does, for example.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 20:30         ` Borislav Petkov
@ 2016-01-04 20:46           ` Dinh Nguyen
  2016-01-04 20:59             ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Dinh Nguyen @ 2016-01-04 20:46 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On 01/04/2016 02:30 PM, Borislav Petkov wrote:
> On Mon, Jan 04, 2016 at 02:04:08PM -0600, Dinh Nguyen wrote:
>> altr_edac.c originally added support for SDRAM. Now we're adding support
>> for L2 and OCRAM into the same file by using #ifdef
>> CONFIG_EDAC_ALTERA_OCRAM and CONFIG_EDAC_ALTERA_L2C. So "clean" was to
>> move the l2 and ocram implementation into separate files.
> 
> So what's wrong with adding those to the same file and using different
> registration functions? Like xgene_edac does, for example.
> 

I don't see a way for the xgene to manually build for each configuration
using Kconfig? For SoCFGPA, we would like to keep the option to build
for each type of ECC.

Dinh

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 20:59             ` Borislav Petkov
@ 2016-01-04 20:55               ` Dinh Nguyen
  2016-01-04 21:07                 ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Dinh Nguyen @ 2016-01-04 20:55 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On 01/04/2016 02:59 PM, Borislav Petkov wrote:
> On Mon, Jan 04, 2016 at 02:46:26PM -0600, Dinh Nguyen wrote:
>> I don't see a way for the xgene to manually build for each configuration
>> using Kconfig? For SoCFGPA, we would like to keep the option to build
>> for each type of ECC.
> 
> xgene builds everything in and unconditionally.
> 

Right. So for us, if we build in SDRAM ECC unconditionally, there is a
requirement with the bootloader to turn on ECC and scrub the memory.
This can take up to ~2 seconds and some may not want that.

Dinh

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 20:46           ` Dinh Nguyen
@ 2016-01-04 20:59             ` Borislav Petkov
  2016-01-04 20:55               ` Dinh Nguyen
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2016-01-04 20:59 UTC (permalink / raw)
  To: Dinh Nguyen
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On Mon, Jan 04, 2016 at 02:46:26PM -0600, Dinh Nguyen wrote:
> I don't see a way for the xgene to manually build for each configuration
> using Kconfig? For SoCFGPA, we would like to keep the option to build
> for each type of ECC.

xgene builds everything in and unconditionally.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 20:55               ` Dinh Nguyen
@ 2016-01-04 21:07                 ` Borislav Petkov
  2016-01-04 21:33                   ` Thor Thayer
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2016-01-04 21:07 UTC (permalink / raw)
  To: Dinh Nguyen
  Cc: tthayer, dinh.linux, dougthompson, mchehab, linux, linux-edac,
	linux-kernel

On Mon, Jan 04, 2016 at 02:55:43PM -0600, Dinh Nguyen wrote:
> Right. So for us, if we build in SDRAM ECC unconditionally, there is a
> requirement with the bootloader to turn on ECC and scrub the memory.

Huh, how does a built-in piece of code cause the bootloader to do
something?!?

And how would the bootloader know what's in the kernel? The bootloader
runs first and hands off to the kernel...

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 21:07                 ` Borislav Petkov
@ 2016-01-04 21:33                   ` Thor Thayer
  2016-01-04 22:01                     ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Thor Thayer @ 2016-01-04 21:33 UTC (permalink / raw)
  To: Borislav Petkov, Dinh Nguyen
  Cc: dinh.linux, dougthompson, mchehab, linux, linux-edac, linux-kernel



On 01/04/2016 03:07 PM, Borislav Petkov wrote:
> On Mon, Jan 04, 2016 at 02:55:43PM -0600, Dinh Nguyen wrote:
>> Right. So for us, if we build in SDRAM ECC unconditionally, there is a
>> requirement with the bootloader to turn on ECC and scrub the memory.
>
> Huh, how does a built-in piece of code cause the bootloader to do
> something?!?
>
> And how would the bootloader know what's in the kernel? The bootloader
> runs first and hands off to the kernel...
>
Hi Boris,

The decision about ECC or non-ECC SDRAM is made before building the 
Linux image and must be matched to the appropriate bootloader (ECC or 
non-ECC).

If ECC is desired for SDRAM, the bootloader enables SDRAM ECC and then 
initializes the memory contents (scrub) before the Linux image is loaded 
into SDRAM.

The ECC syndromes are calculated and stored in SDRAM only when SDRAM ECC 
is enabled and the SDRAM data is written (in the bootloader case, this 
is the Linux image). If we suddenly switched ECC on during Linux 
initialization, we'd be flooded with ECC errors since the ECC syndromes 
won't match the data for the Linux image.

The scrubbing process takes more time to boot which some of our 
customers don't want. This is what Dinh was referring to.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 21:33                   ` Thor Thayer
@ 2016-01-04 22:01                     ` Borislav Petkov
  2016-01-04 23:42                       ` Thor Thayer
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2016-01-04 22:01 UTC (permalink / raw)
  To: Thor Thayer
  Cc: Dinh Nguyen, dinh.linux, dougthompson, mchehab, linux,
	linux-edac, linux-kernel

On Mon, Jan 04, 2016 at 03:33:23PM -0600, Thor Thayer wrote:
> The decision about ECC or non-ECC SDRAM is made before building the Linux
> image and must be matched to the appropriate bootloader (ECC or non-ECC).
> 
> If ECC is desired for SDRAM, the bootloader enables SDRAM ECC and then
> initializes the memory contents (scrub) before the Linux image is loaded
> into SDRAM.
> 
> The ECC syndromes are calculated and stored in SDRAM only when SDRAM ECC is
> enabled and the SDRAM data is written (in the bootloader case, this is the
> Linux image). If we suddenly switched ECC on during Linux initialization,
> we'd be flooded with ECC errors since the ECC syndromes won't match the data
> for the Linux image.
> 
> The scrubbing process takes more time to boot which some of our customers
> don't want. This is what Dinh was referring to.

So that still doesn't have any effect on what's compiled in the EDAC
module, AFAICT. You simply build everything in and depending on
whether ECC is enabled or not in the bootloader, altera_edac behaves
accordingly. On a system with ECC *not* enabled, it would simply have
the SDRAM ECC functionality inactive.

This is no different than an x86 system where you enter the BIOS and
enable or disable ECC. The EDAC module queries whether ECC has been
enabled or not and behaves accordingly.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 22:01                     ` Borislav Petkov
@ 2016-01-04 23:42                       ` Thor Thayer
  2016-01-05 10:58                         ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Thor Thayer @ 2016-01-04 23:42 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dinh Nguyen, dinh.linux, dougthompson, mchehab, linux,
	linux-edac, linux-kernel



On 01/04/2016 04:01 PM, Borislav Petkov wrote:
> On Mon, Jan 04, 2016 at 03:33:23PM -0600, Thor Thayer wrote:
>> The decision about ECC or non-ECC SDRAM is made before building the Linux
>> image and must be matched to the appropriate bootloader (ECC or non-ECC).
>>
>> If ECC is desired for SDRAM, the bootloader enables SDRAM ECC and then
>> initializes the memory contents (scrub) before the Linux image is loaded
>> into SDRAM.
>>
>> The ECC syndromes are calculated and stored in SDRAM only when SDRAM ECC is
>> enabled and the SDRAM data is written (in the bootloader case, this is the
>> Linux image). If we suddenly switched ECC on during Linux initialization,
>> we'd be flooded with ECC errors since the ECC syndromes won't match the data
>> for the Linux image.
>>
>> The scrubbing process takes more time to boot which some of our customers
>> don't want. This is what Dinh was referring to.
>
> So that still doesn't have any effect on what's compiled in the EDAC
> module, AFAICT. You simply build everything in and depending on
> whether ECC is enabled or not in the bootloader, altera_edac behaves
> accordingly. On a system with ECC *not* enabled, it would simply have
> the SDRAM ECC functionality inactive.
>
> This is no different than an x86 system where you enter the BIOS and
> enable or disable ECC. The EDAC module queries whether ECC has been
> enabled or not and behaves accordingly.
>
OK. I see your point for SDRAM.

However, in the case of OCRAM and L2 cache ECC, we want to be able to 
enable them individually which is what the following does.

 >> -obj-$(CONFIG_EDAC_ALTERA_MC)		+= altera_edac.o
 >> +altr_edac-y				:= altera_edac.o
 >> +obj-$(CONFIG_EDAC_ALTERA_MC)		+= altr_edac.o
 >> +obj-$(CONFIG_EDAC_ALTERA_L2C)		+= altr_edac.o
 >> +obj-$(CONFIG_EDAC_ALTERA_OCRAM)		+= altr_edac.o
 >
 > What are those supposed to accomplish?
 >

and then the defines are also used to conditionally include the L2 or 
OCRAM ECC functions because everything is in one file.

However, the highbank and octeon edacs are split into separate files for 
L2 which Dinh pointed may be cleaner for individual control.

Thanks,

Thor

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-04 23:42                       ` Thor Thayer
@ 2016-01-05 10:58                         ` Borislav Petkov
  2016-01-05 20:37                           ` Thor Thayer
  0 siblings, 1 reply; 16+ messages in thread
From: Borislav Petkov @ 2016-01-05 10:58 UTC (permalink / raw)
  To: Thor Thayer
  Cc: Dinh Nguyen, dinh.linux, dougthompson, mchehab, linux,
	linux-edac, linux-kernel

On Mon, Jan 04, 2016 at 05:42:40PM -0600, Thor Thayer wrote:
> and then the defines are also used to conditionally include the L2 or OCRAM
> ECC functions because everything is in one file.

So?

You don't have to do those funny games in the Makefile. Instead, you
have your main CONFIG_EDAC_ALTERA_MC option and all the other CONFIG_*
options depend on it. The ifdeffery in altera_edac.c then takes care of
what needs to be enabled or not.

However(!), your driver is not huge or something. So I still don't
understand why you need that split and those additional config options
and why not keep it all together in one file. What is the compelling use
case for that split and additional complexity?

> However, the highbank and octeon edacs are split into separate files for L2
> which Dinh pointed may be cleaner for individual control.

I should've nacked that split at the time.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-05 10:58                         ` Borislav Petkov
@ 2016-01-05 20:37                           ` Thor Thayer
  2016-01-05 21:09                             ` Borislav Petkov
  0 siblings, 1 reply; 16+ messages in thread
From: Thor Thayer @ 2016-01-05 20:37 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Dinh Nguyen, dinh.linux, dougthompson, mchehab, linux,
	linux-edac, linux-kernel



On 01/05/2016 04:58 AM, Borislav Petkov wrote:
> On Mon, Jan 04, 2016 at 05:42:40PM -0600, Thor Thayer wrote:
>> and then the defines are also used to conditionally include the L2 or OCRAM
>> ECC functions because everything is in one file.
>
> So?
>
> You don't have to do those funny games in the Makefile. Instead, you
> have your main CONFIG_EDAC_ALTERA_MC option and all the other CONFIG_*
> options depend on it. The ifdeffery in altera_edac.c then takes care of
> what needs to be enabled or not.
>
> However(!), your driver is not huge or something. So I still don't
> understand why you need that split and those additional config options
> and why not keep it all together in one file. What is the compelling use
> case for that split and additional complexity?
>
>> However, the highbank and octeon edacs are split into separate files for L2
>> which Dinh pointed may be cleaner for individual control.
>
> I should've nacked that split at the time.
>
OK, Thanks for the clarification.

The CONFIG_EDAC_ALTERA_MC is a little confusing because it refers to the 
Memory Controller (SDRAM) and uses that in the menu string(Altera SDRAM 
Memory Controller EDAC).

Would it be confusing to rename this CONFIG_EDAC_ALTERA, update the 
SDRAM code to check the ECC Enable bit instead of this config option and 
update the string in the menu?

I'll keep this as one file and implement your suggested changes and 
resubmit.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support
  2016-01-05 20:37                           ` Thor Thayer
@ 2016-01-05 21:09                             ` Borislav Petkov
  0 siblings, 0 replies; 16+ messages in thread
From: Borislav Petkov @ 2016-01-05 21:09 UTC (permalink / raw)
  To: Thor Thayer
  Cc: Dinh Nguyen, dinh.linux, dougthompson, mchehab, linux,
	linux-edac, linux-kernel

On Tue, Jan 05, 2016 at 02:37:48PM -0600, Thor Thayer wrote:
> The CONFIG_EDAC_ALTERA_MC is a little confusing because it refers to the
> Memory Controller (SDRAM) and uses that in the menu string(Altera SDRAM
> Memory Controller EDAC).
> 
> Would it be confusing to rename this CONFIG_EDAC_ALTERA, update the SDRAM
> code to check the ECC Enable bit instead of this config option and update
> the string in the menu?

Sounds good to me.

> I'll keep this as one file and implement your suggested changes and
> resubmit.

Thanks.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-01-05 21:09 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-27 20:38 [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support dinguyen
2015-11-19 18:34 ` Borislav Petkov
2016-01-04 17:17   ` Dinh Nguyen
2016-01-04 19:46     ` Borislav Petkov
2016-01-04 20:04       ` Dinh Nguyen
2016-01-04 20:30         ` Borislav Petkov
2016-01-04 20:46           ` Dinh Nguyen
2016-01-04 20:59             ` Borislav Petkov
2016-01-04 20:55               ` Dinh Nguyen
2016-01-04 21:07                 ` Borislav Petkov
2016-01-04 21:33                   ` Thor Thayer
2016-01-04 22:01                     ` Borislav Petkov
2016-01-04 23:42                       ` Thor Thayer
2016-01-05 10:58                         ` Borislav Petkov
2016-01-05 20:37                           ` Thor Thayer
2016-01-05 21:09                             ` Borislav Petkov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.