linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hanna Hawa <hhhawa@amazon.com>
To: <robh+dt@kernel.org>, <mark.rutland@arm.com>, <bp@alien8.de>,
	<mchehab@kernel.org>, <james.morse@arm.com>,
	<davem@davemloft.net>, <gregkh@linuxfoundation.org>,
	<nicolas.ferre@microchip.com>, <paulmck@linux.ibm.com>,
	<dwmw@amazon.co.uk>, <benh@amazon.com>
Cc: <ronenk@amazon.com>, <talel@amazon.com>, <jonnyc@amazon.com>,
	<hanochu@amazon.com>, <hhhawa@amazon.com>,
	<linux-edac@vger.kernel.org>, <devicetree@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Subject: [PATCH 2/2] edac: add support for Amazon's Annapurna Labs EDAC
Date: Thu, 30 May 2019 13:15:29 +0300	[thread overview]
Message-ID: <1559211329-13098-3-git-send-email-hhhawa@amazon.com> (raw)
In-Reply-To: <1559211329-13098-1-git-send-email-hhhawa@amazon.com>

Add support for error detection and correction for Amazon's Annapurna
Labs SoCs for L1/L2 caches.

Amazon's Annapurna Labs SoCs are based on ARM Cortex-A57 and Cortex-A72;
the driver supports both cores, each matched by its compatible string.

Signed-off-by: Hanna Hawa <hhhawa@amazon.com>
---
 MAINTAINERS                        |   7 +
 drivers/edac/Kconfig               |   9 ++
 drivers/edac/Makefile              |   1 +
 drivers/edac/amazon_al_ca57_edac.c | 283 +++++++++++++++++++++++++++++++++++++
 4 files changed, 300 insertions(+)
 create mode 100644 drivers/edac/amazon_al_ca57_edac.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 5cfbea4..87fab6a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5645,6 +5645,13 @@ S:	Supported
 F:	Documentation/filesystems/ecryptfs.txt
 F:	fs/ecryptfs/
 
+EDAC-AMAZON-AL
+M:	Hanna Hawa <hhhawa@amazon.com>
+L:	linux-edac@vger.kernel.org
+S:	Supported
+F:	drivers/edac/amazon_al_ca57_edac.c
+F:	Documentation/devicetree/bindings/edac/amazon-al-edac.txt
+
 EDAC-AMD64
 M:	Borislav Petkov <bp@alien8.de>
 L:	linux-edac@vger.kernel.org
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 5e2e034..1a982f8 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -504,4 +504,13 @@ config EDAC_ASPEED
 	  First, ECC must be configured in the bootloader. Then, this driver
 	  will expose error counters via the EDAC kernel framework.
 
+config EDAC_AMAZON_AL
+	tristate "Amazon AL EDAC"
+	depends on ARCH_ALPINE
+	help
+	  Support for error detection and correction for
+	  Amazon's Annapurna Labs SoCs.
+
+	  This driver detects errors on L1/L2 caches.
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 89ad4a84..7e08974 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -84,3 +84,4 @@ obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
 obj-$(CONFIG_EDAC_TI)			+= ti_edac.o
 obj-$(CONFIG_EDAC_QCOM)			+= qcom_edac.o
 obj-$(CONFIG_EDAC_ASPEED)		+= aspeed_edac.o
+obj-$(CONFIG_EDAC_AMAZON_AL)		+= amazon_al_ca57_edac.o
diff --git a/drivers/edac/amazon_al_ca57_edac.c b/drivers/edac/amazon_al_ca57_edac.c
new file mode 100644
index 0000000..08237c0
--- /dev/null
+++ b/drivers/edac/amazon_al_ca57_edac.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "edac_device.h"
+#include "edac_module.h"
+
+#define DRV_NAME				"al_cache_edac"
+
+/* CPUMERRSR_EL1/L2MERRSR_EL1 bit assignments are the same on ARM CA57/CA72 */
+#define ARM_CA57_CPUMERRSR_INDEX_OFF		(0)
+#define ARM_CA57_CPUMERRSR_INDEX_MASK		(0x3FFFF)
+#define ARM_CA57_CPUMERRSR_BANK_WAY_OFF		(18)
+#define ARM_CA57_CPUMERRSR_BANK_WAY_MASK	(0x1F)
+#define ARM_CA57_CPUMERRSR_RAM_ID_OFF		(24)
+#define ARM_CA57_CPUMERRSR_RAM_ID_MASK		(0x7F)
+#define  ARM_CA57_L1_I_TAG_RAM			0x00
+#define  ARM_CA57_L1_I_DATA_RAM			0x01
+#define  ARM_CA57_L1_D_TAG_RAM			0x08
+#define  ARM_CA57_L1_D_DATA_RAM			0x09
+#define  ARM_CA57_TLB_RAM			0x18
+#define ARM_CA57_CPUMERRSR_VALID_OFF		(31)
+#define ARM_CA57_CPUMERRSR_VALID_MASK		(0x1)
+#define ARM_CA57_CPUMERRSR_REPEAT_OFF		(32)
+#define ARM_CA57_CPUMERRSR_REPEAT_MASK		(0xFF)
+#define ARM_CA57_CPUMERRSR_OTHER_OFF		(40)
+#define ARM_CA57_CPUMERRSR_OTHER_MASK		(0xFF)
+#define ARM_CA57_CPUMERRSR_FATAL_OFF		(63)
+#define ARM_CA57_CPUMERRSR_FATAL_MASK		(0x1)
+
+#define ARM_CA57_L2MERRSR_INDEX_OFF		(0)
+#define ARM_CA57_L2MERRSR_INDEX_MASK		(0x3FFFF)
+#define ARM_CA57_L2MERRSR_CPUID_WAY_OFF		(18)
+#define ARM_CA57_L2MERRSR_CPUID_WAY_MASK	(0xF)
+#define ARM_CA57_L2MERRSR_RAMID_OFF		(24)
+#define ARM_CA57_L2MERRSR_RAMID_MASK		(0x7F)
+#define  ARM_CA57_L2_TAG_RAM			0x10
+#define  ARM_CA57_L2_DATA_RAM			0x11
+#define  ARM_CA57_L2_SNOOP_RAM			0x12
+#define  ARM_CA57_L2_DIRTY_RAM			0x14
+#define  ARM_CA57_L2_INC_PLRU_RAM		0x18
+#define ARM_CA57_L2MERRSR_VALID_OFF		(31)
+#define ARM_CA57_L2MERRSR_VALID_MASK		(0x1)
+#define ARM_CA57_L2MERRSR_REPEAT_OFF		(32)
+#define ARM_CA57_L2MERRSR_REPEAT_MASK		(0xFF)
+#define ARM_CA57_L2MERRSR_OTHER_OFF		(40)
+#define ARM_CA57_L2MERRSR_OTHER_MASK		(0xFF)
+#define ARM_CA57_L2MERRSR_FATAL_OFF		(63)
+#define ARM_CA57_L2MERRSR_FATAL_MASK		(0x1)
+
+/* Accessors for CPUMERRSR_EL1, addressed by its s3_1_c15_c2_2 encoding. */
+static inline u64 read_cpumerrsr_el1(void)
+{
+	u64 merrsr;
+
+	asm volatile("mrs %0, s3_1_c15_c2_2" : "=r" (merrsr));
+	return merrsr;
+}
+
+static inline void write_cpumerrsr_el1(u64 val)
+{
+	asm volatile("msr s3_1_c15_c2_2, %0" :: "r" (val));
+}
+
+/* Accessors for L2MERRSR_EL1, addressed by its s3_1_c15_c2_3 encoding. */
+static inline u64 read_l2merrsr_el1(void)
+{
+	u64 merrsr;
+
+	asm volatile("mrs %0, s3_1_c15_c2_3" : "=r" (merrsr));
+	return merrsr;
+}
+
+static inline void write_l2merrsr_el1(u64 val)
+{
+	asm volatile("msr s3_1_c15_c2_3, %0" :: "r" (val));
+}
+
+static void al_a57_edac_cpumerrsr(void *arg)
+{
+	struct edac_device_ctl_info *edac_dev =
+		(struct edac_device_ctl_info *)arg;
+	int cpu;
+	u32 index, way, ramid, repeat, other, fatal;
+	u64 val = read_cpumerrsr_el1();
+
+	/* Return if no valid error */
+	if (!((val >> ARM_CA57_CPUMERRSR_VALID_OFF) &
+	      ARM_CA57_CPUMERRSR_VALID_MASK))
+		return;
+
+	cpu = smp_processor_id();
+	index = (val >> ARM_CA57_CPUMERRSR_INDEX_OFF) &
+		ARM_CA57_CPUMERRSR_INDEX_MASK;
+	way = (val >> ARM_CA57_CPUMERRSR_BANK_WAY_OFF) &
+		ARM_CA57_CPUMERRSR_BANK_WAY_MASK;
+	ramid = (val >> ARM_CA57_CPUMERRSR_RAM_ID_OFF) &
+		ARM_CA57_CPUMERRSR_RAM_ID_MASK;
+	repeat = (val >> ARM_CA57_CPUMERRSR_REPEAT_OFF) &
+		ARM_CA57_CPUMERRSR_REPEAT_MASK;
+	other = (val >> ARM_CA57_CPUMERRSR_OTHER_OFF) &
+		ARM_CA57_CPUMERRSR_OTHER_MASK;
+	fatal = (val >> ARM_CA57_CPUMERRSR_FATAL_OFF) &
+		ARM_CA57_CPUMERRSR_FATAL_MASK;
+
+	edac_device_handle_ce(edac_dev, 0, 0, "L2 Error");
+	edac_printk(KERN_CRIT, DRV_NAME, "CPU%d L1 %serror detected\n",
+		    cpu, (fatal) ? "Fatal " : "");
+	edac_printk(KERN_CRIT, DRV_NAME, "RAMID=");
+
+	switch (ramid) {
+	case ARM_CA57_L1_I_TAG_RAM:
+		pr_cont("'L1-I Tag RAM' index=%d way=%d", index, way);
+		break;
+	case ARM_CA57_L1_I_DATA_RAM:
+		pr_cont("'L1-I Data RAM' index=%d bank= %d", index, way);
+		break;
+	case ARM_CA57_L1_D_TAG_RAM:
+		pr_cont("'L1-D Tag RAM' index=%d way=%d", index, way);
+		break;
+	case ARM_CA57_L1_D_DATA_RAM:
+		pr_cont("'L1-D Data RAM' index=%d bank=%d", index, way);
+		break;
+	case ARM_CA57_TLB_RAM:
+		pr_cont("'TLB RAM' index=%d", index);
+		break;
+	default:
+		pr_cont("'unknown'");
+		break;
+	}
+
+	pr_cont(", repeat=%d, other=%d (CPUMERRSR_EL1=0x%llx)\n", repeat, other,
+		val);
+
+	write_cpumerrsr_el1(0);
+}
+
+static void al_a57_edac_l2merrsr(void *arg)
+{
+	struct edac_device_ctl_info *edac_dev =
+		(struct edac_device_ctl_info *)arg;
+	int cpu;
+	u32 index, way, ramid, repeat, other, fatal;
+	u64 val = read_l2merrsr_el1();
+
+	/* Return if no valid error */
+	if (!((val >> ARM_CA57_L2MERRSR_VALID_OFF) &
+	      ARM_CA57_L2MERRSR_VALID_MASK))
+		return;
+
+	cpu = smp_processor_id();
+	index = (val >> ARM_CA57_L2MERRSR_INDEX_OFF) &
+		ARM_CA57_L2MERRSR_INDEX_MASK;
+	way = (val >> ARM_CA57_L2MERRSR_CPUID_WAY_OFF) &
+		ARM_CA57_L2MERRSR_CPUID_WAY_MASK;
+	ramid = (val >> ARM_CA57_L2MERRSR_RAMID_OFF) &
+		ARM_CA57_L2MERRSR_RAMID_MASK;
+	repeat = (val >> ARM_CA57_L2MERRSR_REPEAT_OFF) &
+		ARM_CA57_L2MERRSR_REPEAT_MASK;
+	other = (val >> ARM_CA57_L2MERRSR_OTHER_OFF) &
+		ARM_CA57_L2MERRSR_OTHER_MASK;
+	fatal = (val >> ARM_CA57_L2MERRSR_FATAL_OFF) &
+		ARM_CA57_L2MERRSR_FATAL_MASK;
+
+	edac_device_handle_ce(edac_dev, 0, 0, "L2 Error");
+	edac_printk(KERN_CRIT, DRV_NAME, "CPU%d L2 %serror detected\n",
+		    cpu, (fatal) ? "Fatal " : "");
+	edac_printk(KERN_CRIT, DRV_NAME, "RAMID=");
+
+	switch (ramid) {
+	case ARM_CA57_L2_TAG_RAM:
+		pr_cont("'L2 Tag RAM'");
+		break;
+	case ARM_CA57_L2_DATA_RAM:
+		pr_cont("'L2 Data RAM'");
+		break;
+	case ARM_CA57_L2_SNOOP_RAM:
+		pr_cont("'L2 Snoop RAM'");
+		break;
+	case ARM_CA57_L2_DIRTY_RAM:
+		pr_cont("'L2 Dirty RAM'");
+		break;
+	case ARM_CA57_L2_INC_PLRU_RAM:
+		pr_cont("'L2 Inclusion PLRU RAM'");
+		break;
+	default:
+		pr_cont("'unknown'");
+		break;
+	}
+
+	pr_cont(", cpuid/way=%d, repeat=%d, other=%d (L2MERRSR_EL1=0x%llx)\n",
+		way, repeat, other, val);
+
+	write_l2merrsr_el1(0);
+}
+
+/* Poll callback: check L1 on every online CPU and L2 once per cluster. */
+static void al_a57_edac_check(struct edac_device_ctl_info *edac_dev)
+{
+	int cpu, cluster, last_cluster = -1;
+
+	/*
+	 * Hold the hotplug lock and wait for each cross-call (wait=1) so the
+	 * handlers are done before the CPU map or edac_dev can change.
+	 */
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, al_a57_edac_cpumerrsr, edac_dev,
+					 1);
+		cluster = topology_physical_package_id(cpu);
+		/* Only single CPU will read the L2 error status */
+		if (cluster != last_cluster) {
+			smp_call_function_single(cpu, al_a57_edac_l2merrsr,
+						 edac_dev, 1);
+			last_cluster = cluster;
+		}
+	}
+	put_online_cpus();
+}
+
+/* Allocate a polled EDAC device (one instance, two "L" blocks) and add it. */
+static int al_a57_edac_probe(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edac_dev;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	/* The allocator reports failure with NULL, not an ERR_PTR() value */
+	edac_dev = edac_device_alloc_ctl_info(0, DRV_NAME, 1, "L", 2, 1, NULL,
+					      0, edac_device_alloc_index());
+	if (!edac_dev)
+		return -ENOMEM;
+
+	edac_dev->edac_check = al_a57_edac_check;
+	edac_dev->dev = dev;
+	edac_dev->mod_name = dev_name(dev);
+	edac_dev->dev_name = dev_name(dev);
+	edac_dev->ctl_name = dev_name(dev);
+	platform_set_drvdata(pdev, edac_dev);
+	ret = edac_device_add_device(edac_dev);
+	if (ret)
+		edac_device_free_ctl_info(edac_dev);
+
+	return ret;
+}
+
+/* Unregister and free the EDAC device kept as this platform device's drvdata. */
+static int al_a57_edac_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *ctl = platform_get_drvdata(pdev);
+
+	edac_device_del_device(ctl->dev);
+	edac_device_free_ctl_info(ctl);
+	return 0;
+}
+
+static const struct of_device_id al_a57_edac_of_match[] = {
+	{ .compatible = "amazon,al-cortex-a57-edac" },
+	{ .compatible = "amazon,al-cortex-a72-edac" },
+	{ /* sentinel: empty entry ends the table */ }
+};
+MODULE_DEVICE_TABLE(of, al_a57_edac_of_match);
+
+static struct platform_driver al_a57_edac_driver = {
+	.probe = al_a57_edac_probe,
+	.remove = al_a57_edac_remove,
+	.driver = {
+		.name = DRV_NAME,	/* "al_cache_edac" */
+		.of_match_table = al_a57_edac_of_match,	/* A57/A72 */
+	},
+};
+module_platform_driver(al_a57_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hanna Hawa <hhhawa@amazon.com>");
-- 
2.7.4


  parent reply	other threads:[~2019-05-30 10:16 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-30 10:15 [PATCH 0/2] Add support for Amazon's Annapurna Labs EDAC for L1/L2 Hanna Hawa
2019-05-30 10:15 ` [PATCH 1/2] dt-bindings: EDAC: add Amazon Annapurna Labs EDAC binding Hanna Hawa
2019-05-30 11:54   ` Greg KH
2019-05-31  0:35     ` Borislav Petkov
2019-05-30 10:15 ` Hanna Hawa [this message]
2019-05-30 11:57   ` [PATCH 2/2] edac: add support for Amazon's Annapurna Labs EDAC Greg KH
2019-05-30 12:52     ` hhhawa
2019-05-30 13:04       ` Joe Perches
2019-05-30 18:19   ` Boris Petkov
2019-05-31  1:15     ` Herrenschmidt, Benjamin
2019-05-31  5:14       ` Borislav Petkov
2019-06-05 15:13         ` James Morse
2019-06-06  7:53         ` Hawa, Hanna
2019-06-06 10:03           ` Borislav Petkov
2019-06-06 10:33           ` James Morse
2019-06-06 11:22             ` Borislav Petkov
2019-06-06 11:37             ` Shenhar, Talel
2019-06-07 15:11               ` James Morse
2019-06-08  0:22                 ` Benjamin Herrenschmidt
2019-06-08  0:16             ` Benjamin Herrenschmidt
2019-06-08  9:05               ` Borislav Petkov
2019-06-11  5:50                 ` Benjamin Herrenschmidt
2019-06-11  7:21                   ` Benjamin Herrenschmidt
2019-06-11 11:56                     ` Borislav Petkov
2019-06-11 22:25                       ` Benjamin Herrenschmidt
2019-06-12  3:48                         ` Borislav Petkov
2019-06-12  8:29                           ` Benjamin Herrenschmidt
2019-06-12 10:42                             ` Borislav Petkov
2019-06-12 23:54                               ` Benjamin Herrenschmidt
2019-06-13  7:44                                 ` Borislav Petkov
2019-06-14 10:53                                 ` Borislav Petkov
2019-06-12 10:42                             ` Mauro Carvalho Chehab
2019-06-12 11:00                               ` Borislav Petkov
2019-06-12 11:42                                 ` Mauro Carvalho Chehab
2019-06-12 11:57                                   ` Benjamin Herrenschmidt
2019-06-12 12:25                                     ` Borislav Petkov
2019-06-12 12:35                                       ` Hawa, Hanna
2019-06-12 15:34                                         ` Borislav Petkov
2019-06-12 23:57                                       ` Benjamin Herrenschmidt
2019-06-12 23:56                                 ` Benjamin Herrenschmidt
2019-06-11  7:29                   ` Hawa, Hanna
2019-06-11 11:59                     ` Borislav Petkov
2019-06-11 11:47                   ` Borislav Petkov
2019-06-03  6:56       ` Hawa, Hanna
2019-06-05 15:16   ` James Morse
2019-06-11 19:56     ` Hawa, Hanna
2019-06-13 17:05       ` James Morse
2019-06-14 10:49         ` James Morse
2019-06-17 13:00         ` Hawa, Hanna
2019-06-19 17:22           ` James Morse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1559211329-13098-3-git-send-email-hhhawa@amazon.com \
    --to=hhhawa@amazon.com \
    --cc=benh@amazon.com \
    --cc=bp@alien8.de \
    --cc=davem@davemloft.net \
    --cc=devicetree@vger.kernel.org \
    --cc=dwmw@amazon.co.uk \
    --cc=gregkh@linuxfoundation.org \
    --cc=hanochu@amazon.com \
    --cc=james.morse@arm.com \
    --cc=jonnyc@amazon.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mchehab@kernel.org \
    --cc=nicolas.ferre@microchip.com \
    --cc=paulmck@linux.ibm.com \
    --cc=robh+dt@kernel.org \
    --cc=ronenk@amazon.com \
    --cc=talel@amazon.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).