All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] edac: armv8_edac: Add ARMv8 EDAC driver
@ 2018-08-02 20:49 ` Tyler Baicar
  0 siblings, 0 replies; 8+ messages in thread
From: Tyler Baicar @ 2018-08-02 20:49 UTC (permalink / raw)
  To: linux-arm-kernel, linux-edac; +Cc: Tyler Baicar

Add ARMv8 EDAC driver to detect and report errors that are reported
through the ARMv8.2 RAS extensions.

This is by no means complete hence the RFC. At minimum, this should
also have support to check the SRs of each CPU during boot time to
check for anything pending. Also, the SEI code flow could be improved
to separate the check for uncontained errors and panic ASAP before
doing any of this reporting.

This is also limited to SR based RAS extension nodes. There is not
currently a way to detect memory mapped RAS extension nodes. The
initialization which prints the FR registers only does so for a
single CPU, so the assumption is that all CPUs are identical from
a RAS extension node perspective.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
---
 arch/arm64/include/asm/ras.h | 26 +++++++++++++++++++
 arch/arm64/kernel/Makefile   |  1 +
 arch/arm64/kernel/ras.c      | 54 ++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/traps.c    |  4 +++
 arch/arm64/mm/fault.c        |  4 +++
 drivers/edac/Kconfig         |  9 +++++++
 drivers/edac/Makefile        |  2 ++
 drivers/edac/armv8_edac.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/armv8_edac.h   | 20 +++++++++++++++
 9 files changed, 179 insertions(+)
 create mode 100644 arch/arm64/include/asm/ras.h
 create mode 100644 arch/arm64/kernel/ras.c
 create mode 100644 drivers/edac/armv8_edac.c
 create mode 100644 include/linux/armv8_edac.h

diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h
new file mode 100644
index 0000000..29b30e8
--- /dev/null
+++ b/arch/arm64/include/asm/ras.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef ARM64_RAS_H
+#define ARM64_RAS_H
+
+bool arch_cpu_has_ras_extensions(void);
+
+u64 arch_cpu_num_ras_nodes(void);
+
+void arch_report_ras_error_on_node(unsigned int cpu_num, unsigned int i);
+
+void arch_ras_node_setup(unsigned int i);
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 0025f869..c7998f4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,6 +55,7 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 arm64-obj-$(CONFIG_ARM_SDE_INTERFACE)	+= sdei.o
 arm64-obj-$(CONFIG_ARM64_SSBD)		+= ssbd.o
+arm64-obj-$(CONFIG_EDAC_ARMV8)		+= ras.o
 
 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
new file mode 100644
index 0000000..2cb0265
--- /dev/null
+++ b/arch/arm64/kernel/ras.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+
+#include <asm/ras.h>
+
+bool arch_cpu_has_ras_extensions(void)
+{
+	return this_cpu_has_cap(ARM64_HAS_RAS_EXTN);
+}
+
+u64 arch_cpu_num_ras_nodes(void)
+{
+	return read_sysreg_s(SYS_ERRIDR_EL1);
+}
+
+void arch_report_ras_error_on_node(unsigned int cpu_num, unsigned int i)
+{
+	u64 status;
+
+	write_sysreg_s(i, SYS_ERRSELR_EL1);
+	status = read_sysreg_s(SYS_ERXSTATUS_EL1);
+	if (status) {
+		pr_err("cpu #%u: ERR%uSTATUS = 0x%llx\n", cpu_num, i, status);
+		pr_err("cpu #%u: ERR%uCTLR   = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXCTLR_EL1));
+		pr_err("cpu #%u: ERR%uADDR   = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXADDR_EL1));
+		pr_err("cpu #%u: ERR%uMISC0  = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXMISC0_EL1));
+		pr_err("cpu #%u: ERR%uMISC1  = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXMISC1_EL1));
+	}
+}
+
+void arch_ras_node_setup(unsigned int i)
+{
+	write_sysreg_s(i, SYS_ERRSELR_EL1);
+	pr_info("ERR%uFR = 0x%llx\n", i, read_sysreg_s(SYS_ERXFR_EL1));
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index d399d45..53ed974 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -35,6 +35,7 @@
 #include <linux/sizes.h>
 #include <linux/syscalls.h>
 #include <linux/mm_types.h>
+#include <linux/armv8_edac.h>
 
 #include <asm/atomic.h>
 #include <asm/bug.h>
@@ -731,6 +732,9 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
 {
 	nmi_enter();
 
+	if (IS_ENABLED(CONFIG_EDAC_ARMV8))
+		armv8_edac_report_error();
+
 	/* non-RAS errors are not containable */
 	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
 		arm64_serror_panic(regs, esr);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b8eecc7..49debe7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -32,6 +32,7 @@
 #include <linux/perf_event.h>
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>
+#include <linux/armv8_edac.h>
 
 #include <asm/bug.h>
 #include <asm/cmpxchg.h>
@@ -641,6 +642,9 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 
 		ghes_notify_sea();
 
+		if (IS_ENABLED(CONFIG_EDAC_ARMV8))
+			armv8_edac_report_error();
+
 		if (interrupts_enabled(regs))
 			nmi_exit();
 	}
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 57304b2..da0281d 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -74,6 +74,15 @@ config EDAC_GHES
 
 	  In doubt, say 'Y'.
 
+config EDAC_ARMV8
+	bool "ARMv8 RAS extension based error reporting"
+	depends on (EDAC=y) && ARM64_RAS_EXTN
+	default y
+	help
+	  Support for error reporting through the ARMv8.2 RAS extension. The
+	  ARMv8.2 RAS extension provides an architected way of reporting
+	  hardware errors on ARM systems.
+
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64)"
 	depends on AMD_NB && EDAC_DECODE_MCE
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 02b43a7..001820d 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -50,6 +50,8 @@ amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
 
 obj-$(CONFIG_EDAC_AMD64)		+= amd64_edac_mod.o
 
+obj-$(CONFIG_EDAC_ARMV8)		+= armv8_edac.o
+
 obj-$(CONFIG_EDAC_PASEMI)		+= pasemi_edac.o
 
 mpc85xx_edac_mod-y			:= fsl_ddr_edac.o mpc85xx_edac.o
diff --git a/drivers/edac/armv8_edac.c b/drivers/edac/armv8_edac.c
new file mode 100644
index 0000000..9df3dd8
--- /dev/null
+++ b/drivers/edac/armv8_edac.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/armv8_edac.h>
+#include <linux/kernel.h>
+#include <linux/edac.h>
+
+#include <asm/ras.h>
+
+void armv8_edac_report_error(void)
+{
+	unsigned int cpu_num, i;
+	u64 num_nodes;
+
+	cpu_num = get_cpu();
+	if (!arch_cpu_has_ras_extensions())
+		return;
+
+	num_nodes = arch_cpu_num_ras_nodes();
+
+	for (i = 0; i < num_nodes; i++)
+		arch_report_ras_error_on_node(cpu_num, i);
+	put_cpu();
+}
+
+static int __init armv8_edac_init(void)
+{
+	unsigned int i;
+	u64 num_nodes;
+
+	get_cpu();
+	if (!arch_cpu_has_ras_extensions()) {
+		put_cpu();
+		return -ENOTSUPP;
+	}
+
+	pr_err("CPU has RAS extension\n");
+	num_nodes = arch_cpu_num_ras_nodes();
+
+	for (i = 0; i < num_nodes; i++) {
+		arch_ras_node_setup(i);
+	}
+
+	put_cpu();
+	return 0;
+}
+
+early_initcall(armv8_edac_init);
diff --git a/include/linux/armv8_edac.h b/include/linux/armv8_edac.h
new file mode 100644
index 0000000..222f7ea
--- /dev/null
+++ b/include/linux/armv8_edac.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef LINUX_ARMV8_EDAC_H
+#define LINUX_ARMV8_EDAC_H
+
+void armv8_edac_report_error(void);
+
+#endif

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH] edac: armv8_edac: Add ARMv8 EDAC driver
@ 2018-08-02 20:49 ` Tyler Baicar
  0 siblings, 0 replies; 8+ messages in thread
From: Tyler Baicar @ 2018-08-02 20:49 UTC (permalink / raw)
  To: linux-arm-kernel

Add ARMv8 EDAC driver to detect and report errors that are reported
through the ARMv8.2 RAS extensions.

This is by no means complete hence the RFC. At minimum, this should
also have support to check the SRs of each CPU during boot time to
check for anything pending. Also, the SEI code flow could be improved
to separate the check for uncontained errors and panic ASAP before
doing any of this reporting.

This is also limited to SR based RAS extension nodes. There is not
currently a way to detect memory mapped RAS extension nodes. The
initialization which prints the FR registers only does so for a
single CPU, so the assumption is that all CPUs are identical from
a RAS extension node perspective.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
---
 arch/arm64/include/asm/ras.h | 26 +++++++++++++++++++
 arch/arm64/kernel/Makefile   |  1 +
 arch/arm64/kernel/ras.c      | 54 ++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/traps.c    |  4 +++
 arch/arm64/mm/fault.c        |  4 +++
 drivers/edac/Kconfig         |  9 +++++++
 drivers/edac/Makefile        |  2 ++
 drivers/edac/armv8_edac.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/armv8_edac.h   | 20 +++++++++++++++
 9 files changed, 179 insertions(+)
 create mode 100644 arch/arm64/include/asm/ras.h
 create mode 100644 arch/arm64/kernel/ras.c
 create mode 100644 drivers/edac/armv8_edac.c
 create mode 100644 include/linux/armv8_edac.h

diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h
new file mode 100644
index 0000000..29b30e8
--- /dev/null
+++ b/arch/arm64/include/asm/ras.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef ARM64_RAS_H
+#define ARM64_RAS_H
+
+bool arch_cpu_has_ras_extensions(void);
+
+u64 arch_cpu_num_ras_nodes(void);
+
+void arch_report_ras_error_on_node(unsigned int cpu_num, unsigned int i);
+
+void arch_ras_node_setup(unsigned int i);
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 0025f869..c7998f4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,6 +55,7 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 arm64-obj-$(CONFIG_ARM_SDE_INTERFACE)	+= sdei.o
 arm64-obj-$(CONFIG_ARM64_SSBD)		+= ssbd.o
+arm64-obj-$(CONFIG_EDAC_ARMV8)		+= ras.o
 
 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
new file mode 100644
index 0000000..2cb0265
--- /dev/null
+++ b/arch/arm64/kernel/ras.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+
+#include <asm/ras.h>
+
+bool arch_cpu_has_ras_extensions(void)
+{
+	return this_cpu_has_cap(ARM64_HAS_RAS_EXTN);
+}
+
+u64 arch_cpu_num_ras_nodes(void)
+{
+	return read_sysreg_s(SYS_ERRIDR_EL1);
+}
+
+void arch_report_ras_error_on_node(unsigned int cpu_num, unsigned int i)
+{
+	u64 status;
+
+	write_sysreg_s(i, SYS_ERRSELR_EL1);
+	status = read_sysreg_s(SYS_ERXSTATUS_EL1);
+	if (status) {
+		pr_err("cpu #%u: ERR%uSTATUS = 0x%llx\n", cpu_num, i, status);
+		pr_err("cpu #%u: ERR%uCTLR   = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXCTLR_EL1));
+		pr_err("cpu #%u: ERR%uADDR   = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXADDR_EL1));
+		pr_err("cpu #%u: ERR%uMISC0  = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXMISC0_EL1));
+		pr_err("cpu #%u: ERR%uMISC1  = 0x%llx\n",
+			cpu_num, i, read_sysreg_s(SYS_ERXMISC1_EL1));
+	}
+}
+
+void arch_ras_node_setup(unsigned int i)
+{
+	write_sysreg_s(i, SYS_ERRSELR_EL1);
+	pr_info("ERR%uFR = 0x%llx\n", i, read_sysreg_s(SYS_ERXFR_EL1));
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index d399d45..53ed974 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -35,6 +35,7 @@
 #include <linux/sizes.h>
 #include <linux/syscalls.h>
 #include <linux/mm_types.h>
+#include <linux/armv8_edac.h>
 
 #include <asm/atomic.h>
 #include <asm/bug.h>
@@ -731,6 +732,9 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
 {
 	nmi_enter();
 
+	if (IS_ENABLED(CONFIG_EDAC_ARMV8))
+		armv8_edac_report_error();
+
 	/* non-RAS errors are not containable */
 	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
 		arm64_serror_panic(regs, esr);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b8eecc7..49debe7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -32,6 +32,7 @@
 #include <linux/perf_event.h>
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>
+#include <linux/armv8_edac.h>
 
 #include <asm/bug.h>
 #include <asm/cmpxchg.h>
@@ -641,6 +642,9 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 
 		ghes_notify_sea();
 
+		if (IS_ENABLED(CONFIG_EDAC_ARMV8))
+			armv8_edac_report_error();
+
 		if (interrupts_enabled(regs))
 			nmi_exit();
 	}
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 57304b2..da0281d 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -74,6 +74,15 @@ config EDAC_GHES
 
 	  In doubt, say 'Y'.
 
+config EDAC_ARMV8
+	bool "ARMv8 RAS extension based error reporting"
+	depends on (EDAC=y) && ARM64_RAS_EXTN
+	default y
+	help
+	  Support for error reporting through the ARMv8.2 RAS extension. The
+	  ARMv8.2 RAS extension provides an architected way of reporting
+	  hardware errors on ARM systems.
+
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64)"
 	depends on AMD_NB && EDAC_DECODE_MCE
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 02b43a7..001820d 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -50,6 +50,8 @@ amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
 
 obj-$(CONFIG_EDAC_AMD64)		+= amd64_edac_mod.o
 
+obj-$(CONFIG_EDAC_ARMV8)		+= armv8_edac.o
+
 obj-$(CONFIG_EDAC_PASEMI)		+= pasemi_edac.o
 
 mpc85xx_edac_mod-y			:= fsl_ddr_edac.o mpc85xx_edac.o
diff --git a/drivers/edac/armv8_edac.c b/drivers/edac/armv8_edac.c
new file mode 100644
index 0000000..9df3dd8
--- /dev/null
+++ b/drivers/edac/armv8_edac.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/armv8_edac.h>
+#include <linux/kernel.h>
+#include <linux/edac.h>
+
+#include <asm/ras.h>
+
+void armv8_edac_report_error(void)
+{
+	unsigned int cpu_num, i;
+	u64 num_nodes;
+
+	cpu_num = get_cpu();
+	if (!arch_cpu_has_ras_extensions())
+		return;
+
+	num_nodes = arch_cpu_num_ras_nodes();
+
+	for (i = 0; i < num_nodes; i++)
+		arch_report_ras_error_on_node(cpu_num, i);
+	put_cpu();
+}
+
+static int __init armv8_edac_init(void)
+{
+	unsigned int i;
+	u64 num_nodes;
+
+	get_cpu();
+	if (!arch_cpu_has_ras_extensions()) {
+		put_cpu();
+		return -ENOTSUPP;
+	}
+
+	pr_err("CPU has RAS extension\n");
+	num_nodes = arch_cpu_num_ras_nodes();
+
+	for (i = 0; i < num_nodes; i++) {
+		arch_ras_node_setup(i);
+	}
+
+	put_cpu();
+	return 0;
+}
+
+early_initcall(armv8_edac_init);
diff --git a/include/linux/armv8_edac.h b/include/linux/armv8_edac.h
new file mode 100644
index 0000000..222f7ea
--- /dev/null
+++ b/include/linux/armv8_edac.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef LINUX_ARMV8_EDAC_H
+#define LINUX_ARMV8_EDAC_H
+
+void armv8_edac_report_error(void);
+
+#endif
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC] edac: armv8_edac: Add ARMv8 EDAC driver
  2018-08-02 20:49 ` [RFC PATCH] " Tyler Baicar
@ 2018-08-16  7:35 ` Borislav Petkov
  -1 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2018-08-16  7:35 UTC (permalink / raw)
  To: Tyler Baicar; +Cc: linux-arm-kernel, linux-edac, James Morse

+ James.

On Thu, Aug 02, 2018 at 04:49:50PM -0400, Tyler Baicar wrote:
> Add ARMv8 EDAC driver to detect and report errors that are reported
> through the ARMv8.2 RAS extensions.
> 
> This is by no means complete hence the RFC. At minimum, this should
> also have support to check the SRs of each CPU during boot time to
> check for anything pending. Also, the SEI code flow could be improved
> to separate the check for uncontained errors and panic ASAP before
> doing any of this reporting.
> 
> This is also limited to SR based RAS extension nodes. There is not
> currently a way to detect memory mapped RAS extension nodes. The
> initialization which prints the FR registers only does so for a
> single CPU, so the assumption is that all CPUs are identical from
> a RAS extension node perspective.
> 
> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
> ---
>  arch/arm64/include/asm/ras.h | 26 +++++++++++++++++++
>  arch/arm64/kernel/Makefile   |  1 +
>  arch/arm64/kernel/ras.c      | 54 ++++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/traps.c    |  4 +++
>  arch/arm64/mm/fault.c        |  4 +++
>  drivers/edac/Kconfig         |  9 +++++++
>  drivers/edac/Makefile        |  2 ++
>  drivers/edac/armv8_edac.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/armv8_edac.h   | 20 +++++++++++++++
>  9 files changed, 179 insertions(+)
>  create mode 100644 arch/arm64/include/asm/ras.h
>  create mode 100644 arch/arm64/kernel/ras.c
>  create mode 100644 drivers/edac/armv8_edac.c
>  create mode 100644 include/linux/armv8_edac.h

Remember to always run your stuff through checkpatch to catch legit issues:

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#48: 
new file mode 100644

Yes it does - I need a MAINTAINERS entry for the edac driver so that you
get all the bug reports. :)

And then more:

WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
#53: FILE: arch/arm64/include/asm/ras.h:1:
+// SPDX-License-Identifier: GPL-2.0

WARNING: please write a paragraph that describes the config symbol fully
#203: FILE: drivers/edac/Kconfig:77:
+config EDAC_ARMV8

WARNING: braces {} are not necessary for single statement blocks
#284: FILE: drivers/edac/armv8_edac.c:51:
+	for (i = 0; i < num_nodes; i++) {
+		arch_ras_node_setup(i);
+	}

WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
#299: FILE: include/linux/armv8_edac.h:1:
+// SPDX-License-Identifier: GPL-2.0

> diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
> new file mode 100644
> index 0000000..2cb0265
> --- /dev/null
> +++ b/arch/arm64/kernel/ras.c
> @@ -0,0 +1,54 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2018, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/cpu.h>
> +#include <linux/smp.h>
> +
> +#include <asm/ras.h>
> +
> +bool arch_cpu_has_ras_extensions(void)
> +{
> +	return this_cpu_has_cap(ARM64_HAS_RAS_EXTN);
> +}
> +
> +u64 arch_cpu_num_ras_nodes(void)
> +{
> +	return read_sysreg_s(SYS_ERRIDR_EL1);
> +}
> +
> +void arch_report_ras_error_on_node(unsigned int cpu_num, unsigned int i)
> +{
> +	u64 status;
> +
> +	write_sysreg_s(i, SYS_ERRSELR_EL1);
> +	status = read_sysreg_s(SYS_ERXSTATUS_EL1);
> +	if (status) {
> +		pr_err("cpu #%u: ERR%uSTATUS = 0x%llx\n", cpu_num, i, status);
> +		pr_err("cpu #%u: ERR%uCTLR   = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXCTLR_EL1));
> +		pr_err("cpu #%u: ERR%uADDR   = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXADDR_EL1));
> +		pr_err("cpu #%u: ERR%uMISC0  = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXMISC0_EL1));
> +		pr_err("cpu #%u: ERR%uMISC1  = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXMISC1_EL1));
> +	}
> +}
> +
> +void arch_ras_node_setup(unsigned int i)
> +{
> +	write_sysreg_s(i, SYS_ERRSELR_EL1);
> +	pr_info("ERR%uFR = 0x%llx\n", i, read_sysreg_s(SYS_ERXFR_EL1));
> +}
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index d399d45..53ed974 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -35,6 +35,7 @@
>  #include <linux/sizes.h>
>  #include <linux/syscalls.h>
>  #include <linux/mm_types.h>
> +#include <linux/armv8_edac.h>
>  
>  #include <asm/atomic.h>
>  #include <asm/bug.h>
> @@ -731,6 +732,9 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
>  {
>  	nmi_enter();
>  
> +	if (IS_ENABLED(CONFIG_EDAC_ARMV8))
> +		armv8_edac_report_error();

I would strongly advise against that - calling driver code from arch
core code. You've made armv8_edac bool as it cannot be a module because
of it, which is silly. I was lucky that we were able to remove that ugly
dependency from x86 traps code. Hint: use a notifier and this way the
edac driver can be a module too.

And looking at armv8_edac.c - this is making the whole error reporting
unnecessarily complicated. Why? Why aren't you doing everything in
ras.c?!

I don't see the need for adding anything to drivers/edac/ *at* *all*!

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH] edac: armv8_edac: Add ARMv8 EDAC driver
@ 2018-08-16  7:35 ` Borislav Petkov
  0 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2018-08-16  7:35 UTC (permalink / raw)
  To: linux-arm-kernel

+ James.

On Thu, Aug 02, 2018 at 04:49:50PM -0400, Tyler Baicar wrote:
> Add ARMv8 EDAC driver to detect and report errors that are reported
> through the ARMv8.2 RAS extensions.
> 
> This is by no means complete hence the RFC. At minimum, this should
> also have support to check the SRs of each CPU during boot time to
> check for anything pending. Also, the SEI code flow could be improved
> to separate the check for uncontained errors and panic ASAP before
> doing any of this reporting.
> 
> This is also limited to SR based RAS extension nodes. There is not
> currently a way to detect memory mapped RAS extension nodes. The
> initialization which prints the FR registers only does so for a
> single CPU, so the assumption is that all CPUs are identical from
> a RAS extension node perspective.
> 
> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
> ---
>  arch/arm64/include/asm/ras.h | 26 +++++++++++++++++++
>  arch/arm64/kernel/Makefile   |  1 +
>  arch/arm64/kernel/ras.c      | 54 ++++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/traps.c    |  4 +++
>  arch/arm64/mm/fault.c        |  4 +++
>  drivers/edac/Kconfig         |  9 +++++++
>  drivers/edac/Makefile        |  2 ++
>  drivers/edac/armv8_edac.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/armv8_edac.h   | 20 +++++++++++++++
>  9 files changed, 179 insertions(+)
>  create mode 100644 arch/arm64/include/asm/ras.h
>  create mode 100644 arch/arm64/kernel/ras.c
>  create mode 100644 drivers/edac/armv8_edac.c
>  create mode 100644 include/linux/armv8_edac.h

Remember to always run your stuff through checkpatch to catch legit issues:

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#48: 
new file mode 100644

Yes it does - I need a MAINTAINERS entry for the edac driver so that you
get all the bug reports. :)

And then more:

WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
#53: FILE: arch/arm64/include/asm/ras.h:1:
+// SPDX-License-Identifier: GPL-2.0

WARNING: please write a paragraph that describes the config symbol fully
#203: FILE: drivers/edac/Kconfig:77:
+config EDAC_ARMV8

WARNING: braces {} are not necessary for single statement blocks
#284: FILE: drivers/edac/armv8_edac.c:51:
+	for (i = 0; i < num_nodes; i++) {
+		arch_ras_node_setup(i);
+	}

WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
#299: FILE: include/linux/armv8_edac.h:1:
+// SPDX-License-Identifier: GPL-2.0

> diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
> new file mode 100644
> index 0000000..2cb0265
> --- /dev/null
> +++ b/arch/arm64/kernel/ras.c
> @@ -0,0 +1,54 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2018, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/cpu.h>
> +#include <linux/smp.h>
> +
> +#include <asm/ras.h>
> +
> +bool arch_cpu_has_ras_extensions(void)
> +{
> +	return this_cpu_has_cap(ARM64_HAS_RAS_EXTN);
> +}
> +
> +u64 arch_cpu_num_ras_nodes(void)
> +{
> +	return read_sysreg_s(SYS_ERRIDR_EL1);
> +}
> +
> +void arch_report_ras_error_on_node(unsigned int cpu_num, unsigned int i)
> +{
> +	u64 status;
> +
> +	write_sysreg_s(i, SYS_ERRSELR_EL1);
> +	status = read_sysreg_s(SYS_ERXSTATUS_EL1);
> +	if (status) {
> +		pr_err("cpu #%u: ERR%uSTATUS = 0x%llx\n", cpu_num, i, status);
> +		pr_err("cpu #%u: ERR%uCTLR   = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXCTLR_EL1));
> +		pr_err("cpu #%u: ERR%uADDR   = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXADDR_EL1));
> +		pr_err("cpu #%u: ERR%uMISC0  = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXMISC0_EL1));
> +		pr_err("cpu #%u: ERR%uMISC1  = 0x%llx\n",
> +			cpu_num, i, read_sysreg_s(SYS_ERXMISC1_EL1));
> +	}
> +}
> +
> +void arch_ras_node_setup(unsigned int i)
> +{
> +	write_sysreg_s(i, SYS_ERRSELR_EL1);
> +	pr_info("ERR%uFR = 0x%llx\n", i, read_sysreg_s(SYS_ERXFR_EL1));
> +}
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index d399d45..53ed974 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -35,6 +35,7 @@
>  #include <linux/sizes.h>
>  #include <linux/syscalls.h>
>  #include <linux/mm_types.h>
> +#include <linux/armv8_edac.h>
>  
>  #include <asm/atomic.h>
>  #include <asm/bug.h>
> @@ -731,6 +732,9 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
>  {
>  	nmi_enter();
>  
> +	if (IS_ENABLED(CONFIG_EDAC_ARMV8))
> +		armv8_edac_report_error();

I would strongly advise against that - calling driver code from arch
core code. You've made armv8_edac bool as it cannot be a module because
of it, which is silly. I was lucky that we were able to remove that ugly
dependency from x86 traps code. Hint: use a notifier and this way the
edac driver can be a module too.

And looking at armv8_edac.c - this is making the whole error reporting
unnecessarily complicated. Why? Why aren't you doing everything in
ras.c?!

I don't see the need for adding anything to drivers/edac/ *at* *all*!

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC] edac: armv8_edac: Add ARMv8 EDAC driver
  2018-08-16  7:35 ` [RFC PATCH] " Borislav Petkov
@ 2018-08-16 21:24 ` Tyler Baicar
  -1 siblings, 0 replies; 8+ messages in thread
From: Tyler Baicar @ 2018-08-16 21:24 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tyler Baicar, arm-mail-list, linux-edac, James Morse

On Thu, Aug 16, 2018 at 3:35 AM, Borislav Petkov <bp@alien8.de> wrote:
>
> Remember to always run your stuff through checkpatch to catch legit issues:
>

Sorry about that, I will clean those up!

>
>> diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
>> new file mode 100644
>> index 0000000..2cb0265
>> --- /dev/null
>> +++ b/arch/arm64/kernel/ras.c
>> @@ -731,6 +732,9 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
>>  {
>>       nmi_enter();
>>
>> +     if (IS_ENABLED(CONFIG_EDAC_ARMV8))
>> +             armv8_edac_report_error();
>
> I would strongly advise against that - calling driver code from arch
> core code. You've made armv8_edac bool as it cannot be a module because
> of it, which is silly. I was lucky that we were able to remove that ugly
> dependency from x86 traps code. Hint: use a notifier and this way the
> edac driver can be a module too.
>
> And looking at armv8_edac.c - this is making the whole error reporting
> unnecessarily complicated. Why? Why aren't you doing everything in
> ras.c?!
>
> I don't see the need for adding anything to drivers/edac/ *at* *all*!
>

I agree with this for now, but I wanted to tie it into edac from the start since
this driver will have extended capabilities in the future. This
current patch only
deals with the RAS extension nodes accessible by the CPU SRs; however,
there will be memory mapped error nodes for things like platform memory.
These will then need to go through the proper software intervention similar
to the handling in other EDAC drivers (e.g. ghes_edac_report_mem_error).
For these we will probably also like to tie them into the DIMM counters that
are exposed through the EDAC sysfs nodes. That was the main motivation
for creating this as an armv8_edac driver :)

Thanks,
Tyler

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH] edac: armv8_edac: Add ARMv8 EDAC driver
@ 2018-08-16 21:24 ` Tyler Baicar
  0 siblings, 0 replies; 8+ messages in thread
From: Tyler Baicar @ 2018-08-16 21:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 16, 2018 at 3:35 AM, Borislav Petkov <bp@alien8.de> wrote:
>
> Remember to always run your stuff through checkpatch to catch legit issues:
>

Sorry about that, I will clean those up!

>
>> diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
>> new file mode 100644
>> index 0000000..2cb0265
>> --- /dev/null
>> +++ b/arch/arm64/kernel/ras.c
>> @@ -731,6 +732,9 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
>>  {
>>       nmi_enter();
>>
>> +     if (IS_ENABLED(CONFIG_EDAC_ARMV8))
>> +             armv8_edac_report_error();
>
> I would strongly advise against that - calling driver code from arch
> core code. You've made armv8_edac bool as it cannot be a module because
> of it, which is silly. I was lucky that we were able to remove that ugly
> dependency from x86 traps code. Hint: use a notifier and this way the
> edac driver can be a module too.
>
> And looking at armv8_edac.c - this is making the whole error reporting
> unnecessarily complicated. Why? Why aren't you doing everything in
> ras.c?!
>
> I don't see the need for adding anything to drivers/edac/ *at* *all*!
>

I agree with this for now, but I wanted to tie it into edac from the start since
this driver will have extended capabilities in the future. This
current patch only
deals with the RAS extension nodes accessible by the CPU SRs; however,
there will be memory mapped error nodes for things like platform memory.
These will then need to go through the proper software intervention similar
to the handling in other EDAC drivers (e.g. ghes_edac_report_mem_error).
For these we will probably also like to tie them into the DIMM counters that
are exposed through the EDAC sysfs nodes. That was the main motivation
for creating this as an armv8_edac driver :)

Thanks,
Tyler

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC] edac: armv8_edac: Add ARMv8 EDAC driver
  2018-08-16 21:24 ` [RFC PATCH] " Tyler Baicar
@ 2018-08-17 13:18 ` Borislav Petkov
  -1 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2018-08-17 13:18 UTC (permalink / raw)
  To: Tyler Baicar; +Cc: Tyler Baicar, arm-mail-list, linux-edac, James Morse

On Thu, Aug 16, 2018 at 05:24:29PM -0400, Tyler Baicar wrote:
> I agree with this for now, but I wanted to tie it into edac from the start since
> this driver will have extended capabilities in the future.

We'll cross that bridge when we get to it so please add the EDAC bits
only when there is something concrete to add.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH] edac: armv8_edac: Add ARMv8 EDAC driver
@ 2018-08-17 13:18 ` Borislav Petkov
  0 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2018-08-17 13:18 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Aug 16, 2018 at 05:24:29PM -0400, Tyler Baicar wrote:
> I agree with this for now, but I wanted to tie it into edac from the start since
> this driver will have extended capabilities in the future.

We'll cross that bridge when we get to it so please add the EDAC bits
only when there is something concrete to add.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-08-17 13:18 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-17 13:18 [RFC] edac: armv8_edac: Add ARMv8 EDAC driver Borislav Petkov
2018-08-17 13:18 ` [RFC PATCH] " Borislav Petkov
  -- strict thread matches above, loose matches on Subject: below --
2018-08-16 21:24 [RFC] " Tyler Baicar
2018-08-16 21:24 ` [RFC PATCH] " Tyler Baicar
2018-08-16  7:35 [RFC] " Borislav Petkov
2018-08-16  7:35 ` [RFC PATCH] " Borislav Petkov
2018-08-02 20:49 [RFC] " Tyler Baicar
2018-08-02 20:49 ` [RFC PATCH] " Tyler Baicar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.