All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tyler Baicar <tbaicar@codeaurora.org>
To: christoffer.dall@linaro.org, marc.zyngier@arm.com,
	pbonzini@redhat.com, rkrcmar@redhat.com, linux@armlinux.org.uk,
	catalin.marinas@arm.com, will.deacon@arm.com, rjw@rjwysocki.net,
	lenb@kernel.org, matt@codeblueprint.co.uk,
	robert.moore@intel.com, lv.zheng@intel.com, nkaje@codeaurora.org,
	zjzhang@codeaurora.org, mark.rutland@arm.com,
	james.morse@arm.com, akpm@linux-foundation.org,
	eun.taik.lee@samsung.com, sandeepa.s.prabhu@gmail.com,
	labbott@redhat.com, shijie.huang@arm.com,
	rruigrok@codeaurora.org, paul.gortmaker@windriver.com,
	tn@semihalf.com, fu.wei@linaro.org, rostedt@goodmis.org,
	bristot@redhat.com, linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
	linux-efi@vger.kernel.org
Cc: Tyler Baicar <tbaicar@codeaurora.org>
Subject: [PATCH V9 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section
Date: Wed, 15 Feb 2017 10:44:50 -0700	[thread overview]
Message-ID: <1487180692-16732-9-git-send-email-tbaicar@codeaurora.org> (raw)
In-Reply-To: <1487180692-16732-1-git-send-email-tbaicar@codeaurora.org>

UEFI spec allows for non-standard section in Common Platform Error
Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match with
any section type that the kernel knows how to parse, trace event
is not generated for such section. And thus user is not able to know
happening of such hardware error, including error record of
non-standard section.

This commit generates a trace event which contains raw error data
for unrecognized CPER section.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
---
 drivers/acpi/apei/ghes.c | 24 ++++++++++++++++++++++--
 drivers/ras/ras.c        |  1 +
 include/ras/ras_event.h  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9f105ce..be365e2 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -44,11 +44,13 @@
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/nmi.h>
+#include <linux/uuid.h>
 
 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
 #include <acpi/apei.h>
 #include <asm/tlbflush.h>
+#include <ras/ras_event.h>
 
 #include "apei-internal.h"
 
@@ -453,11 +455,21 @@ static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	uuid_le sec_type;
+	uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		sec_type = *(uuid_le *)gdata->section_type;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (uuid_le *)gdata->fru_id;
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
+		if (!uuid_le_cmp(sec_type,
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 
@@ -468,7 +480,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		else if (!uuid_le_cmp(sec_type,
 				      CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
 
@@ -501,6 +513,14 @@ static void ghes_do_proc(struct ghes *ghes,
 
 		}
 #endif
+#ifdef CONFIG_RAS
+		else if(trace_unknown_sec_event_enabled()) {
+			void *unknown_err = acpi_hest_generic_data_payload(gdata);
+			trace_unknown_sec_event(&sec_type,
+					fru_id, fru_text, sec_sev,
+					unknown_err, gdata->error_data_length);
+		}
+#endif
 	}
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index b67dd36..fb2500b 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -27,3 +27,4 @@ static int __init ras_init(void)
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12..5861b6f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,51 @@
 );
 
 /*
+ * Unknown Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(unknown_sec_event,
+
+	TP_PROTO(const uuid_le *sec_type,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 const u8 sev,
+		 const u8 *err,
+		 const u32 len),
+
+	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+	TP_STRUCT__entry(
+		__array(char, sec_type, 16)
+		__array(char, fru_id, 16)
+		__string(fru_text, fru_text)
+		__field(u8, sev)
+		__field(u32, len)
+		__dynamic_array(u8, buf, len)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
+		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+		__assign_str(fru_text, fru_text);
+		__entry->sev = sev;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), err, len);
+	),
+
+	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+		  __entry->sev, __entry->sec_type,
+		  __entry->fru_id, __get_str(fru_text),
+		  __entry->len,
+		  __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+/*
  * PCIe AER Trace event
  *
  * These events are generated when hardware detects a corrected or
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

WARNING: multiple messages have this Message-ID (diff)
From: Tyler Baicar <tbaicar@codeaurora.org>
To: christoffer.dall@linaro.org, marc.zyngier@arm.com,
	pbonzini@redhat.com, rkrcmar@redhat.com, linux@armlinux.org.uk,
	catalin.marinas@arm.com, will.deacon@arm.com, rjw@rjwysocki.net,
	lenb@kernel.org, matt@codeblueprint.co.uk,
	robert.moore@intel.com, lv.zheng@intel.com, nkaje@codeaurora.org,
	zjzhang@codeaurora.org, mark.rutland@arm.com,
	james.morse@arm.com, akpm@linux-foundation.org,
	eun.taik.lee@samsung.com, sandeepa.s.prabhu@gmail.com,
	labbott@redhat.com, shijie.huang@arm.com,
	rruigrok@codeaurora.org, paul.gortmaker@windriver.com,
	tn@semihalf.com, fu.wei@linaro.org, rostedt@goodmis.org,
	bristot@redhat.com, linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
	linux-efi@vger.kernel.org, devel@acpica.org,
	Suzuki.Poulose@arm.com, punit.agrawal@arm.com, astone@redhat.com,
	harba@codeaurora.org, hanjun.guo@linaro.org,
	john.garry@huawei.com, shiju.jose@huawei.com
Cc: Tyler Baicar <tbaicar@codeaurora.org>
Subject: [PATCH V9 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section
Date: Wed, 15 Feb 2017 10:44:50 -0700	[thread overview]
Message-ID: <1487180692-16732-9-git-send-email-tbaicar@codeaurora.org> (raw)
In-Reply-To: <1487180692-16732-1-git-send-email-tbaicar@codeaurora.org>

UEFI spec allows for non-standard section in Common Platform Error
Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match with
any section type that the kernel knows how to parse, trace event
is not generated for such section. And thus user is not able to know
happening of such hardware error, including error record of
non-standard section.

This commit generates a trace event which contains raw error data
for unrecognized CPER section.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
---
 drivers/acpi/apei/ghes.c | 24 ++++++++++++++++++++++--
 drivers/ras/ras.c        |  1 +
 include/ras/ras_event.h  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9f105ce..be365e2 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -44,11 +44,13 @@
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/nmi.h>
+#include <linux/uuid.h>
 
 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
 #include <acpi/apei.h>
 #include <asm/tlbflush.h>
+#include <ras/ras_event.h>
 
 #include "apei-internal.h"
 
@@ -453,11 +455,21 @@ static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	uuid_le sec_type;
+	uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		sec_type = *(uuid_le *)gdata->section_type;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (uuid_le *)gdata->fru_id;
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
+		if (!uuid_le_cmp(sec_type,
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 
@@ -468,7 +480,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		else if (!uuid_le_cmp(sec_type,
 				      CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
 
@@ -501,6 +513,14 @@ static void ghes_do_proc(struct ghes *ghes,
 
 		}
 #endif
+#ifdef CONFIG_RAS
+		else if(trace_unknown_sec_event_enabled()) {
+			void *unknown_err = acpi_hest_generic_data_payload(gdata);
+			trace_unknown_sec_event(&sec_type,
+					fru_id, fru_text, sec_sev,
+					unknown_err, gdata->error_data_length);
+		}
+#endif
 	}
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index b67dd36..fb2500b 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -27,3 +27,4 @@ static int __init ras_init(void)
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12..5861b6f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,51 @@
 );
 
 /*
+ * Unknown Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(unknown_sec_event,
+
+	TP_PROTO(const uuid_le *sec_type,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 const u8 sev,
+		 const u8 *err,
+		 const u32 len),
+
+	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+	TP_STRUCT__entry(
+		__array(char, sec_type, 16)
+		__array(char, fru_id, 16)
+		__string(fru_text, fru_text)
+		__field(u8, sev)
+		__field(u32, len)
+		__dynamic_array(u8, buf, len)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
+		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+		__assign_str(fru_text, fru_text);
+		__entry->sev = sev;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), err, len);
+	),
+
+	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+		  __entry->sev, __entry->sec_type,
+		  __entry->fru_id, __get_str(fru_text),
+		  __entry->len,
+		  __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+/*
  * PCIe AER Trace event
  *
  * These events are generated when hardware detects a corrected or
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

WARNING: multiple messages have this Message-ID (diff)
From: Tyler Baicar <tbaicar@codeaurora.org>
To: christoffer.dall@linaro.org, marc.zyngier@arm.com,
	pbonzini@redhat.com, rkrcmar@redhat.com, linux@armlinux.org.uk,
	catalin.marinas@arm.com, will.deacon@arm.com, rjw@rjwysocki.net,
	lenb@kernel.org, matt@codeblueprint.co.uk,
	robert.moore@intel.com, lv.zheng@intel.com, nkaje@codeaurora.org,
	zjzhang@codeaurora.org, mark.rutland@arm.com,
	james.morse@arm.com, akpm@linux-foundation.org,
	eun.taik.lee@samsung.com, sandeepa.s.prabhu@gmail.com,
	labbott@redhat.com, shijie.huang@arm.com,
	rruigrok@codeaurora.org, paul.gortmaker@windriver.com,
	tn@semihalf.com, fu.wei@linaro.org, rostedt@goodmis.org,
	bristot@redhat.com, linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
	linux-efi@vger.kernel.org,
Cc: Tyler Baicar <tbaicar@codeaurora.org>
Subject: [PATCH V9 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section
Date: Wed, 15 Feb 2017 10:44:50 -0700	[thread overview]
Message-ID: <1487180692-16732-9-git-send-email-tbaicar@codeaurora.org> (raw)
In-Reply-To: <1487180692-16732-1-git-send-email-tbaicar@codeaurora.org>

UEFI spec allows for non-standard section in Common Platform Error
Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match with
any section type that the kernel knows how to parse, trace event
is not generated for such section. And thus user is not able to know
happening of such hardware error, including error record of
non-standard section.

This commit generates a trace event which contains raw error data
for unrecognized CPER section.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
---
 drivers/acpi/apei/ghes.c | 24 ++++++++++++++++++++++--
 drivers/ras/ras.c        |  1 +
 include/ras/ras_event.h  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9f105ce..be365e2 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -44,11 +44,13 @@
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/nmi.h>
+#include <linux/uuid.h>
 
 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
 #include <acpi/apei.h>
 #include <asm/tlbflush.h>
+#include <ras/ras_event.h>
 
 #include "apei-internal.h"
 
@@ -453,11 +455,21 @@ static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	uuid_le sec_type;
+	uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		sec_type = *(uuid_le *)gdata->section_type;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (uuid_le *)gdata->fru_id;
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
+		if (!uuid_le_cmp(sec_type,
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 
@@ -468,7 +480,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		else if (!uuid_le_cmp(sec_type,
 				      CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
 
@@ -501,6 +513,14 @@ static void ghes_do_proc(struct ghes *ghes,
 
 		}
 #endif
+#ifdef CONFIG_RAS
+		else if(trace_unknown_sec_event_enabled()) {
+			void *unknown_err = acpi_hest_generic_data_payload(gdata);
+			trace_unknown_sec_event(&sec_type,
+					fru_id, fru_text, sec_sev,
+					unknown_err, gdata->error_data_length);
+		}
+#endif
 	}
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index b67dd36..fb2500b 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -27,3 +27,4 @@ static int __init ras_init(void)
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12..5861b6f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,51 @@
 );
 
 /*
+ * Unknown Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(unknown_sec_event,
+
+	TP_PROTO(const uuid_le *sec_type,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 const u8 sev,
+		 const u8 *err,
+		 const u32 len),
+
+	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+	TP_STRUCT__entry(
+		__array(char, sec_type, 16)
+		__array(char, fru_id, 16)
+		__string(fru_text, fru_text)
+		__field(u8, sev)
+		__field(u32, len)
+		__dynamic_array(u8, buf, len)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
+		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+		__assign_str(fru_text, fru_text);
+		__entry->sev = sev;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), err, len);
+	),
+
+	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+		  __entry->sev, __entry->sec_type,
+		  __entry->fru_id, __get_str(fru_text),
+		  __entry->len,
+		  __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+/*
  * PCIe AER Trace event
  *
  * These events are generated when hardware detects a corrected or
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

WARNING: multiple messages have this Message-ID (diff)
From: tbaicar@codeaurora.org (Tyler Baicar)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH V9 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section
Date: Wed, 15 Feb 2017 10:44:50 -0700	[thread overview]
Message-ID: <1487180692-16732-9-git-send-email-tbaicar@codeaurora.org> (raw)
In-Reply-To: <1487180692-16732-1-git-send-email-tbaicar@codeaurora.org>

UEFI spec allows for non-standard section in Common Platform Error
Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match with
any section type that the kernel knows how to parse, trace event
is not generated for such section. And thus user is not able to know
happening of such hardware error, including error record of
non-standard section.

This commit generates a trace event which contains raw error data
for unrecognized CPER section.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
---
 drivers/acpi/apei/ghes.c | 24 ++++++++++++++++++++++--
 drivers/ras/ras.c        |  1 +
 include/ras/ras_event.h  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9f105ce..be365e2 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -44,11 +44,13 @@
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/nmi.h>
+#include <linux/uuid.h>
 
 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
 #include <acpi/apei.h>
 #include <asm/tlbflush.h>
+#include <ras/ras_event.h>
 
 #include "apei-internal.h"
 
@@ -453,11 +455,21 @@ static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	uuid_le sec_type;
+	uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		sec_type = *(uuid_le *)gdata->section_type;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (uuid_le *)gdata->fru_id;
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
+		if (!uuid_le_cmp(sec_type,
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 
@@ -468,7 +480,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		else if (!uuid_le_cmp(sec_type,
 				      CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
 
@@ -501,6 +513,14 @@ static void ghes_do_proc(struct ghes *ghes,
 
 		}
 #endif
+#ifdef CONFIG_RAS
+		else if(trace_unknown_sec_event_enabled()) {
+			void *unknown_err = acpi_hest_generic_data_payload(gdata);
+			trace_unknown_sec_event(&sec_type,
+					fru_id, fru_text, sec_sev,
+					unknown_err, gdata->error_data_length);
+		}
+#endif
 	}
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index b67dd36..fb2500b 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -27,3 +27,4 @@ static int __init ras_init(void)
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12..5861b6f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,51 @@
 );
 
 /*
+ * Unknown Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(unknown_sec_event,
+
+	TP_PROTO(const uuid_le *sec_type,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 const u8 sev,
+		 const u8 *err,
+		 const u32 len),
+
+	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+	TP_STRUCT__entry(
+		__array(char, sec_type, 16)
+		__array(char, fru_id, 16)
+		__string(fru_text, fru_text)
+		__field(u8, sev)
+		__field(u32, len)
+		__dynamic_array(u8, buf, len)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
+		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+		__assign_str(fru_text, fru_text);
+		__entry->sev = sev;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), err, len);
+	),
+
+	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+		  __entry->sev, __entry->sec_type,
+		  __entry->fru_id, __get_str(fru_text),
+		  __entry->len,
+		  __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+/*
  * PCIe AER Trace event
  *
  * These events are generated when hardware detects a corrected or
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

  parent reply	other threads:[~2017-02-15 17:44 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-15 17:44 [PATCH V9 00/10] Add UEFI 2.6 and ACPI 6.1 updates for RAS on ARM64 Tyler Baicar
2017-02-15 17:44 ` Tyler Baicar
2017-02-15 17:44 ` Tyler Baicar
2017-02-15 17:44 ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 01/10] acpi: apei: read ack upon ghes record consumption Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 02/10] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1 Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 03/10] efi: parse ARM processor error Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 04/10] arm64: exception: handle Synchronous External Abort Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 05/10] acpi: apei: handle SEA notification type for ARMv8 Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 20:52   ` kbuild test robot
2017-02-15 20:52     ` kbuild test robot
2017-02-15 20:52     ` kbuild test robot
2017-02-15 20:52     ` kbuild test robot
2017-02-15 17:44 ` [PATCH V9 06/10] acpi: apei: panic OS with fatal error status block Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 07/10] efi: print unrecognized CPER section Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` Tyler Baicar [this message]
2017-02-15 17:44   ` [PATCH V9 08/10] ras: acpi / apei: generate trace event for " Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44 ` [PATCH V9 09/10] trace, ras: add ARM processor error trace event Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:47   ` Steven Rostedt
2017-02-15 17:47     ` Steven Rostedt
2017-02-15 17:47     ` Steven Rostedt
2017-02-15 17:51     ` Baicar, Tyler
2017-02-15 17:51       ` Baicar, Tyler
2017-02-15 17:51       ` Baicar, Tyler
2017-02-15 17:44 ` [PATCH V9 10/10] arm/arm64: KVM: add guest SEA support Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar
2017-02-15 17:44   ` Tyler Baicar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1487180692-16732-9-git-send-email-tbaicar@codeaurora.org \
    --to=tbaicar@codeaurora.org \
    --cc=akpm@linux-foundation.org \
    --cc=bristot@redhat.com \
    --cc=catalin.marinas@arm.com \
    --cc=christoffer.dall@linaro.org \
    --cc=eun.taik.lee@samsung.com \
    --cc=fu.wei@linaro.org \
    --cc=james.morse@arm.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=labbott@redhat.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@armlinux.org.uk \
    --cc=lv.zheng@intel.com \
    --cc=marc.zyngier@arm.com \
    --cc=mark.rutland@arm.com \
    --cc=matt@codeblueprint.co.uk \
    --cc=nkaje@codeaurora.org \
    --cc=paul.gortmaker@windriver.com \
    --cc=pbonzini@redhat.com \
    --cc=rjw@rjwysocki.net \
    --cc=rkrcmar@redhat.com \
    --cc=robert.moore@intel.com \
    --cc=rostedt@goodmis.org \
    --cc=rruigrok@codeaurora.org \
    --cc=sandeepa.s.prabhu@gmail.com \
    --cc=shijie.huang@arm.com \
    --cc=tn@semihalf.com \
    --cc=will.deacon@arm.com \
    --cc=zjzhang@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.