All of lore.kernel.org
 help / color / mirror / Atom feed
* New eMCA trace event interface V4
@ 2014-06-11  8:34 Chen, Gong
  2014-06-11  8:34 ` [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event Chen, Gong
                   ` (8 more replies)
  0 siblings, 9 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi

v4 -> v3: shrink trace size & adjust trace format.
v3 -> v2: adjust RAS subsystem format & bunch of minor adjustments.
v2 -> v1: merge the comments from Tony Luck & Borislav Petkov.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11 18:59   ` Borislav Petkov
  2014-06-11  8:34 ` [PATCH 2/7 v3] trace, AER: Move trace into unified interface Chen, Gong
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

To avoid confusion and conflicting usage of RAS-related trace events,
add a unified RAS trace event stub.

v5 -> v4: remove explicit RAS menuconfig.
v4 -> v3: change dependency rule of RAS_TRACE.
v3 -> v2: fix dependency in Kconfig.
v2 -> v1: adjust Kconfig to take RAS as a separate subsystem.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/Kconfig        |  2 ++
 drivers/Makefile       |  1 +
 drivers/edac/Kconfig   |  1 +
 drivers/edac/edac_mc.c |  3 ---
 drivers/ras/Kconfig    |  6 ++++++
 drivers/ras/Makefile   |  1 +
 drivers/ras/ras.c      | 12 ++++++++++++
 7 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 drivers/ras/Kconfig
 create mode 100644 drivers/ras/Makefile
 create mode 100644 drivers/ras/ras.c

diff --git a/drivers/Kconfig b/drivers/Kconfig
index 0e87a34..4e6e66c 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig"
 
 source "drivers/mcb/Kconfig"
 
+source "drivers/ras/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index f98b50d..65c32b1 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -158,3 +158,4 @@ obj-$(CONFIG_NTB)		+= ntb/
 obj-$(CONFIG_FMC)		+= fmc/
 obj-$(CONFIG_POWERCAP)		+= powercap/
 obj-$(CONFIG_MCB)		+= mcb/
+obj-$(CONFIG_RAS)		+= ras/
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 878f090..1589a86 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -72,6 +72,7 @@ config EDAC_MCE_INJ
 
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
+	select RAS_TRACE
 	help
 	  Some systems are able to detect and correct errors in main
 	  memory.  EDAC can report statistics on memory error
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 2c694b5..9f134823 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -33,9 +33,6 @@
 #include <asm/edac.h>
 #include "edac_core.h"
 #include "edac_module.h"
-
-#define CREATE_TRACE_POINTS
-#define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
 /* lock to memory controller's control array */
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
new file mode 100644
index 0000000..85febfd
--- /dev/null
+++ b/drivers/ras/Kconfig
@@ -0,0 +1,6 @@
+config RAS_TRACE
+	def_bool n
+	select RAS
+
+config RAS
+	bool
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
new file mode 100644
index 0000000..223e806
--- /dev/null
+++ b/drivers/ras/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_RAS) += ras.o
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
new file mode 100644
index 0000000..b0c6ed1
--- /dev/null
+++ b/drivers/ras/ras.c
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ *	Chen, Gong <gong.chen@linux.intel.com>
+ */
+
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 2/7 v3] trace, AER: Move trace into unified interface
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
  2014-06-11  8:34 ` [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11 19:00   ` Borislav Petkov
  2014-06-11  8:34 ` [PATCH 3/7 v5] CPER: Adjust code flow of some functions Chen, Gong
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

AER currently uses a separate trace interface. To make it
consistent, move it into the unified RAS trace interface.

v3 -> v2: change dependency rule of RAS_TRACE.
v2 -> v1: remove unnecessary dependency in drivers/ras/Kconfig.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/pci/pcie/aer/Kconfig           |  1 +
 drivers/pci/pcie/aer/aerdrv_errprint.c |  4 +-
 include/ras/ras_event.h                | 64 ++++++++++++++++++++++++++++
 include/trace/events/ras.h             | 77 ----------------------------------
 4 files changed, 66 insertions(+), 80 deletions(-)
 delete mode 100644 include/trace/events/ras.h

diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig
index 50e94e0..c611384 100644
--- a/drivers/pci/pcie/aer/Kconfig
+++ b/drivers/pci/pcie/aer/Kconfig
@@ -5,6 +5,7 @@
 config PCIEAER
 	boolean "Root Port Advanced Error Reporting support"
 	depends on PCIEPORTBUS
+	select RAS_TRACE
 	default y
 	help
 	  This enables PCI Express Root Port Advanced Error Reporting
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 34ff702..73e73b7 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -22,9 +22,7 @@
 #include <linux/cper.h>
 
 #include "aerdrv.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/ras.h>
+#include <ras/ras_event.h>
 
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 21cdb0b..acbcbb8 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -8,6 +8,7 @@
 #include <linux/tracepoint.h>
 #include <linux/edac.h>
 #include <linux/ktime.h>
+#include <linux/aer.h>
 
 /*
  * Hardware Events Report
@@ -94,6 +95,69 @@ TRACE_EVENT(mc_event,
 		  __get_str(driver_detail))
 );
 
+/*
+ * PCIe AER Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a PCIe device. The event report has
+ * the following structure:
+ *
+ * char * dev_name -	The name of the slot where the device resides
+ *			([domain:]bus:device.function).
+ * u32 status -		Either the correctable or uncorrectable register
+ *			indicating what error or errors have been seen
+ * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define aer_correctable_errors		\
+	{BIT(0),	"Receiver Error"},		\
+	{BIT(6),	"Bad TLP"},			\
+	{BIT(7),	"Bad DLLP"},			\
+	{BIT(8),	"RELAY_NUM Rollover"},		\
+	{BIT(12),	"Replay Timer Timeout"},	\
+	{BIT(13),	"Advisory Non-Fatal"}
+
+#define aer_uncorrectable_errors		\
+	{BIT(4),	"Data Link Protocol"},		\
+	{BIT(12),	"Poisoned TLP"},		\
+	{BIT(13),	"Flow Control Protocol"},	\
+	{BIT(14),	"Completion Timeout"},		\
+	{BIT(15),	"Completer Abort"},		\
+	{BIT(16),	"Unexpected Completion"},	\
+	{BIT(17),	"Receiver Overflow"},		\
+	{BIT(18),	"Malformed TLP"},		\
+	{BIT(19),	"ECRC"},			\
+	{BIT(20),	"Unsupported Request"}
+
+TRACE_EVENT(aer_event,
+	TP_PROTO(const char *dev_name,
+		 const u32 status,
+		 const u8 severity),
+
+	TP_ARGS(dev_name, status, severity),
+
+	TP_STRUCT__entry(
+		__string(	dev_name,	dev_name	)
+		__field(	u32,		status		)
+		__field(	u8,		severity	)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev_name, dev_name);
+		__entry->status		= status;
+		__entry->severity	= severity;
+	),
+
+	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
+		__get_str(dev_name),
+		__entry->severity == AER_CORRECTABLE ? "Corrected" :
+			__entry->severity == AER_FATAL ?
+			"Fatal" : "Uncorrected, non-fatal",
+		__entry->severity == AER_CORRECTABLE ?
+		__print_flags(__entry->status, "|", aer_correctable_errors) :
+		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
+);
+
 #endif /* _TRACE_HW_EVENT_MC_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
deleted file mode 100644
index 1c875ad..0000000
--- a/include/trace/events/ras.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM ras
-
-#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_AER_H
-
-#include <linux/tracepoint.h>
-#include <linux/aer.h>
-
-
-/*
- * PCIe AER Trace event
- *
- * These events are generated when hardware detects a corrected or
- * uncorrected event on a PCIe device. The event report has
- * the following structure:
- *
- * char * dev_name -	The name of the slot where the device resides
- *			([domain:]bus:device.function).
- * u32 status -		Either the correctable or uncorrectable register
- *			indicating what error or errors have been seen
- * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
- */
-
-#define aer_correctable_errors		\
-	{BIT(0),	"Receiver Error"},		\
-	{BIT(6),	"Bad TLP"},			\
-	{BIT(7),	"Bad DLLP"},			\
-	{BIT(8),	"RELAY_NUM Rollover"},		\
-	{BIT(12),	"Replay Timer Timeout"},	\
-	{BIT(13),	"Advisory Non-Fatal"}
-
-#define aer_uncorrectable_errors		\
-	{BIT(4),	"Data Link Protocol"},		\
-	{BIT(12),	"Poisoned TLP"},		\
-	{BIT(13),	"Flow Control Protocol"},	\
-	{BIT(14),	"Completion Timeout"},		\
-	{BIT(15),	"Completer Abort"},		\
-	{BIT(16),	"Unexpected Completion"},	\
-	{BIT(17),	"Receiver Overflow"},		\
-	{BIT(18),	"Malformed TLP"},		\
-	{BIT(19),	"ECRC"},			\
-	{BIT(20),	"Unsupported Request"}
-
-TRACE_EVENT(aer_event,
-	TP_PROTO(const char *dev_name,
-		 const u32 status,
-		 const u8 severity),
-
-	TP_ARGS(dev_name, status, severity),
-
-	TP_STRUCT__entry(
-		__string(	dev_name,	dev_name	)
-		__field(	u32,		status		)
-		__field(	u8,		severity	)
-	),
-
-	TP_fast_assign(
-		__assign_str(dev_name, dev_name);
-		__entry->status		= status;
-		__entry->severity	= severity;
-	),
-
-	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
-		__get_str(dev_name),
-		__entry->severity == AER_CORRECTABLE ? "Corrected" :
-			__entry->severity == AER_FATAL ?
-			"Fatal" : "Uncorrected, non-fatal",
-		__entry->severity == AER_CORRECTABLE ?
-		__print_flags(__entry->status, "|", aer_correctable_errors) :
-		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
-);
-
-#endif /* _TRACE_AER_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 3/7 v5] CPER: Adjust code flow of some functions
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
  2014-06-11  8:34 ` [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event Chen, Gong
  2014-06-11  8:34 ` [PATCH 2/7 v3] trace, AER: Move trace into unified interface Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11  8:34 ` [PATCH 4/7 v2] RAS, debugfs: Add debugfs interface for RAS subsystem Chen, Gong
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

Some code can be reorganized into common functions for use elsewhere.

v5 -> v4: minor adjustment for mem/DIMM error record organization.
v4 -> v3: minor adjustment to make the output format more graceful.
v3 -> v2: Fix a bug when calculating string length & minor fix.
v2 -> v1: Use scnprintf to simplify the code.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/firmware/efi/cper.c | 155 ++++++++++++++++++++++++++++----------------
 include/linux/cper.h        |   9 +++
 2 files changed, 109 insertions(+), 55 deletions(-)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 1491dd4..83b56b61 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -34,6 +34,9 @@
 #include <linux/aer.h>
 
 #define INDENT_SP	" "
+
+static char rcd_decode_str[CPER_REC_LEN];
+
 /*
  * CPER record ID need to be unique even after reboot, because record
  * ID is used as index for ERST storage, while CPER records from
@@ -50,18 +53,19 @@ u64 cper_next_record_id(void)
 }
 EXPORT_SYMBOL_GPL(cper_next_record_id);
 
-static const char *cper_severity_strs[] = {
+static const char * const severity_strs[] = {
 	"recoverable",
 	"fatal",
 	"corrected",
 	"info",
 };
 
-static const char *cper_severity_str(unsigned int severity)
+const char *cper_severity_str(unsigned int severity)
 {
-	return severity < ARRAY_SIZE(cper_severity_strs) ?
-		cper_severity_strs[severity] : "unknown";
+	return severity < ARRAY_SIZE(severity_strs) ?
+		severity_strs[severity] : "unknown";
 }
+EXPORT_SYMBOL_GPL(cper_severity_str);
 
 /*
  * cper_print_bits - print strings for set bits
@@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits,
 		printk("%s\n", buf);
 }
 
-static const char * const cper_proc_type_strs[] = {
+static const char * const proc_type_strs[] = {
 	"IA32/X64",
 	"IA64",
 };
 
-static const char * const cper_proc_isa_strs[] = {
+static const char * const proc_isa_strs[] = {
 	"IA32",
 	"IA64",
 	"X64",
 };
 
-static const char * const cper_proc_error_type_strs[] = {
+static const char * const proc_error_type_strs[] = {
 	"cache error",
 	"TLB error",
 	"bus error",
 	"micro-architectural error",
 };
 
-static const char * const cper_proc_op_strs[] = {
+static const char * const proc_op_strs[] = {
 	"unknown or generic",
 	"data read",
 	"data write",
 	"instruction execution",
 };
 
-static const char * const cper_proc_flag_strs[] = {
+static const char * const proc_flag_strs[] = {
 	"restartable",
 	"precise IP",
 	"overflow",
@@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx,
 {
 	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
 		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
-		       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
-		       cper_proc_type_strs[proc->proc_type] : "unknown");
+		       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
+		       proc_type_strs[proc->proc_type] : "unknown");
 	if (proc->validation_bits & CPER_PROC_VALID_ISA)
 		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
-		       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
-		       cper_proc_isa_strs[proc->proc_isa] : "unknown");
+		       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
+		       proc_isa_strs[proc->proc_isa] : "unknown");
 	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
 		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
 		cper_print_bits(pfx, proc->proc_error_type,
-				cper_proc_error_type_strs,
-				ARRAY_SIZE(cper_proc_error_type_strs));
+				proc_error_type_strs,
+				ARRAY_SIZE(proc_error_type_strs));
 	}
 	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
 		printk("%s""operation: %d, %s\n", pfx, proc->operation,
-		       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
-		       cper_proc_op_strs[proc->operation] : "unknown");
+		       proc->operation < ARRAY_SIZE(proc_op_strs) ?
+		       proc_op_strs[proc->operation] : "unknown");
 	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
 		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
-		cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
-				ARRAY_SIZE(cper_proc_flag_strs));
+		cper_print_bits(pfx, proc->flags, proc_flag_strs,
+				ARRAY_SIZE(proc_flag_strs));
 	}
 	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
 		printk("%s""level: %d\n", pfx, proc->level);
@@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx,
 		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
 }
 
-static const char *cper_mem_err_type_strs[] = {
+static const char * const mem_err_type_strs[] = {
 	"unknown",
 	"no error",
 	"single-bit ECC",
@@ -196,58 +200,99 @@ static const char *cper_mem_err_type_strs[] = {
 	"physical memory map-out event",
 };
 
-static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
+const char *cper_mem_err_type_str(unsigned int etype)
 {
-	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
-		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
-	if (mem->validation_bits & CPER_MEM_VALID_PA)
-		printk("%s""physical_address: 0x%016llx\n",
-		       pfx, mem->physical_addr);
-	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
-		printk("%s""physical_address_mask: 0x%016llx\n",
-		       pfx, mem->physical_addr_mask);
+	return etype < ARRAY_SIZE(mem_err_type_strs) ?
+		mem_err_type_strs[etype] : "unknown";
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
+
+int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
+{
+	u32 len, n;
+
+	if (!msg)
+		return 0;
+
+	n = 0;
+	len = CPER_REC_LEN - 1;
 	if (mem->validation_bits & CPER_MEM_VALID_NODE)
-		pr_debug("node: %d\n", mem->node);
+		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
 	if (mem->validation_bits & CPER_MEM_VALID_CARD)
-		pr_debug("card: %d\n", mem->card);
+		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
 	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
-		pr_debug("module: %d\n", mem->module);
+		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
 	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
-		pr_debug("rank: %d\n", mem->rank);
+		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
 	if (mem->validation_bits & CPER_MEM_VALID_BANK)
-		pr_debug("bank: %d\n", mem->bank);
+		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
 	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
-		pr_debug("device: %d\n", mem->device);
+		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
 	if (mem->validation_bits & CPER_MEM_VALID_ROW)
-		pr_debug("row: %d\n", mem->row);
+		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
 	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
-		pr_debug("column: %d\n", mem->column);
+		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
 	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
-		pr_debug("bit_position: %d\n", mem->bit_pos);
+		n += scnprintf(msg + n, len - n, "bit_position: %d ",
+			       mem->bit_pos);
 	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
-		pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
+		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
+			       mem->requestor_id);
 	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
-		pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
+		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
+			       mem->responder_id);
 	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
-		pr_debug("target_id: 0x%016llx\n", mem->target_id);
+		scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
+			  mem->target_id);
+
+	msg[n] = '\0';
+	return n;
+}
+
+int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
+{
+	u32 len, n;
+	const char *bank = NULL, *device = NULL;
+
+	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
+		return 0;
+
+	n = 0;
+	len = CPER_REC_LEN - 1;
+	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
+	if (bank && device)
+		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
+	else
+		n = snprintf(msg, len,
+			     "DIMM location: not present. DMI handle: 0x%.4x ",
+			     mem->mem_dev_handle);
+
+	msg[n] = '\0';
+	return n;
+}
+
+static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
+{
+	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
+		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
+	if (mem->validation_bits & CPER_MEM_VALID_PA)
+		printk("%s""physical_address: 0x%016llx\n",
+		       pfx, mem->physical_addr);
+	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+		printk("%s""physical_address_mask: 0x%016llx\n",
+		       pfx, mem->physical_addr_mask);
+	if (cper_mem_err_location(mem, rcd_decode_str))
+		printk("%s%s\n", pfx, rcd_decode_str);
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 		u8 etype = mem->error_type;
 		printk("%s""error_type: %d, %s\n", pfx, etype,
-		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
-		       cper_mem_err_type_strs[etype] : "unknown");
-	}
-	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
-		const char *bank = NULL, *device = NULL;
-		dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
-		if (bank != NULL && device != NULL)
-			printk("%s""DIMM location: %s %s", pfx, bank, device);
-		else
-			printk("%s""DIMM DMI handle: 0x%.4x",
-			       pfx, mem->mem_dev_handle);
+		       cper_mem_err_type_str(etype));
 	}
+	if (cper_dimm_err_location(mem, rcd_decode_str))
+		printk("%s%s\n", pfx, rcd_decode_str);
 }
 
-static const char *cper_pcie_port_type_strs[] = {
+static const char * const pcie_port_type_strs[] = {
 	"PCIe end point",
 	"legacy PCI end point",
 	"unknown",
@@ -266,8 +311,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 {
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
-		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
-		       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
+		       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
+		       pcie_port_type_strs[pcie->port_type] : "unknown");
 	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
 		printk("%s""version: %d.%d\n", pfx,
 		       pcie->version.major, pcie->version.minor);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 2fc0ec3..ed088b9 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -36,6 +36,13 @@
 #define CPER_RECORD_REV				0x0100
 
 /*
+ * CPER record length contains the CPER fields which are relevant for further
+ * handling of a memory error in userspace (we don't carry all the fields
+ * defined in the UEFI spec because some of them don't make any sense.)
+ * Currently, a length of 256 should be more than enough.
+ */
+#define CPER_REC_LEN					256
+/*
  * Severity difinition for error_severity in struct cper_record_header
  * and section_severity in struct cper_section_descriptor
  */
@@ -395,6 +402,8 @@ struct cper_sec_pcie {
 #pragma pack()
 
 u64 cper_next_record_id(void);
+const char *cper_severity_str(unsigned int);
+const char *cper_mem_err_type_str(unsigned int);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char * const strs[], unsigned int strs_size);
 
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 4/7 v2] RAS, debugfs: Add debugfs interface for RAS subsystem
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
                   ` (2 preceding siblings ...)
  2014-06-11  8:34 ` [PATCH 3/7 v5] CPER: Adjust code flow of some functions Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11 19:01   ` Borislav Petkov
  2014-06-11  8:34 ` [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface Chen, Gong
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

Implement a new debugfs interface for the RAS subsystem.
A file named daemon_active is added there accordingly.
This file is used to track whether a user space daemon has enabled
the perf/trace interface or not. One can track which daemon
opens it via "lsof /path/to/debugfs/ras/daemon_active".

v2 -> v1: Change file access mode from 0444 to 0400.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/ras/Makefile  |  2 +-
 drivers/ras/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ras/ras.c     | 14 +++++++++++++
 include/linux/ras.h   | 15 ++++++++++++++
 4 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 drivers/ras/debugfs.c
 create mode 100644 include/linux/ras.h

diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 223e806..d7f7334 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_RAS) += ras.o
+obj-$(CONFIG_RAS) += ras.o debugfs.o
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
new file mode 100644
index 0000000..d0bc389
--- /dev/null
+++ b/drivers/ras/debugfs.c
@@ -0,0 +1,57 @@
+#include <linux/debugfs.h>
+
+struct dentry *ras_debugfs_dir;
+EXPORT_SYMBOL_GPL(ras_debugfs_dir);
+
+static atomic_t trace_count = ATOMIC_INIT(0);
+
+int ras_userspace_consumers(void)
+{
+	return atomic_read(&trace_count);
+}
+EXPORT_SYMBOL_GPL(ras_userspace_consumers);
+
+static int trace_show(struct seq_file *m, void *v)
+{
+	return atomic_read(&trace_count);
+}
+
+static int trace_open(struct inode *inode, struct file *file)
+{
+	atomic_inc(&trace_count);
+	return single_open(file, trace_show, NULL);
+}
+
+static int trace_release(struct inode *inode, struct file *file)
+{
+	atomic_dec(&trace_count);
+	return single_release(inode, file);
+}
+
+static const struct file_operations trace_fops = {
+	.open    = trace_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = trace_release,
+};
+
+int __init ras_add_daemon_trace(void)
+{
+	struct dentry *fentry;
+
+	if (!ras_debugfs_dir)
+		return -ENOENT;
+
+	fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir,
+				     NULL, &trace_fops);
+	if (!fentry)
+		return -ENODEV;
+
+	return 0;
+
+}
+
+void __init ras_debugfs_init(void)
+{
+	ras_debugfs_dir = debugfs_create_dir("ras", NULL);
+}
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index b0c6ed1..4cac43a 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -5,8 +5,22 @@
  *	Chen, Gong <gong.chen@linux.intel.com>
  */
 
+#include <linux/init.h>
+#include <linux/ras.h>
+
 #define CREATE_TRACE_POINTS
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
+static int __init ras_init(void)
+{
+	int rc = 0;
+
+	ras_debugfs_init();
+	rc = ras_add_daemon_trace();
+
+	return rc;
+}
+subsys_initcall(ras_init);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
diff --git a/include/linux/ras.h b/include/linux/ras.h
new file mode 100644
index 0000000..af53248
--- /dev/null
+++ b/include/linux/ras.h
@@ -0,0 +1,15 @@
+#ifndef __RAS_H__
+#define __RAS_H__
+
+#ifdef CONFIG_DEBUG_FS
+extern struct dentry *ras_debugfs_dir;
+int ras_userspace_consumers(void);
+void ras_debugfs_init(void);
+int ras_add_daemon_trace(void);
+#else
+static inline int ras_userspace_consumers(void) { return 0; }
+static inline void ras_debugfs_init(void) { return; }
+static inline int ras_add_daemon_trace(void) { return 0; }
+#endif
+
+#endif
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
                   ` (3 preceding siblings ...)
  2014-06-11  8:34 ` [PATCH 4/7 v2] RAS, debugfs: Add debugfs interface for RAS subsystem Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11 19:02   ` Borislav Petkov
  2014-06-11  8:34 ` [PATCH 6/7 v4] trace, eMCA: Add a knob to adjust where to save event log Chen, Gong
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

Add a trace interface to report all H/W error related information.

v7 -> v6: compact trace info to save trace buffer space.
v6 -> v5: format adjustment.
v5 -> v4: Add physical mask(LSB) in trace.
v4 -> v3: change ras trace dependency rule.
v3 -> v2: minor adjustment according to the suggestion from Boris.
v2 -> v1: spinlock is not needed anymore.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/Kconfig        |  4 ++-
 drivers/acpi/acpi_extlog.c  | 27 ++++++++++++++++---
 drivers/firmware/efi/cper.c | 48 +++++++++++++++++++++++++++++++---
 drivers/ras/ras.c           |  1 +
 include/linux/cper.h        | 21 +++++++++++++++
 include/ras/ras_event.h     | 63 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 156 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index a34a228..099a2d5 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -370,6 +370,7 @@ config ACPI_EXTLOG
 	tristate "Extended Error Log support"
 	depends on X86_MCE && X86_LOCAL_APIC
 	select UEFI_CPER
+	select RAS_TRACE
 	default n
 	help
 	  Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@ config ACPI_EXTLOG
 
 	  Enhanced MCA Logging allows firmware to provide additional error
 	  information to system software, synchronous with MCE or CMCI. This
-	  driver adds support for that functionality.
+	  driver adds support for that functionality with corresponding
+	  tracepoint which carries that information to userspace.
 
 endif	# ACPI
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 1853341..e61da95 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -16,6 +16,7 @@
 #include <asm/mce.h>
 
 #include "apei/apei-internal.h"
+#include <ras/ras_event.h>
 
 #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
 
@@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	struct mce *mce = (struct mce *)data;
 	int	bank = mce->bank;
 	int	cpu = mce->extcpu;
-	struct acpi_generic_status *estatus;
-	int rc;
+	struct acpi_generic_status *estatus, *tmp;
+	struct acpi_generic_data *gdata;
+	const uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
+	uuid_le *sec_type;
+	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
 	if (estatus == NULL)
@@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	/* clear record status to enable BIOS to update it again */
 	estatus->block_status = 0;
 
-	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+	tmp = (struct acpi_generic_status *)elog_buf;
+	print_extlog_rcd(NULL, tmp, cpu);
+
+	/* log event via trace */
+	err_seq++;
+	gdata = (struct acpi_generic_data *)(tmp + 1);
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+		fru_id = (uuid_le *)gdata->fru_id;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+		fru_text = gdata->fru_text;
+	sec_type = (uuid_le *)gdata->section_type;
+	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+		if (gdata->error_data_length >= sizeof(*mem))
+			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+					       (u8)gdata->error_severity);
+	}
 
 	return NOTIFY_STOP;
 }
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 83b56b61..85d6d30 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype)
 }
 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 
-int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
 
@@ -249,7 +249,7 @@ int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
 	return n;
 }
 
-int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
 	const char *bank = NULL, *device = NULL;
@@ -271,8 +271,47 @@ int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
 	return n;
 }
 
+void cper_mem_err_pack(const struct cper_sec_mem_err *mem, void *data)
+{
+	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
+
+	cmem->validation_bits = mem->validation_bits;
+	cmem->node = mem->node;
+	cmem->card = mem->card;
+	cmem->module = mem->module;
+	cmem->bank = mem->bank;
+	cmem->device = mem->device;
+	cmem->row = mem->row;
+	cmem->column = mem->column;
+	cmem->bit_pos = mem->bit_pos;
+	cmem->requestor_id = mem->requestor_id;
+	cmem->responder_id = mem->responder_id;
+	cmem->target_id = mem->target_id;
+	cmem->rank = mem->rank;
+	cmem->mem_array_handle = mem->mem_array_handle;
+	cmem->mem_dev_handle = mem->mem_dev_handle;
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_pack);
+
+const char *cper_mem_err_unpack(struct trace_seq *p, void *data)
+{
+	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
+	const char *ret = p->buffer + p->len;
+
+	if (cper_mem_err_location(cmem, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+	if (cper_dimm_err_location(cmem, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+	trace_seq_putc(p, '\0');
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_unpack);
+
 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 {
+	struct cper_mem_err_compact cmem;
+
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
 	if (mem->validation_bits & CPER_MEM_VALID_PA)
@@ -281,14 +320,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
 		printk("%s""physical_address_mask: 0x%016llx\n",
 		       pfx, mem->physical_addr_mask);
-	if (cper_mem_err_location(mem, rcd_decode_str))
+	cper_mem_err_pack(mem, &cmem);
+	if (cper_mem_err_location(&cmem, rcd_decode_str))
 		printk("%s%s\n", pfx, rcd_decode_str);
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 		u8 etype = mem->error_type;
 		printk("%s""error_type: %d, %s\n", pfx, etype,
 		       cper_mem_err_type_str(etype));
 	}
-	if (cper_dimm_err_location(mem, rcd_decode_str))
+	if (cper_dimm_err_location(&cmem, rcd_decode_str))
 		printk("%s%s\n", pfx, rcd_decode_str);
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 4cac43a..da227a3 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -23,4 +23,5 @@ static int __init ras_init(void)
 }
 subsys_initcall(ras_init);
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index ed088b9..3548160 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -22,6 +22,7 @@
 #define LINUX_CPER_H
 
 #include <linux/uuid.h>
+#include <linux/trace_seq.h>
 
 /* CPER record signature and the size */
 #define CPER_SIG_RECORD				"CPER"
@@ -363,6 +364,24 @@ struct cper_sec_mem_err {
 	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
 };
 
+struct cper_mem_err_compact {
+	__u64	validation_bits;
+	__u16	node;
+	__u16	card;
+	__u16	module;
+	__u16	bank;
+	__u16	device;
+	__u16	row;
+	__u16	column;
+	__u16	bit_pos;
+	__u64	requestor_id;
+	__u64	responder_id;
+	__u64	target_id;
+	__u16	rank;
+	__u16	mem_array_handle;
+	__u16	mem_dev_handle;
+};
+
 struct cper_sec_pcie {
 	__u64		validation_bits;
 	__u32		port_type;
@@ -406,5 +425,7 @@ const char *cper_severity_str(unsigned int);
 const char *cper_mem_err_type_str(unsigned int);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char * const strs[], unsigned int strs_size);
+void cper_mem_err_pack(const struct cper_sec_mem_err *, void *);
+const char *cper_mem_err_unpack(struct trace_seq *, void *);
 
 #endif
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index acbcbb8..c5e58db 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -9,6 +9,69 @@
 #include <linux/edac.h>
 #include <linux/ktime.h>
 #include <linux/aer.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ */
+
+/* memory trace event */
+
+TRACE_EVENT(extlog_mem_event,
+	TP_PROTO(struct cper_sec_mem_err *mem,
+		 u32 err_seq,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 u8 sev),
+
+	TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, etype)
+		__field(u8, sev)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__array(u8, fru_id, 40)
+		__string(fru_text, fru_text)
+		__array(u8, data, sizeof(struct cper_mem_err_compact))
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+		if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+			__entry->etype = mem->error_type;
+		else
+			__entry->etype = ~0;
+		__entry->sev = sev;
+		if (mem->validation_bits & CPER_MEM_VALID_PA)
+			__entry->pa = mem->physical_addr;
+		else
+			__entry->pa = ~0ull;
+
+		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+			__entry->pa_mask_lsb =
+				(u8)__ffs64(mem->physical_addr_mask);
+		else
+			__entry->pa_mask_lsb = ~0;
+		snprintf(__entry->fru_id, 39, "%pUl", fru_id);
+		__assign_str(fru_text, fru_text);
+		cper_mem_err_pack(mem, __entry->data);
+	),
+
+	TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %s %.20s",
+		  __entry->err_seq,
+		  cper_severity_str(__entry->sev),
+		  cper_mem_err_type_str(__entry->etype),
+		  __entry->pa,
+		  __entry->pa_mask_lsb,
+		  cper_mem_err_unpack(p, __entry->data),
+		  __entry->fru_id,
+		  __get_str(fru_text))
+);
 
 /*
  * Hardware Events Report
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 6/7 v4] trace, eMCA: Add a knob to adjust where to save event log
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
                   ` (4 preceding siblings ...)
  2014-06-11  8:34 ` [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11  8:34 ` [PATCH 7/7] RAS, extlog: Adjust init flow Chen, Gong
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

To avoid saving two copies of one H/W event, add a new
file under debugfs to control how the event log is saved.
While this file is open, the perf/trace interface is used
and the kernel stops printing the event log to the
console. Once the file is closed, the kernel prints the
event log to the console again.

v4 -> v3: format adjustment.
v3 -> v2: minor adjustment to make flow cleanly.
v2 -> v1: move counter operation from *read* to *open*.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/acpi_extlog.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index e61da95..a99d4a6 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
 #include <linux/cper.h>
 #include <linux/ratelimit.h>
 #include <linux/edac.h>
+#include <linux/ras.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 
@@ -154,7 +155,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	estatus->block_status = 0;
 
 	tmp = (struct acpi_generic_status *)elog_buf;
-	print_extlog_rcd(NULL, tmp, cpu);
+
+	if (!ras_userspace_consumers()) {
+		print_extlog_rcd(NULL, tmp, cpu);
+		goto out;
+	}
 
 	/* log event via trace */
 	err_seq++;
@@ -171,6 +176,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 					       (u8)gdata->error_severity);
 	}
 
+out:
 	return NOTIFY_STOP;
 }
 
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 7/7] RAS, extlog: Adjust init flow
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
                   ` (5 preceding siblings ...)
  2014-06-11  8:34 ` [PATCH 6/7 v4] trace, eMCA: Add a knob to adjust where to save event log Chen, Gong
@ 2014-06-11  8:34 ` Chen, Gong
  2014-06-11 21:33 ` New eMCA trace event interface V4 Luck, Tony
  2014-06-12  6:11 ` [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface Chen, Gong
  8 siblings, 0 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-11  8:34 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab, rostedt; +Cc: linux-acpi, Chen, Gong

If the platform has no eMCA-related capability, there is no
need to check whether there is a conflict with the EDAC driver.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/acpi_extlog.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a99d4a6..0ad6f38 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -223,19 +223,16 @@ static int __init extlog_init(void)
 	u64 cap;
 	int rc;
 
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+
+	if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
+		return -ENODEV;
+
 	if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
 		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
 		return -EPERM;
 	}
 
-	rc = -ENODEV;
-	rdmsrl(MSR_IA32_MCG_CAP, cap);
-	if (!(cap & MCG_ELOG_P))
-		return rc;
-
-	if (!extlog_get_l1addr())
-		return rc;
-
 	rc = -EINVAL;
 	/* get L1 header to fetch necessary information */
 	l1_hdr_size = sizeof(struct extlog_l1_head);
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event
  2014-06-11  8:34 ` [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event Chen, Gong
@ 2014-06-11 18:59   ` Borislav Petkov
  0 siblings, 0 replies; 32+ messages in thread
From: Borislav Petkov @ 2014-06-11 18:59 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, rostedt, linux-acpi, lkml

On Wed, Jun 11, 2014 at 04:34:45AM -0400, Chen, Gong wrote:
> To avoid confuision and conflict of usage for RAS related trace event,
> add an unified RAS trace event stub.
> 
> v5 -> v4: remove explicit RAS menuconfig.
> v4 -> v3: change dependency rule of RAS_TRACE.
> v3 -> v2: fix dependency in Kconfig.
> v2 -> v1: adjust Kconfig to take RAS as a separate subsystem.

Let's simplify it a little - I've dropped RAS_TRACE for now. We can
carve it out later, when needed.

---
From: "Chen, Gong" <gong.chen@linux.intel.com>
Subject: [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event

To avoid confusion and conflicting usage of RAS-related trace events,
add a unified RAS trace event stub.

Start a RAS subsystem menu which will be fleshed out in time, when more
features get added to it.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1402475691-30045-2-git-send-email-gong.chen@linux.intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/Kconfig        |  2 ++
 drivers/Makefile       |  1 +
 drivers/edac/Kconfig   |  1 +
 drivers/edac/edac_mc.c |  3 ---
 drivers/ras/Kconfig    |  6 ++++++
 drivers/ras/Makefile   |  1 +
 drivers/ras/ras.c      | 12 ++++++++++++
 7 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 drivers/ras/Kconfig
 create mode 100644 drivers/ras/Makefile
 create mode 100644 drivers/ras/ras.c

Index: linux/drivers/Kconfig
===================================================================
--- linux.orig/drivers/Kconfig	2014-06-11 17:14:23.782437196 +0200
+++ linux/drivers/Kconfig	2014-06-11 17:14:23.770437196 +0200
@@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig"
 
 source "drivers/mcb/Kconfig"
 
+source "drivers/ras/Kconfig"
+
 endmenu
Index: linux/drivers/Makefile
===================================================================
--- linux.orig/drivers/Makefile	2014-06-11 17:14:23.782437196 +0200
+++ linux/drivers/Makefile	2014-06-11 17:14:23.770437196 +0200
@@ -158,3 +158,4 @@ obj-$(CONFIG_NTB)		+= ntb/
 obj-$(CONFIG_FMC)		+= fmc/
 obj-$(CONFIG_POWERCAP)		+= powercap/
 obj-$(CONFIG_MCB)		+= mcb/
+obj-$(CONFIG_RAS)		+= ras/
Index: linux/drivers/edac/Kconfig
===================================================================
--- linux.orig/drivers/edac/Kconfig	2014-06-11 17:14:23.782437196 +0200
+++ linux/drivers/edac/Kconfig	2014-06-11 17:24:18.142427373 +0200
@@ -72,6 +72,7 @@ config EDAC_MCE_INJ
 
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
+	select RAS
 	help
 	  Some systems are able to detect and correct errors in main
 	  memory.  EDAC can report statistics on memory error
Index: linux/drivers/edac/edac_mc.c
===================================================================
--- linux.orig/drivers/edac/edac_mc.c	2014-06-11 17:14:23.782437196 +0200
+++ linux/drivers/edac/edac_mc.c	2014-06-11 17:14:23.770437196 +0200
@@ -33,9 +33,6 @@
 #include <asm/edac.h>
 #include "edac_core.h"
 #include "edac_module.h"
-
-#define CREATE_TRACE_POINTS
-#define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
 /* lock to memory controller's control array */
Index: linux/drivers/ras/Kconfig
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/ras/Kconfig	2014-06-11 17:24:00.846427659 +0200
@@ -0,0 +1,2 @@
+config RAS
+	bool
Index: linux/drivers/ras/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/ras/Makefile	2014-06-11 17:14:23.774437196 +0200
@@ -0,0 +1 @@
+obj-$(CONFIG_RAS) += ras.o
Index: linux/drivers/ras/ras.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/ras/ras.c	2014-06-11 17:14:23.774437196 +0200
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ *	Chen, Gong <gong.chen@linux.intel.com>
+ */
+
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
--

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 2/7 v3] trace, AER: Move trace into unified interface
  2014-06-11  8:34 ` [PATCH 2/7 v3] trace, AER: Move trace into unified interface Chen, Gong
@ 2014-06-11 19:00   ` Borislav Petkov
  0 siblings, 0 replies; 32+ messages in thread
From: Borislav Petkov @ 2014-06-11 19:00 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, rostedt, linux-acpi, lkml

On Wed, Jun 11, 2014 at 04:34:46AM -0400, Chen, Gong wrote:
> AER uses a separate trace interface by now. To make it
> consistent, move it into unified RAS trace interface.
> 
> v3 -> v2: change dependency rule of RAS_TRACE.
> v2 -> v1: remove unnecessary dependency in drivers/ras/Kconfig.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> ---
>  drivers/pci/pcie/aer/Kconfig           |  1 +
>  drivers/pci/pcie/aer/aerdrv_errprint.c |  4 +-
>  include/ras/ras_event.h                | 64 ++++++++++++++++++++++++++++
>  include/trace/events/ras.h             | 77 ----------------------------------
>  4 files changed, 66 insertions(+), 80 deletions(-)
>  delete mode 100644 include/trace/events/ras.h
> 
> diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig
> index 50e94e0..c611384 100644
> --- a/drivers/pci/pcie/aer/Kconfig
> +++ b/drivers/pci/pcie/aer/Kconfig
> @@ -5,6 +5,7 @@
>  config PCIEAER
>  	boolean "Root Port Advanced Error Reporting support"
>  	depends on PCIEPORTBUS
> +	select RAS_TRACE
>  	default y
>  	help
>  	  This enables PCI Express Root Port Advanced Error Reporting

With this hunk changed to

Index: b/drivers/pci/pcie/aer/Kconfig
===================================================================
--- a/drivers/pci/pcie/aer/Kconfig      2014-06-11 17:33:57.298417802 +0200
+++ b/drivers/pci/pcie/aer/Kconfig      2014-06-11 17:34:16.302417487 +0200
@@ -5,6 +5,7 @@
 config PCIEAER
        boolean "Root Port Advanced Error Reporting support"
        depends on PCIEPORTBUS
+       select RAS
        default y
        help
          This enables PCI Express Root Port Advanced Error Reporting
--

Acked-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/7 v2] RAS, debugfs: Add debugfs interface for RAS subsystem
  2014-06-11  8:34 ` [PATCH 4/7 v2] RAS, debugfs: Add debugfs interface for RAS subsystem Chen, Gong
@ 2014-06-11 19:01   ` Borislav Petkov
  0 siblings, 0 replies; 32+ messages in thread
From: Borislav Petkov @ 2014-06-11 19:01 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, rostedt, linux-acpi, lkml

On Wed, Jun 11, 2014 at 04:34:48AM -0400, Chen, Gong wrote:
> Implement a new debugfs interface for RAS susbsystem.
> A file named daemon_active is added there accordingly.
> This file is used to track if user space daemon enables
> perf/trace interface or not. One can track which daemon
> opens it via "lsof /path/to/debugfs/ras/daemon_active".
> 
> v2 -> v1: Change file access mode from 0444 to 0400.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> ---
>  drivers/ras/Makefile  |  2 +-
>  drivers/ras/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/ras/ras.c     | 14 +++++++++++++
>  include/linux/ras.h   | 15 ++++++++++++++
>  4 files changed, 87 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/ras/debugfs.c
>  create mode 100644 include/linux/ras.h
> 
> diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
> index 223e806..d7f7334 100644
> --- a/drivers/ras/Makefile
> +++ b/drivers/ras/Makefile
> @@ -1 +1 @@
> -obj-$(CONFIG_RAS) += ras.o
> +obj-$(CONFIG_RAS) += ras.o debugfs.o
> diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
> new file mode 100644
> index 0000000..d0bc389
> --- /dev/null
> +++ b/drivers/ras/debugfs.c
> @@ -0,0 +1,57 @@
> +#include <linux/debugfs.h>
> +
> +struct dentry *ras_debugfs_dir;
> +EXPORT_SYMBOL_GPL(ras_debugfs_dir);

No need to export this. Revised version below:

---
From: "Chen, Gong" <gong.chen@linux.intel.com>

Implement a new debugfs interface for the RAS subsystem.
A file named daemon_active is added there accordingly.
This file is used to track whether a user space daemon accesses
the perf/trace interface or not. One can track which daemon
opens it via "lsof /path/to/debugfs/ras/daemon_active".

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1402475691-30045-5-git-send-email-gong.chen@linux.intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/ras/Makefile  |  2 +-
 drivers/ras/debugfs.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ras/ras.c     | 14 +++++++++++++
 include/linux/ras.h   | 15 ++++++++++++++
 4 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 drivers/ras/debugfs.c
 create mode 100644 include/linux/ras.h

Index: linux/drivers/ras/Makefile
===================================================================
--- linux.orig/drivers/ras/Makefile	2014-06-11 17:54:21.738397566 +0200
+++ linux/drivers/ras/Makefile	2014-06-11 17:54:21.726397566 +0200
@@ -1 +1 @@
-obj-$(CONFIG_RAS) += ras.o
+obj-$(CONFIG_RAS) += ras.o debugfs.o
Index: linux/drivers/ras/debugfs.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/ras/debugfs.c	2014-06-11 17:58:47.214393178 +0200
@@ -0,0 +1,56 @@
+#include <linux/debugfs.h>
+
+static struct dentry *ras_debugfs_dir;
+
+static atomic_t trace_count = ATOMIC_INIT(0);
+
+int ras_userspace_consumers(void)
+{
+	return atomic_read(&trace_count);
+}
+EXPORT_SYMBOL_GPL(ras_userspace_consumers);
+
+static int trace_show(struct seq_file *m, void *v)
+{
+	return atomic_read(&trace_count);
+}
+
+static int trace_open(struct inode *inode, struct file *file)
+{
+	atomic_inc(&trace_count);
+	return single_open(file, trace_show, NULL);
+}
+
+static int trace_release(struct inode *inode, struct file *file)
+{
+	atomic_dec(&trace_count);
+	return single_release(inode, file);
+}
+
+static const struct file_operations trace_fops = {
+	.open    = trace_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = trace_release,
+};
+
+int __init ras_add_daemon_trace(void)
+{
+	struct dentry *fentry;
+
+	if (!ras_debugfs_dir)
+		return -ENOENT;
+
+	fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir,
+				     NULL, &trace_fops);
+	if (!fentry)
+		return -ENODEV;
+
+	return 0;
+
+}
+
+void __init ras_debugfs_init(void)
+{
+	ras_debugfs_dir = debugfs_create_dir("ras", NULL);
+}
Index: linux/drivers/ras/ras.c
===================================================================
--- linux.orig/drivers/ras/ras.c	2014-06-11 17:54:21.738397566 +0200
+++ linux/drivers/ras/ras.c	2014-06-11 17:54:21.730397566 +0200
@@ -5,8 +5,22 @@
  *	Chen, Gong <gong.chen@linux.intel.com>
  */
 
+#include <linux/init.h>
+#include <linux/ras.h>
+
 #define CREATE_TRACE_POINTS
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
+static int __init ras_init(void)
+{
+	int rc = 0;
+
+	ras_debugfs_init();
+	rc = ras_add_daemon_trace();
+
+	return rc;
+}
+subsys_initcall(ras_init);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
Index: linux/include/linux/ras.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/ras.h	2014-06-11 17:58:43.350393242 +0200
@@ -0,0 +1,14 @@
+#ifndef __RAS_H__
+#define __RAS_H__
+
+#ifdef CONFIG_DEBUG_FS
+int ras_userspace_consumers(void);
+void ras_debugfs_init(void);
+int ras_add_daemon_trace(void);
+#else
+static inline int ras_userspace_consumers(void) { return 0; }
+static inline void ras_debugfs_init(void) { return; }
+static inline int ras_add_daemon_trace(void) { return 0; }
+#endif
+
+#endif

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface
  2014-06-11  8:34 ` [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface Chen, Gong
@ 2014-06-11 19:02   ` Borislav Petkov
  2014-06-12  2:42     ` Chen, Gong
  0 siblings, 1 reply; 32+ messages in thread
From: Borislav Petkov @ 2014-06-11 19:02 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, rostedt, linux-acpi, lkml

On Wed, Jun 11, 2014 at 04:34:49AM -0400, Chen, Gong wrote:
> Add trace interface to elaborate all H/W error related information.
> 
> v7 -> v6: compact trace info to save trace buffer space.
> v6 -> v5: format adjustment.
> v5 -> v4: Add physical mask(LSB) in trace.
> v4 -> v3: change ras trace dependency rule.
> v3 -> v2: minor adjustment according to the suggestion from Boris.
> v2 -> v1: spinlock is not needed anymore.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> ---
>  drivers/acpi/Kconfig        |  4 ++-
>  drivers/acpi/acpi_extlog.c  | 27 ++++++++++++++++---
>  drivers/firmware/efi/cper.c | 48 +++++++++++++++++++++++++++++++---
>  drivers/ras/ras.c           |  1 +
>  include/linux/cper.h        | 21 +++++++++++++++
>  include/ras/ras_event.h     | 63 +++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 156 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index a34a228..099a2d5 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -370,6 +370,7 @@ config ACPI_EXTLOG
>  	tristate "Extended Error Log support"
>  	depends on X86_MCE && X86_LOCAL_APIC
>  	select UEFI_CPER
> +	select RAS_TRACE
>  	default n
>  	help
>  	  Certain usages such as Predictive Failure Analysis (PFA) require
> @@ -384,6 +385,7 @@ config ACPI_EXTLOG
>  
>  	  Enhanced MCA Logging allows firmware to provide additional error
>  	  information to system software, synchronous with MCE or CMCI. This
> -	  driver adds support for that functionality.
> +	  driver adds support for that functionality with corresponding
> +	  tracepoint which carries that information to userspace.
>  
>  endif	# ACPI
> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
> index 1853341..e61da95 100644
> --- a/drivers/acpi/acpi_extlog.c
> +++ b/drivers/acpi/acpi_extlog.c
> @@ -16,6 +16,7 @@
>  #include <asm/mce.h>
>  
>  #include "apei/apei-internal.h"
> +#include <ras/ras_event.h>
>  
>  #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
>  
> @@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>  	struct mce *mce = (struct mce *)data;
>  	int	bank = mce->bank;
>  	int	cpu = mce->extcpu;
> -	struct acpi_generic_status *estatus;
> -	int rc;
> +	struct acpi_generic_status *estatus, *tmp;
> +	struct acpi_generic_data *gdata;
> +	const uuid_le *fru_id = &NULL_UUID_LE;
> +	char *fru_text = "";
> +	uuid_le *sec_type;
> +	static u32 err_seq;
>  
>  	estatus = extlog_elog_entry_check(cpu, bank);
>  	if (estatus == NULL)
> @@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>  	/* clear record status to enable BIOS to update it again */
>  	estatus->block_status = 0;
>  
> -	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
> +	tmp = (struct acpi_generic_status *)elog_buf;
> +	print_extlog_rcd(NULL, tmp, cpu);
> +
> +	/* log event via trace */
> +	err_seq++;
> +	gdata = (struct acpi_generic_data *)(tmp + 1);
> +	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> +		fru_id = (uuid_le *)gdata->fru_id;
> +	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> +		fru_text = gdata->fru_text;
> +	sec_type = (uuid_le *)gdata->section_type;
> +	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
> +		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
> +		if (gdata->error_data_length >= sizeof(*mem))
> +			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
> +					       (u8)gdata->error_severity);
> +	}
>  
>  	return NOTIFY_STOP;
>  }
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index 83b56b61..85d6d30 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype)
>  }
>  EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
>  
> -int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
> +int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
>  {
>  	u32 len, n;
>  
> @@ -249,7 +249,7 @@ int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
>  	return n;
>  }
>  
> -int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
> +int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
>  {
>  	u32 len, n;
>  	const char *bank = NULL, *device = NULL;
> @@ -271,8 +271,47 @@ int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
>  	return n;
>  }
>  
> +void cper_mem_err_pack(const struct cper_sec_mem_err *mem, void *data)
> +{
> +	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
> +
> +	cmem->validation_bits = mem->validation_bits;
> +	cmem->node = mem->node;
> +	cmem->card = mem->card;
> +	cmem->module = mem->module;
> +	cmem->bank = mem->bank;
> +	cmem->device = mem->device;
> +	cmem->row = mem->row;
> +	cmem->column = mem->column;
> +	cmem->bit_pos = mem->bit_pos;
> +	cmem->requestor_id = mem->requestor_id;
> +	cmem->responder_id = mem->responder_id;
> +	cmem->target_id = mem->target_id;
> +	cmem->rank = mem->rank;
> +	cmem->mem_array_handle = mem->mem_array_handle;
> +	cmem->mem_dev_handle = mem->mem_dev_handle;
> +}
> +EXPORT_SYMBOL_GPL(cper_mem_err_pack);

Why do we export this one and the one below? What .config warrants this?

CONFIG_ACPI_EXTLOG=m doesn't need them, AFAICT.

> +const char *cper_mem_err_unpack(struct trace_seq *p, void *data)
> +{
> +	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
> +	const char *ret = p->buffer + p->len;
> +
> +	if (cper_mem_err_location(cmem, rcd_decode_str))
> +		trace_seq_printf(p, "%s", rcd_decode_str);
> +	if (cper_dimm_err_location(cmem, rcd_decode_str))
> +		trace_seq_printf(p, "%s", rcd_decode_str);
> +	trace_seq_putc(p, '\0');
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(cper_mem_err_unpack);
> +
>  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  {
> +	struct cper_mem_err_compact cmem;
> +
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
>  		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
>  	if (mem->validation_bits & CPER_MEM_VALID_PA)
> @@ -281,14 +320,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
>  		printk("%s""physical_address_mask: 0x%016llx\n",
>  		       pfx, mem->physical_addr_mask);
> -	if (cper_mem_err_location(mem, rcd_decode_str))
> +	cper_mem_err_pack(mem, &cmem);
> +	if (cper_mem_err_location(&cmem, rcd_decode_str))
>  		printk("%s%s\n", pfx, rcd_decode_str);
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
>  		u8 etype = mem->error_type;
>  		printk("%s""error_type: %d, %s\n", pfx, etype,
>  		       cper_mem_err_type_str(etype));
>  	}
> -	if (cper_dimm_err_location(mem, rcd_decode_str))
> +	if (cper_dimm_err_location(&cmem, rcd_decode_str))
>  		printk("%s%s\n", pfx, rcd_decode_str);
>  }
>  
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index 4cac43a..da227a3 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -23,4 +23,5 @@ static int __init ras_init(void)
>  }
>  subsys_initcall(ras_init);
>  
> +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
> diff --git a/include/linux/cper.h b/include/linux/cper.h
> index ed088b9..3548160 100644
> --- a/include/linux/cper.h
> +++ b/include/linux/cper.h
> @@ -22,6 +22,7 @@
>  #define LINUX_CPER_H
>  
>  #include <linux/uuid.h>
> +#include <linux/trace_seq.h>
>  
>  /* CPER record signature and the size */
>  #define CPER_SIG_RECORD				"CPER"
> @@ -363,6 +364,24 @@ struct cper_sec_mem_err {
>  	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
>  };
>  
> +struct cper_mem_err_compact {
> +	__u64	validation_bits;
> +	__u16	node;
> +	__u16	card;
> +	__u16	module;
> +	__u16	bank;
> +	__u16	device;
> +	__u16	row;
> +	__u16	column;
> +	__u16	bit_pos;
> +	__u64	requestor_id;
> +	__u64	responder_id;
> +	__u64	target_id;
> +	__u16	rank;
> +	__u16	mem_array_handle;
> +	__u16	mem_dev_handle;
> +};
> +
>  struct cper_sec_pcie {
>  	__u64		validation_bits;
>  	__u32		port_type;
> @@ -406,5 +425,7 @@ const char *cper_severity_str(unsigned int);
>  const char *cper_mem_err_type_str(unsigned int);
>  void cper_print_bits(const char *prefix, unsigned int bits,
>  		     const char * const strs[], unsigned int strs_size);
> +void cper_mem_err_pack(const struct cper_sec_mem_err *, void *);
> +const char *cper_mem_err_unpack(struct trace_seq *, void *);
>  
>  #endif
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index acbcbb8..c5e58db 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -9,6 +9,69 @@
>  #include <linux/edac.h>
>  #include <linux/ktime.h>
>  #include <linux/aer.h>
> +#include <linux/cper.h>
> +
> +/*
> + * MCE Extended Error Log trace event
> + *
> + * These events are generated when hardware detects a corrected or
> + * uncorrected event.
> + */
> +
> +/* memory trace event */
> +
> +TRACE_EVENT(extlog_mem_event,
> +	TP_PROTO(struct cper_sec_mem_err *mem,
> +		 u32 err_seq,
> +		 const uuid_le *fru_id,
> +		 const char *fru_text,
> +		 u8 sev),
> +
> +	TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, err_seq)
> +		__field(u8, etype)
> +		__field(u8, sev)
> +		__field(u64, pa)
> +		__field(u8, pa_mask_lsb)
> +		__array(u8, fru_id, 40)

How did you come up with this magic number? Why isn't that sizeof(uuid_le)?

> +		__string(fru_text, fru_text)
> +		__array(u8, data, sizeof(struct cper_mem_err_compact))
> +	),
> +
> +	TP_fast_assign(
> +		__entry->err_seq = err_seq;
> +		if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> +			__entry->etype = mem->error_type;
> +		else
> +			__entry->etype = ~0;
> +		__entry->sev = sev;
> +		if (mem->validation_bits & CPER_MEM_VALID_PA)
> +			__entry->pa = mem->physical_addr;
> +		else
> +			__entry->pa = ~0ull;
> +
> +		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
> +			__entry->pa_mask_lsb =
> +				(u8)__ffs64(mem->physical_addr_mask);

No need for the linebreak here - just let it stick out.

> +		else
> +			__entry->pa_mask_lsb = ~0;
> +		snprintf(__entry->fru_id, 39, "%pUl", fru_id);

Yeah, I didn't catch the reasoning behind why we need to convert the FRU
into a string and not leave it simply as u8[16]...

> +		__assign_str(fru_text, fru_text);
> +		cper_mem_err_pack(mem, __entry->data);
> +	),
> +
> +	TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %s %.20s",
> +		  __entry->err_seq,
> +		  cper_severity_str(__entry->sev),
> +		  cper_mem_err_type_str(__entry->etype),
> +		  __entry->pa,
> +		  __entry->pa_mask_lsb,
> +		  cper_mem_err_unpack(p, __entry->data),
> +		  __entry->fru_id,
> +		  __get_str(fru_text))
> +);
>  
>  /*
>   * Hardware Events Report
> -- 
> 2.0.0.rc2
> 
> 

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 32+ messages in thread

* RE: New eMCA trace event interface V4
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
                   ` (6 preceding siblings ...)
  2014-06-11  8:34 ` [PATCH 7/7] RAS, extlog: Adjust init flow Chen, Gong
@ 2014-06-11 21:33 ` Luck, Tony
  2014-06-12  6:11 ` [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface Chen, Gong
  8 siblings, 0 replies; 32+ messages in thread
From: Luck, Tony @ 2014-06-11 21:33 UTC (permalink / raw)
  To: Chen, Gong, bp, m.chehab, rostedt; +Cc: linux-acpi

Let's try and close this out.

I made a branch in the ras git tree and applied the first few parts:

	git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git eMCA-trace

I took Boris' modified versions of parts 1 & 4, and his alternate hunk for part 2.
I also assumed no comment on part 3 to be assent, so applied that too.

Gong: when you see Boris' comments on part5 - you can just resend an update
based on this tree.

-Tony

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface
  2014-06-11 19:02   ` Borislav Petkov
@ 2014-06-12  2:42     ` Chen, Gong
  0 siblings, 0 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-12  2:42 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: tony.luck, m.chehab, rostedt, linux-acpi, lkml

[-- Attachment #1: Type: text/plain, Size: 898 bytes --]

On Wed, Jun 11, 2014 at 09:02:15PM +0200, Borislav Petkov wrote:
> > +EXPORT_SYMBOL_GPL(cper_mem_err_pack);
> 
> Why do we export this one and the one below? What .config warrants this?
> 
> CONFIG_ACPI_EXTLOG=m doesn't need them, AFAICT.
> 
Right. acpi_extlog doesn't use them. They can be exported later, when needed.

> > +	TP_STRUCT__entry(
> > +		__field(u32, err_seq)
> > +		__field(u8, etype)
> > +		__field(u8, sev)
> > +		__field(u64, pa)
> > +		__field(u8, pa_mask_lsb)
> > +		__array(u8, fru_id, 40)
> 
> How did you come up with this magic number? Why isn't that sizeof(uuid_le)?
Because I want to convert it into a string.

> > +		snprintf(__entry->fru_id, 39, "%pUl", fru_id);
> 
> Yeah, I didn't catch the reasoning behind why we need to convert the FRU
> into a string and not leave it simply as u8[16]...
Fair enough. It can be compressed a little bit more.


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
                   ` (7 preceding siblings ...)
  2014-06-11 21:33 ` New eMCA trace event interface V4 Luck, Tony
@ 2014-06-12  6:11 ` Chen, Gong
  2014-06-12 13:28   ` Steven Rostedt
  8 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-12  6:11 UTC (permalink / raw)
  To: tony.luck; +Cc: bp, m.chehab, rostedt, linux-acpi, Chen, Gong

Add trace interface to elaborate all H/W error related information.

v7 -> v6: compact trace info to save trace buffer space.
v6 -> v5: format adjustment.
v5 -> v4: Add physical mask(LSB) in trace.
v4 -> v3: change ras trace dependency rule.
v3 -> v2: minor adjustment according to the suggestion from Boris.
v2 -> v1: spinlock is not needed anymore.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/Kconfig        |  4 ++-
 drivers/acpi/acpi_extlog.c  | 27 +++++++++++++++++---
 drivers/firmware/efi/cper.c | 46 ++++++++++++++++++++++++++++++---
 drivers/ras/ras.c           |  1 +
 include/linux/cper.h        | 21 +++++++++++++++
 include/ras/ras_event.h     | 62 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 153 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index a34a228..099a2d5 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -370,6 +370,7 @@ config ACPI_EXTLOG
 	tristate "Extended Error Log support"
 	depends on X86_MCE && X86_LOCAL_APIC
 	select UEFI_CPER
+	select RAS
 	default n
 	help
 	  Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@ config ACPI_EXTLOG
 
 	  Enhanced MCA Logging allows firmware to provide additional error
 	  information to system software, synchronous with MCE or CMCI. This
-	  driver adds support for that functionality.
+	  driver adds support for that functionality with corresponding
+	  tracepoint which carries that information to userspace.
 
 endif	# ACPI
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index c4a5d87..3c4a8aa 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -16,6 +16,7 @@
 #include <asm/mce.h>
 
 #include "apei/apei-internal.h"
+#include <ras/ras_event.h>
 
 #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
 
@@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	struct mce *mce = (struct mce *)data;
 	int	bank = mce->bank;
 	int	cpu = mce->extcpu;
-	struct acpi_generic_status *estatus;
-	int rc;
+	struct acpi_generic_status *estatus, *tmp;
+	struct acpi_generic_data *gdata;
+	const uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
+	uuid_le *sec_type;
+	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
 	if (estatus == NULL)
@@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	/* clear record status to enable BIOS to update it again */
 	estatus->block_status = 0;
 
-	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+	tmp = (struct acpi_generic_status *)elog_buf;
+	print_extlog_rcd(NULL, tmp, cpu);
+
+	/* log event via trace */
+	err_seq++;
+	gdata = (struct acpi_generic_data *)(tmp + 1);
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+		fru_id = (uuid_le *)gdata->fru_id;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+		fru_text = gdata->fru_text;
+	sec_type = (uuid_le *)gdata->section_type;
+	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+		if (gdata->error_data_length >= sizeof(*mem))
+			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+					       (u8)gdata->error_severity);
+	}
 
 	return NOTIFY_STOP;
 }
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 83b56b61..c084a24 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype)
 }
 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 
-int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
 
@@ -249,7 +249,7 @@ int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
 	return n;
 }
 
-int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
 	const char *bank = NULL, *device = NULL;
@@ -271,8 +271,45 @@ int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
 	return n;
 }
 
+void cper_mem_err_pack(const struct cper_sec_mem_err *mem, void *data)
+{
+	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
+
+	cmem->validation_bits = mem->validation_bits;
+	cmem->node = mem->node;
+	cmem->card = mem->card;
+	cmem->module = mem->module;
+	cmem->bank = mem->bank;
+	cmem->device = mem->device;
+	cmem->row = mem->row;
+	cmem->column = mem->column;
+	cmem->bit_pos = mem->bit_pos;
+	cmem->requestor_id = mem->requestor_id;
+	cmem->responder_id = mem->responder_id;
+	cmem->target_id = mem->target_id;
+	cmem->rank = mem->rank;
+	cmem->mem_array_handle = mem->mem_array_handle;
+	cmem->mem_dev_handle = mem->mem_dev_handle;
+}
+
+const char *cper_mem_err_unpack(struct trace_seq *p, void *data)
+{
+	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
+	const char *ret = p->buffer + p->len;
+
+	if (cper_mem_err_location(cmem, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+	if (cper_dimm_err_location(cmem, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+	trace_seq_putc(p, '\0');
+
+	return ret;
+}
+
 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 {
+	struct cper_mem_err_compact cmem;
+
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
 	if (mem->validation_bits & CPER_MEM_VALID_PA)
@@ -281,14 +318,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
 		printk("%s""physical_address_mask: 0x%016llx\n",
 		       pfx, mem->physical_addr_mask);
-	if (cper_mem_err_location(mem, rcd_decode_str))
+	cper_mem_err_pack(mem, &cmem);
+	if (cper_mem_err_location(&cmem, rcd_decode_str))
 		printk("%s%s\n", pfx, rcd_decode_str);
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 		u8 etype = mem->error_type;
 		printk("%s""error_type: %d, %s\n", pfx, etype,
 		       cper_mem_err_type_str(etype));
 	}
-	if (cper_dimm_err_location(mem, rcd_decode_str))
+	if (cper_dimm_err_location(&cmem, rcd_decode_str))
 		printk("%s%s\n", pfx, rcd_decode_str);
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 4cac43a..da227a3 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -23,4 +23,5 @@ static int __init ras_init(void)
 }
 subsys_initcall(ras_init);
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index ed088b9..3548160 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -22,6 +22,7 @@
 #define LINUX_CPER_H
 
 #include <linux/uuid.h>
+#include <linux/trace_seq.h>
 
 /* CPER record signature and the size */
 #define CPER_SIG_RECORD				"CPER"
@@ -363,6 +364,24 @@ struct cper_sec_mem_err {
 	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
 };
 
+struct cper_mem_err_compact {
+	__u64	validation_bits;
+	__u16	node;
+	__u16	card;
+	__u16	module;
+	__u16	bank;
+	__u16	device;
+	__u16	row;
+	__u16	column;
+	__u16	bit_pos;
+	__u64	requestor_id;
+	__u64	responder_id;
+	__u64	target_id;
+	__u16	rank;
+	__u16	mem_array_handle;
+	__u16	mem_dev_handle;
+};
+
 struct cper_sec_pcie {
 	__u64		validation_bits;
 	__u32		port_type;
@@ -406,5 +425,7 @@ const char *cper_severity_str(unsigned int);
 const char *cper_mem_err_type_str(unsigned int);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char * const strs[], unsigned int strs_size);
+void cper_mem_err_pack(const struct cper_sec_mem_err *, void *);
+const char *cper_mem_err_unpack(struct trace_seq *, void *);
 
 #endif
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index acbcbb8..f66142b 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -9,6 +9,68 @@
 #include <linux/edac.h>
 #include <linux/ktime.h>
 #include <linux/aer.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ */
+
+/* memory trace event */
+
+TRACE_EVENT(extlog_mem_event,
+	TP_PROTO(struct cper_sec_mem_err *mem,
+		 u32 err_seq,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 u8 sev),
+
+	TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, etype)
+		__field(u8, sev)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__array(u8, fru_id, sizeof(uuid_le))
+		__string(fru_text, fru_text)
+		__array(u8, data, sizeof(struct cper_mem_err_compact))
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+		if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+			__entry->etype = mem->error_type;
+		else
+			__entry->etype = ~0;
+		__entry->sev = sev;
+		if (mem->validation_bits & CPER_MEM_VALID_PA)
+			__entry->pa = mem->physical_addr;
+		else
+			__entry->pa = ~0ull;
+
+		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+			__entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
+		else
+			__entry->pa_mask_lsb = ~0;
+		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+		__assign_str(fru_text, fru_text);
+		cper_mem_err_pack(mem, __entry->data);
+	),
+
+	TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
+		  __entry->err_seq,
+		  cper_severity_str(__entry->sev),
+		  cper_mem_err_type_str(__entry->etype),
+		  __entry->pa,
+		  __entry->pa_mask_lsb,
+		  cper_mem_err_unpack(p, __entry->data),
+		  __entry->fru_id,
+		  __get_str(fru_text))
+);
 
 /*
  * Hardware Events Report
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-12  6:11 ` [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface Chen, Gong
@ 2014-06-12 13:28   ` Steven Rostedt
  2014-06-13  2:19     ` Chen, Gong
  0 siblings, 1 reply; 32+ messages in thread
From: Steven Rostedt @ 2014-06-12 13:28 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, bp, m.chehab, linux-acpi

On Thu, 12 Jun 2014 02:11:57 -0400
"Chen, Gong" <gong.chen@linux.intel.com> wrote:


> +void cper_mem_err_pack(const struct cper_sec_mem_err *mem, void *data)
> +{
> +	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
> +
> +	cmem->validation_bits = mem->validation_bits;
> +	cmem->node = mem->node;
> +	cmem->card = mem->card;
> +	cmem->module = mem->module;
> +	cmem->bank = mem->bank;
> +	cmem->device = mem->device;
> +	cmem->row = mem->row;
> +	cmem->column = mem->column;
> +	cmem->bit_pos = mem->bit_pos;
> +	cmem->requestor_id = mem->requestor_id;
> +	cmem->responder_id = mem->responder_id;
> +	cmem->target_id = mem->target_id;
> +	cmem->rank = mem->rank;
> +	cmem->mem_array_handle = mem->mem_array_handle;
> +	cmem->mem_dev_handle = mem->mem_dev_handle;
> +}
> +
> +const char *cper_mem_err_unpack(struct trace_seq *p, void *data)
> +{
> +	struct cper_mem_err_compact *cmem = (struct cper_mem_err_compact *)data;
> +	const char *ret = p->buffer + p->len;
> +
> +	if (cper_mem_err_location(cmem, rcd_decode_str))
> +		trace_seq_printf(p, "%s", rcd_decode_str);
> +	if (cper_dimm_err_location(cmem, rcd_decode_str))
> +		trace_seq_printf(p, "%s", rcd_decode_str);
> +	trace_seq_putc(p, '\0');
> +
> +	return ret;
> +}
> +
>  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  {
> +	struct cper_mem_err_compact cmem;
> +
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
>  		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
>  	if (mem->validation_bits & CPER_MEM_VALID_PA)
> @@ -281,14 +318,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
>  		printk("%s""physical_address_mask: 0x%016llx\n",
>  		       pfx, mem->physical_addr_mask);
> -	if (cper_mem_err_location(mem, rcd_decode_str))
> +	cper_mem_err_pack(mem, &cmem);
> +	if (cper_mem_err_location(&cmem, rcd_decode_str))
>  		printk("%s%s\n", pfx, rcd_decode_str);
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
>  		u8 etype = mem->error_type;
>  		printk("%s""error_type: %d, %s\n", pfx, etype,
>  		       cper_mem_err_type_str(etype));
>  	}
> -	if (cper_dimm_err_location(mem, rcd_decode_str))
> +	if (cper_dimm_err_location(&cmem, rcd_decode_str))
>  		printk("%s%s\n", pfx, rcd_decode_str);
>  }
>  
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index 4cac43a..da227a3 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -23,4 +23,5 @@ static int __init ras_init(void)
>  }
>  subsys_initcall(ras_init);
>  
> +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
> diff --git a/include/linux/cper.h b/include/linux/cper.h
> index ed088b9..3548160 100644
> --- a/include/linux/cper.h
> +++ b/include/linux/cper.h
> @@ -22,6 +22,7 @@
>  #define LINUX_CPER_H
>  
>  #include <linux/uuid.h>
> +#include <linux/trace_seq.h>
>  
>  /* CPER record signature and the size */
>  #define CPER_SIG_RECORD				"CPER"
> @@ -363,6 +364,24 @@ struct cper_sec_mem_err {
>  	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
>  };
>  
> +struct cper_mem_err_compact {
> +	__u64	validation_bits;
> +	__u16	node;
> +	__u16	card;
> +	__u16	module;
> +	__u16	bank;
> +	__u16	device;
> +	__u16	row;
> +	__u16	column;
> +	__u16	bit_pos;
> +	__u64	requestor_id;
> +	__u64	responder_id;
> +	__u64	target_id;
> +	__u16	rank;
> +	__u16	mem_array_handle;
> +	__u16	mem_dev_handle;
> +};
> +
>  struct cper_sec_pcie {
>  	__u64		validation_bits;
>  	__u32		port_type;
> @@ -406,5 +425,7 @@ const char *cper_severity_str(unsigned int);
>  const char *cper_mem_err_type_str(unsigned int);
>  void cper_print_bits(const char *prefix, unsigned int bits,
>  		     const char * const strs[], unsigned int strs_size);
> +void cper_mem_err_pack(const struct cper_sec_mem_err *, void *);
> +const char *cper_mem_err_unpack(struct trace_seq *, void *);
>  
>  #endif
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index acbcbb8..f66142b 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -9,6 +9,68 @@
>  #include <linux/edac.h>
>  #include <linux/ktime.h>
>  #include <linux/aer.h>
> +#include <linux/cper.h>
> +
> +/*
> + * MCE Extended Error Log trace event
> + *
> + * These events are generated when hardware detects a corrected or
> + * uncorrected event.
> + */
> +
> +/* memory trace event */
> +
> +TRACE_EVENT(extlog_mem_event,
> +	TP_PROTO(struct cper_sec_mem_err *mem,
> +		 u32 err_seq,
> +		 const uuid_le *fru_id,
> +		 const char *fru_text,
> +		 u8 sev),
> +
> +	TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, err_seq)
> +		__field(u8, etype)
> +		__field(u8, sev)
> +		__field(u64, pa)
> +		__field(u8, pa_mask_lsb)
> +		__array(u8, fru_id, sizeof(uuid_le))
> +		__string(fru_text, fru_text)
> +		__array(u8, data, sizeof(struct cper_mem_err_compact))

The above array works, but I'm wondering why you don't just use the
types themselves?

That is:

		__field(uuid_le, fru_id)
		__field(struct cper_mem_err_compact, data)

Then you don't need to use the memcpy for the fru_id, but just:

	__entry->fru_id = *fru_id;

Same with the data, you don't need to pass in void *, but the struct
itself into cper_mem_err_(un)pack(), and simplify those functions.

-- Steve


> +	),
> +
> +	TP_fast_assign(
> +		__entry->err_seq = err_seq;
> +		if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> +			__entry->etype = mem->error_type;
> +		else
> +			__entry->etype = ~0;
> +		__entry->sev = sev;
> +		if (mem->validation_bits & CPER_MEM_VALID_PA)
> +			__entry->pa = mem->physical_addr;
> +		else
> +			__entry->pa = ~0ull;
> +
> +		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
> +			__entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
> +		else
> +			__entry->pa_mask_lsb = ~0;
> +		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
> +		__assign_str(fru_text, fru_text);
> +		cper_mem_err_pack(mem, __entry->data);
> +	),
> +
> +	TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
> +		  __entry->err_seq,
> +		  cper_severity_str(__entry->sev),
> +		  cper_mem_err_type_str(__entry->etype),
> +		  __entry->pa,
> +		  __entry->pa_mask_lsb,
> +		  cper_mem_err_unpack(p, __entry->data),
> +		  __entry->fru_id,
> +		  __get_str(fru_text))
> +);
>  
>  /*
>   * Hardware Events Report


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-12 13:28   ` Steven Rostedt
@ 2014-06-13  2:19     ` Chen, Gong
  2014-06-13  3:01       ` Steven Rostedt
  0 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-13  2:19 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: tony.luck, bp, m.chehab, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 807 bytes --]

On Thu, Jun 12, 2014 at 09:28:08AM -0400, Steven Rostedt wrote:
> > +	TP_STRUCT__entry(
> > +		__field(u32, err_seq)
> > +		__field(u8, etype)
> > +		__field(u8, sev)
> > +		__field(u64, pa)
> > +		__field(u8, pa_mask_lsb)
> > +		__array(u8, fru_id, sizeof(uuid_le))
> > +		__string(fru_text, fru_text)
> > +		__array(u8, data, sizeof(struct cper_mem_err_compact))
> 
> The above array works, but I'm wondering why you don't just use the
> types themselves?
> 
> That is:
> 
> 		__field(uuid_le, fru_id)
> 		__field(struct cper_mem_err_compact, data)
> 
Every time I use the code above I hit a compiler error like
"error: conversion to non-scalar type requested". It looks like some
data types like the above (unless typedef) can't be used in __field.
Is that true? How can I fix it?


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-13  2:19     ` Chen, Gong
@ 2014-06-13  3:01       ` Steven Rostedt
  2014-06-13  3:08         ` Steven Rostedt
  0 siblings, 1 reply; 32+ messages in thread
From: Steven Rostedt @ 2014-06-13  3:01 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, bp, m.chehab, linux-acpi

On Thu, 12 Jun 2014 22:19:57 -0400
"Chen, Gong" <gong.chen@linux.intel.com> wrote:

> On Thu, Jun 12, 2014 at 09:28:08AM -0400, Steven Rostedt wrote:
> > > +	TP_STRUCT__entry(
> > > +		__field(u32, err_seq)
> > > +		__field(u8, etype)
> > > +		__field(u8, sev)
> > > +		__field(u64, pa)
> > > +		__field(u8, pa_mask_lsb)
> > > +		__array(u8, fru_id, sizeof(uuid_le))
> > > +		__string(fru_text, fru_text)
> > > +		__array(u8, data, sizeof(struct cper_mem_err_compact))
> > 
> > The above array works, but I'm wondering why you don't just use the
> > types themselves?
> > 
> > That is:
> > 
> > 		__field(uuid_le, fru_id)
> > 		__field(struct cper_mem_err_compact, data)
> > 
> Every time when I use above codes I will hit some compiler error like
> "error: conversion to non-scalar type requested". It looks some
> data types like above (unless typedef) can't be used in __field.
> Is it true? How to fix that?
> 

Ah, that's a bug in the ftrace.h file. I'll need to test this against
it to see what error it gives. I'll work on that tomorrow as it's
bedtime for me now.

-- Steve

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-13  3:01       ` Steven Rostedt
@ 2014-06-13  3:08         ` Steven Rostedt
  2014-06-13  7:09           ` Chen, Gong
  0 siblings, 1 reply; 32+ messages in thread
From: Steven Rostedt @ 2014-06-13  3:08 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Chen, Gong, tony.luck, bp, m.chehab, linux-acpi

On Thu, 12 Jun 2014 23:01:17 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 12 Jun 2014 22:19:57 -0400
> "Chen, Gong" <gong.chen@linux.intel.com> wrote:
> 
> > On Thu, Jun 12, 2014 at 09:28:08AM -0400, Steven Rostedt wrote:
> > > > +	TP_STRUCT__entry(
> > > > +		__field(u32, err_seq)
> > > > +		__field(u8, etype)
> > > > +		__field(u8, sev)
> > > > +		__field(u64, pa)
> > > > +		__field(u8, pa_mask_lsb)
> > > > +		__array(u8, fru_id, sizeof(uuid_le))
> > > > +		__string(fru_text, fru_text)
> > > > +		__array(u8, data, sizeof(struct cper_mem_err_compact))
> > > 
> > > The above array works, but I'm wondering why you don't just use the
> > > types themselves?
> > > 
> > > That is:
> > > 
> > > 		__field(uuid_le, fru_id)
> > > 		__field(struct cper_mem_err_compact, data)
> > > 
> > Every time when I use above codes I will hit some compiler error like
> > "error: conversion to non-scalar type requested". It looks some
> > data types like above (unless typedef) can't be used in __field.
> > Is it true? How to fix that?
> > 
> 
> Ah, that's a bug in the ftrace.h file. I'll need to test this against
> it to see what error it gives. I'll work on that tomorrow as it's
> bedtime for me now.

Wait, I take that back. I was thinking the error was with the
__array(). A __field() should work fine!

Can you post me the patch you did and the error you get when you
compile.

-- Steve

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-13  3:08         ` Steven Rostedt
@ 2014-06-13  7:09           ` Chen, Gong
  2014-06-17  2:09             ` Chen, Gong
  0 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-13  7:09 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: tony.luck, bp, m.chehab, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 3468 bytes --]

On Thu, Jun 12, 2014 at 11:08:33PM -0400, Steven Rostedt wrote:
> > > > The above array works, but I'm wondering why you don't just use the
> > > > types themselves?
> > > > 
> > > > That is:
> > > > 
> > > > 		__field(uuid_le, fru_id)
> > > > 		__field(struct cper_mem_err_compact, data)
> > > > 
> > > Every time when I use above codes I will hit some compiler error like
> > > "error: conversion to non-scalar type requested". It looks some
> > > data types like above (unless typedef) can't be used in __field.
> > > Is it true? How to fix that?
> > > 
> > 
> > Ah, that's a bug in the ftrace.h file. I'll need to test this against
> > it to see what error it gives. I'll work on that tomorrow as it's
> > bedtime for me now.
> 
> Wait, I take that back. I was thinking the error was with the
> __array(). A __field() should work fine!
> 
> Can you post me the patch you did and the error you get when you
> compile.

Here is the incremental patch & the compiler error.
PS: the same issue for __field(struct cper_mem_err_compact, data)

diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index a794691..174aac1 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -35,7 +35,7 @@ TRACE_EVENT(extlog_mem_event,
                __field(u8, sev)
                __field(u64, pa)
                __field(u8, pa_mask_lsb)
-               __array(u8, fru_id, sizeof(uuid_le))
+               __field(uuid_le, fru_id)
                __string(fru_text, fru_text)
                __array(u8, data, sizeof(struct cper_mem_err_compact))
        ),
@@ -56,7 +56,7 @@ TRACE_EVENT(extlog_mem_event,
                        __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
                else
                        __entry->pa_mask_lsb = ~0;
-               memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+               __entry->fru_id = *fru_id;
                __assign_str(fru_text, fru_text);
                cper_mem_err_pack(mem, __entry->data);
        ),
@@ -68,7 +68,7 @@ TRACE_EVENT(extlog_mem_event,
                  __entry->pa,
                  __entry->pa_mask_lsb,
                  cper_mem_err_unpack(p, __entry->data),
-                 __entry->fru_id,
+                 &__entry->fru_id,
                  __get_str(fru_text))
 );


In file included from include/trace/define_trace.h:90:0,
                 from include/ras/ras_event.h:226,
                 from drivers/ras/ras.c:13:
include/trace/../../include/ras/ras_event.h: In function ‘ftrace_define_fields_extlog_mem_event’:
include/trace/ftrace.h:330:9: error: conversion to non-scalar type requested
  struct ftrace_raw_##call field;     \
         ^
include/trace/ftrace.h:32:2: note: in expansion of macro ‘DECLARE_EVENT_CLASS’
  DECLARE_EVENT_CLASS(name,          \
  ^
include/trace/../../include/ras/ras_event.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
 TRACE_EVENT(extlog_mem_event,
 ^
include/trace/ftrace.h:330:9: error: conversion to non-scalar type requested
  struct ftrace_raw_##call field;     \
         ^
include/trace/ftrace.h:32:2: note: in expansion of macro ‘DECLARE_EVENT_CLASS’
  DECLARE_EVENT_CLASS(name,          \
  ^
include/trace/../../include/ras/ras_event.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
 TRACE_EVENT(extlog_mem_event,
 ^
scripts/Makefile.build:318: recipe for target 'drivers/ras/ras.o' failed


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-13  7:09           ` Chen, Gong
@ 2014-06-17  2:09             ` Chen, Gong
  2014-06-17  3:37               ` Steven Rostedt
                                 ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-17  2:09 UTC (permalink / raw)
  To: Steven Rostedt, tony.luck, bp, m.chehab, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 3722 bytes --]

On Fri, Jun 13, 2014 at 03:09:28AM -0400, Chen, Gong wrote:
> On Thu, Jun 12, 2014 at 11:08:33PM -0400, Steven Rostedt wrote:
> > > > > The above array works, but I'm wondering why you don't just use the
> > > > > types themselves?
> > > > > 
> > > > > That is:
> > > > > 
> > > > > 		__field(uuid_le, fru_id)
> > > > > 		__field(struct cper_mem_err_compact, data)
> > > > > 
> > > > Every time when I use above codes I will hit some compiler error like
> > > > "error: conversion to non-scalar type requested". It looks some
> > > > data types like above (unless typedef) can't be used in __field.
> > > > Is it true? How to fix that?
> > > > 
> > > 
> > > Ah, that's a bug in the ftrace.h file. I'll need to test this against
> > > it to see what error it gives. I'll work on that tomorrow as it's
> > > bedtime for me now.
> > 
> > Wait, I take that back. I was thinking the error was with the
> > __array(). A __field() should work fine!
> > 
> > Can you post me the patch you did and the error you get when you
> > compile.
> 
> Here is the incremented patch & compiler error.
> PS: the same issue for __field(struct cper_mem_err_compact, data)
> 
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index a794691..174aac1 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -35,7 +35,7 @@ TRACE_EVENT(extlog_mem_event,
>                 __field(u8, sev)
>                 __field(u64, pa)
>                 __field(u8, pa_mask_lsb)
> -               __array(u8, fru_id, sizeof(uuid_le))
> +               __field(uuid_le, fru_id)
>                 __string(fru_text, fru_text)
>                 __array(u8, data, sizeof(struct cper_mem_err_compact))
>         ),
> @@ -56,7 +56,7 @@ TRACE_EVENT(extlog_mem_event,
>                         __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
>                 else
>                         __entry->pa_mask_lsb = ~0;
> -               memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
> +               __entry->fru_id = *fru_id;
>                 __assign_str(fru_text, fru_text);
>                 cper_mem_err_pack(mem, __entry->data);
>         ),
> @@ -68,7 +68,7 @@ TRACE_EVENT(extlog_mem_event,
>                   __entry->pa,
>                   __entry->pa_mask_lsb,
>                   cper_mem_err_unpack(p, __entry->data),
> -                 __entry->fru_id,
> +                 &__entry->fru_id,
>                   __get_str(fru_text))
>  );
> 
> 
> In file included from include/trace/define_trace.h:90:0,
>                  from include/ras/ras_event.h:226,
>                  from drivers/ras/ras.c:13:
> include/trace/../../include/ras/ras_event.h: In function ‘ftrace_define_fields_extlog_mem_event’:
> include/trace/ftrace.h:330:9: error: conversion to non-scalar type requested
>   struct ftrace_raw_##call field;     \
>          ^
> include/trace/ftrace.h:32:2: note: in expansion of macro ‘DECLARE_EVENT_CLASS’
>   DECLARE_EVENT_CLASS(name,          \
>   ^
> include/trace/../../include/ras/ras_event.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
>  TRACE_EVENT(extlog_mem_event,
>  ^
> include/trace/ftrace.h:330:9: error: conversion to non-scalar type requested
>   struct ftrace_raw_##call field;     \
>          ^
> include/trace/ftrace.h:32:2: note: in expansion of macro ‘DECLARE_EVENT_CLASS’
>   DECLARE_EVENT_CLASS(name,          \
>   ^
> include/trace/../../include/ras/ras_event.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
>  TRACE_EVENT(extlog_mem_event,
>  ^
> scripts/Makefile.build:318: recipe for target 'drivers/ras/ras.o' failed
> 


Any update, Steven?

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-17  2:09             ` Chen, Gong
@ 2014-06-17  3:37               ` Steven Rostedt
  2014-06-17 12:59               ` Steven Rostedt
  2014-06-18  2:33               ` eMCA trace interface update Chen, Gong
  2 siblings, 0 replies; 32+ messages in thread
From: Steven Rostedt @ 2014-06-17  3:37 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, bp, m.chehab, linux-acpi

On Mon, 16 Jun 2014 22:09:37 -0400
"Chen, Gong" <gong.chen@linux.intel.com> wrote:

> 
> Any update, Steven?

Sorry I missed this. Claws-mail has an annoying tendency: when I have
something selected and I get new mail, it marks all my mail as read
except the mail that just came in. I need to figure out that bug and
fix it :-(

Anyway, I know what your issue is. It's the check for signed type. It
doesn't work for structures. I'm working on a way to fix that.

-- Steve

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface
  2014-06-17  2:09             ` Chen, Gong
  2014-06-17  3:37               ` Steven Rostedt
@ 2014-06-17 12:59               ` Steven Rostedt
  2014-06-18  2:33               ` eMCA trace interface update Chen, Gong
  2 siblings, 0 replies; 32+ messages in thread
From: Steven Rostedt @ 2014-06-17 12:59 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, bp, m.chehab, linux-acpi

On Mon, 16 Jun 2014 22:09:37 -0400
"Chen, Gong" <gong.chen@linux.intel.com> wrote:


> 
> Any update, Steven?


Here, apply the below patch and then use "__field_struct()" instead of
"__field()". That should work. If so, I can add it to my repo (needs
some cleanup and documentation) in a separate branch that can be pulled
to base your patches on top of.

-- Steve

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0fd06fe..26b4f2e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -44,6 +44,12 @@
 #undef __field_ext
 #define __field_ext(type, item, filter_type)	type	item;
 
+#undef __field_struct
+#define __field_struct(type, item)	type	item;
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)	type	item;
+
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
@@ -122,6 +128,12 @@
 #undef __field_ext
 #define __field_ext(type, item, filter_type)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
 #undef __array
 #define __array(type, item, len)
 
@@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {	\
 	if (ret)							\
 		return ret;
 
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item),			\
+				 0, filter_type);			\
+	if (ret)							\
+		return ret;
+
 #undef __field
 #define __field(type, item)	__field_ext(type, item, FILTER_OTHER)
 
+#undef __field_struct
+#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER)
+
 #undef __array
 #define __array(type, item, len)					\
 	do {								\
@@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef __field_ext
 #define __field_ext(type, item, filter_type)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
 #undef __array
 #define __array(type, item, len)
 
@@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call(			\
 #undef __field
 #define __field(type, item)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
 #undef __array
 #define __array(type, item, len)
 

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* eMCA trace interface update
  2014-06-17  2:09             ` Chen, Gong
  2014-06-17  3:37               ` Steven Rostedt
  2014-06-17 12:59               ` Steven Rostedt
@ 2014-06-18  2:33               ` Chen, Gong
  2014-06-18  2:33                 ` [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface Chen, Gong
  2 siblings, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-18  2:33 UTC (permalink / raw)
  To: rostedt; +Cc: tony.luck, bp, m.chehab, linux-acpi

[PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface

Hi, Steven

Your patch works well. Here is the updated patch based on your patch.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-18  2:33               ` eMCA trace interface update Chen, Gong
@ 2014-06-18  2:33                 ` Chen, Gong
  2014-06-20  2:06                   ` Chen, Gong
  2014-06-22 16:48                   ` Borislav Petkov
  0 siblings, 2 replies; 32+ messages in thread
From: Chen, Gong @ 2014-06-18  2:33 UTC (permalink / raw)
  To: rostedt; +Cc: tony.luck, bp, m.chehab, linux-acpi, Chen, Gong

Add a trace interface to report all H/W error-related information.

v7 -> v6: compact trace info to save trace buffer space.
v6 -> v5: format adjustment.
v5 -> v4: Add physical mask(LSB) in trace.
v4 -> v3: change ras trace dependency rule.
v3 -> v2: minor adjustment according to the suggestion from Boris.
v2 -> v1: spinlock is not needed anymore.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/Kconfig        |  4 ++-
 drivers/acpi/acpi_extlog.c  | 27 +++++++++++++++++---
 drivers/firmware/efi/cper.c | 45 +++++++++++++++++++++++++++++---
 drivers/ras/ras.c           |  1 +
 include/linux/cper.h        | 23 +++++++++++++++++
 include/ras/ras_event.h     | 62 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 154 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index a34a228..206942b 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -370,6 +370,7 @@ config ACPI_EXTLOG
 	tristate "Extended Error Log support"
 	depends on X86_MCE && X86_LOCAL_APIC
 	select UEFI_CPER
+	select RAS
 	default n
 	help
 	  Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@ config ACPI_EXTLOG
 
 	  Enhanced MCA Logging allows firmware to provide additional error
 	  information to system software, synchronous with MCE or CMCI. This
-	  driver adds support for that functionality.
+	  driver adds support for that functionality with corresponding
+	  tracepoint which carries that information to userspace.
 
 endif	# ACPI
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 1853341..e61da95 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -16,6 +16,7 @@
 #include <asm/mce.h>
 
 #include "apei/apei-internal.h"
+#include <ras/ras_event.h>
 
 #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
 
@@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	struct mce *mce = (struct mce *)data;
 	int	bank = mce->bank;
 	int	cpu = mce->extcpu;
-	struct acpi_generic_status *estatus;
-	int rc;
+	struct acpi_generic_status *estatus, *tmp;
+	struct acpi_generic_data *gdata;
+	const uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";
+	uuid_le *sec_type;
+	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
 	if (estatus == NULL)
@@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	/* clear record status to enable BIOS to update it again */
 	estatus->block_status = 0;
 
-	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+	tmp = (struct acpi_generic_status *)elog_buf;
+	print_extlog_rcd(NULL, tmp, cpu);
+
+	/* log event via trace */
+	err_seq++;
+	gdata = (struct acpi_generic_data *)(tmp + 1);
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+		fru_id = (uuid_le *)gdata->fru_id;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+		fru_text = gdata->fru_text;
+	sec_type = (uuid_le *)gdata->section_type;
+	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+		if (gdata->error_data_length >= sizeof(*mem))
+			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+					       (u8)gdata->error_severity);
+	}
 
 	return NOTIFY_STOP;
 }
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 83b56b61..743afd5 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype)
 }
 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 
-int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
+int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
 
@@ -249,7 +249,7 @@ int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg)
 	return n;
 }
 
-int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
+int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
 	const char *bank = NULL, *device = NULL;
@@ -271,8 +271,44 @@ int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg)
 	return n;
 }
 
+void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
+		       struct cper_mem_err_compact *cmem)
+{
+	cmem->validation_bits = mem->validation_bits;
+	cmem->node = mem->node;
+	cmem->card = mem->card;
+	cmem->module = mem->module;
+	cmem->bank = mem->bank;
+	cmem->device = mem->device;
+	cmem->row = mem->row;
+	cmem->column = mem->column;
+	cmem->bit_pos = mem->bit_pos;
+	cmem->requestor_id = mem->requestor_id;
+	cmem->responder_id = mem->responder_id;
+	cmem->target_id = mem->target_id;
+	cmem->rank = mem->rank;
+	cmem->mem_array_handle = mem->mem_array_handle;
+	cmem->mem_dev_handle = mem->mem_dev_handle;
+}
+
+const char *cper_mem_err_unpack(struct trace_seq *p,
+				struct cper_mem_err_compact *cmem)
+{
+	const char *ret = p->buffer + p->len;
+
+	if (cper_mem_err_location(cmem, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+	if (cper_dimm_err_location(cmem, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+	trace_seq_putc(p, '\0');
+
+	return ret;
+}
+
 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 {
+	struct cper_mem_err_compact cmem;
+
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
 	if (mem->validation_bits & CPER_MEM_VALID_PA)
@@ -281,14 +317,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
 		printk("%s""physical_address_mask: 0x%016llx\n",
 		       pfx, mem->physical_addr_mask);
-	if (cper_mem_err_location(mem, rcd_decode_str))
+	cper_mem_err_pack(mem, &cmem);
+	if (cper_mem_err_location(&cmem, rcd_decode_str))
 		printk("%s%s\n", pfx, rcd_decode_str);
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 		u8 etype = mem->error_type;
 		printk("%s""error_type: %d, %s\n", pfx, etype,
 		       cper_mem_err_type_str(etype));
 	}
-	if (cper_dimm_err_location(mem, rcd_decode_str))
+	if (cper_dimm_err_location(&cmem, rcd_decode_str))
 		printk("%s%s\n", pfx, rcd_decode_str);
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 4cac43a..da227a3 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -23,4 +23,5 @@ static int __init ras_init(void)
 }
 subsys_initcall(ras_init);
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index ed088b9..b65db8a 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -22,6 +22,7 @@
 #define LINUX_CPER_H
 
 #include <linux/uuid.h>
+#include <linux/trace_seq.h>
 
 /* CPER record signature and the size */
 #define CPER_SIG_RECORD				"CPER"
@@ -363,6 +364,24 @@ struct cper_sec_mem_err {
 	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
 };
 
+struct cper_mem_err_compact {
+	__u64	validation_bits;
+	__u16	node;
+	__u16	card;
+	__u16	module;
+	__u16	bank;
+	__u16	device;
+	__u16	row;
+	__u16	column;
+	__u16	bit_pos;
+	__u64	requestor_id;
+	__u64	responder_id;
+	__u64	target_id;
+	__u16	rank;
+	__u16	mem_array_handle;
+	__u16	mem_dev_handle;
+};
+
 struct cper_sec_pcie {
 	__u64		validation_bits;
 	__u32		port_type;
@@ -406,5 +425,9 @@ const char *cper_severity_str(unsigned int);
 const char *cper_mem_err_type_str(unsigned int);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char * const strs[], unsigned int strs_size);
+void cper_mem_err_pack(const struct cper_sec_mem_err *,
+		       struct cper_mem_err_compact *);
+const char *cper_mem_err_unpack(struct trace_seq *,
+				struct cper_mem_err_compact *);
 
 #endif
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index acbcbb8..5923367 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -9,6 +9,68 @@
 #include <linux/edac.h>
 #include <linux/ktime.h>
 #include <linux/aer.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ */
+
+/* memory trace event */
+
+TRACE_EVENT(extlog_mem_event,
+	TP_PROTO(struct cper_sec_mem_err *mem,
+		 u32 err_seq,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 u8 sev),
+
+	TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, etype)
+		__field(u8, sev)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__field_struct(uuid_le, fru_id)
+		__string(fru_text, fru_text)
+		__field_struct(struct cper_mem_err_compact, data)
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+		if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+			__entry->etype = mem->error_type;
+		else
+			__entry->etype = ~0;
+		__entry->sev = sev;
+		if (mem->validation_bits & CPER_MEM_VALID_PA)
+			__entry->pa = mem->physical_addr;
+		else
+			__entry->pa = ~0ull;
+
+		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+			__entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
+		else
+			__entry->pa_mask_lsb = ~0;
+		__entry->fru_id = *fru_id;
+		__assign_str(fru_text, fru_text);
+		cper_mem_err_pack(mem, &__entry->data);
+	),
+
+	TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
+		  __entry->err_seq,
+		  cper_severity_str(__entry->sev),
+		  cper_mem_err_type_str(__entry->etype),
+		  __entry->pa,
+		  __entry->pa_mask_lsb,
+		  cper_mem_err_unpack(p, &__entry->data),
+		  &__entry->fru_id,
+		  __get_str(fru_text))
+);
 
 /*
  * Hardware Events Report
-- 
2.0.0.rc2


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-18  2:33                 ` [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface Chen, Gong
@ 2014-06-20  2:06                   ` Chen, Gong
  2014-06-20 23:01                     ` Tony Luck
  2014-06-22 16:48                   ` Borislav Petkov
  1 sibling, 1 reply; 32+ messages in thread
From: Chen, Gong @ 2014-06-20  2:06 UTC (permalink / raw)
  To: rostedt; +Cc: tony.luck, bp, m.chehab, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 620 bytes --]

On Tue, Jun 17, 2014 at 10:33:07PM -0400, Chen, Gong wrote:
> Date: Tue, 17 Jun 2014 22:33:07 -0400
> From: "Chen, Gong" <gong.chen@linux.intel.com>
> To: rostedt@goodmis.org
> Cc: tony.luck@intel.com, bp@alien8.de, m.chehab@samsung.com,
>  linux-acpi@vger.kernel.org, "Chen, Gong" <gong.chen@linux.intel.com>
> Subject: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
> X-Mailer: git-send-email 2.0.0.rc2
> 
> Add trace interface to elaborate all H/W error related information.
> 
So, Steven & Tony

How do we solve the dependency between the different git trees? (One for
trace, one for RAS.)

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-20  2:06                   ` Chen, Gong
@ 2014-06-20 23:01                     ` Tony Luck
  2014-06-21  0:56                       ` Steven Rostedt
  0 siblings, 1 reply; 32+ messages in thread
From: Tony Luck @ 2014-06-20 23:01 UTC (permalink / raw)
  To: Steven Rostedt, Luck, Tony, Borislav Petkov, m.chehab, linux-acpi

On Thu, Jun 19, 2014 at 7:06 PM, Chen, Gong <gong.chen@linux.intel.com> wrote:
>> Add trace interface to elaborate all H/W error related information.
>>
> So, Steven & Tony
>
> How to solve the dependency between different git tree? (One for trace, One
> for RAS)

1) Simplest (for me) - Steve pushes the trace change upstream now (at
-rc1 now, -rc2 on Sunday - dodgy, but possible).

2) Steve puts the change in a topic branch and publishes to
kernel.org. I clone that, and add eMCA patches on top

3) Steve signs off the patch, and I put it into a RAS branch (awkward
for Steve if subsequent changes to trace touch the same areas)

4) Something else

-Tony

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-20 23:01                     ` Tony Luck
@ 2014-06-21  0:56                       ` Steven Rostedt
  0 siblings, 0 replies; 32+ messages in thread
From: Steven Rostedt @ 2014-06-21  0:56 UTC (permalink / raw)
  To: Tony Luck; +Cc: Luck, Tony, Borislav Petkov, m.chehab, linux-acpi

On Fri, 20 Jun 2014 16:01:52 -0700
Tony Luck <tony.luck@gmail.com> wrote:

> On Thu, Jun 19, 2014 at 7:06 PM, Chen, Gong <gong.chen@linux.intel.com> wrote:
> >> Add trace interface to elaborate all H/W error related information.
> >>
> > So, Steven & Tony
> >
> > How to solve the dependency between different git tree? (One for trace, One
> > for RAS)
> 
> 1) Simplest (for me) - Steve pushes the trace change upstream now (at
> -rc1 now, -rc2 on Sunday - dodgy, but possible).
> 
> 2) Steve puts the change in a topic branch and publishes to
> kernel.org. I clone that, and add eMCA patches on top
> 
> 3) Steve signs off the patch, and I put it into a RAS branch (awkward
> for Steve in subsequent changes to trace touch same areas)
> 
> 4) Something else
> 

I was going to go with #2 but I have some patches that need to go soon
and since this patch only adds a new macro that no one uses, maybe I
can still slip it in.

-- Steve

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-18  2:33                 ` [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface Chen, Gong
  2014-06-20  2:06                   ` Chen, Gong
@ 2014-06-22 16:48                   ` Borislav Petkov
  2014-06-23 23:51                     ` Luck, Tony
  1 sibling, 1 reply; 32+ messages in thread
From: Borislav Petkov @ 2014-06-22 16:48 UTC (permalink / raw)
  To: Chen, Gong; +Cc: rostedt, tony.luck, m.chehab, linux-acpi

On Tue, Jun 17, 2014 at 10:33:07PM -0400, Chen, Gong wrote:
> Add trace interface to elaborate all H/W error related information.
> 
> v7 -> v6: compact trace info to save trace buffer space.
> v6 -> v5: format adjustment.
> v5 -> v4: Add physical mask(LSB) in trace.
> v4 -> v3: change ras trace dependency rule.
> v3 -> v2: minor adjustment according to the suggestion from Boris.
> v2 -> v1: spinlock is not needed anymore.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>

Yep, this is starting to look good now:

Acked-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 32+ messages in thread

* RE: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-22 16:48                   ` Borislav Petkov
@ 2014-06-23 23:51                     ` Luck, Tony
  2014-06-24 10:20                       ` Borislav Petkov
  0 siblings, 1 reply; 32+ messages in thread
From: Luck, Tony @ 2014-06-23 23:51 UTC (permalink / raw)
  To: Borislav Petkov, Chen, Gong; +Cc: rostedt, m.chehab, linux-acpi

> Yep, this is starting to look good now:
>
> Acked-by: Borislav Petkov <bp@suse.de>

Thanks Boris,

Ok - one more piece committed, two to go.  They both
seem pretty simple:

Part 6 just enables the debugfs knob to turn off console logging if someone has the magic file open.
  http://www.spinics.net/lists/linux-acpi/msg50906.html

Part 7 is a trivial cleanup to not bother checking for EDAC/EXTLOG conflicts on platforms that don't support EXTLOG
  http://www.spinics.net/lists/linux-acpi/msg50527.html

Neither seems to need an update as the result of changes we made
to earlier parts of the series.

-Tony

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-23 23:51                     ` Luck, Tony
@ 2014-06-24 10:20                       ` Borislav Petkov
  2014-06-24 17:38                         ` Luck, Tony
  0 siblings, 1 reply; 32+ messages in thread
From: Borislav Petkov @ 2014-06-24 10:20 UTC (permalink / raw)
  To: Luck, Tony; +Cc: Chen, Gong, rostedt, m.chehab, linux-acpi

On Mon, Jun 23, 2014 at 11:51:04PM +0000, Luck, Tony wrote:
> > Yep, this is starting to look good now:
> >
> > Acked-by: Borislav Petkov <bp@suse.de>
> 
> Thanks Boris,
> 
> Ok - one more piece committed, two to go.  They both
> seem pretty simple:
> 
> Part 6 just enables the debugfs knob to turn off console logging if someone has the magic file open.
>   http://www.spinics.net/lists/linux-acpi/msg50906.html
> 
> Part 7 is a trivial cleanup to not bother checking for EDAC/EXTLOG conflicts on platforms that don't support EXTLOG
>   http://www.spinics.net/lists/linux-acpi/msg50527.html
> 
> Neither seems to need an update as the result of changes we made
> to earlier parts of the series.

Right.

Acked-by: Borislav Petkov <bp@suse.de>

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 32+ messages in thread

* RE: [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface
  2014-06-24 10:20                       ` Borislav Petkov
@ 2014-06-24 17:38                         ` Luck, Tony
  0 siblings, 0 replies; 32+ messages in thread
From: Luck, Tony @ 2014-06-24 17:38 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Chen, Gong, rostedt, m.chehab, linux-acpi

>> Neither seems to need an update as the result of changes we made
>> to earlier parts of the series.
>
> Right.
>
> Acked-by: Borislav Petkov <bp@suse.de>

Ok - thanks.

Whole series (based on top of Steven's not-yet-pulled-by-Linus branch) is in a new
extlog-trace branch in the ras tree.

I'm working on the user-space updates to rasdaemon to pull the new
format traces.

-Tony

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2014-06-24 17:40 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-11  8:34 New eMCA trace event interface V4 Chen, Gong
2014-06-11  8:34 ` [PATCH 1/7 v5] trace, RAS: Add basic RAS trace event Chen, Gong
2014-06-11 18:59   ` Borislav Petkov
2014-06-11  8:34 ` [PATCH 2/7 v3] trace, AER: Move trace into unified interface Chen, Gong
2014-06-11 19:00   ` Borislav Petkov
2014-06-11  8:34 ` [PATCH 3/7 v5] CPER: Adjust code flow of some functions Chen, Gong
2014-06-11  8:34 ` [PATCH 4/7 v2] RAS, debugfs: Add debugfs interface for RAS subsystem Chen, Gong
2014-06-11 19:01   ` Borislav Petkov
2014-06-11  8:34 ` [PATCH 5/7 v7] trace, RAS: Add eMCA trace event interface Chen, Gong
2014-06-11 19:02   ` Borislav Petkov
2014-06-12  2:42     ` Chen, Gong
2014-06-11  8:34 ` [PATCH 6/7 v4] trace, eMCA: Add a knob to adjust where to save event log Chen, Gong
2014-06-11  8:34 ` [PATCH 7/7] RAS, extlog: Adjust init flow Chen, Gong
2014-06-11 21:33 ` New eMCA trace event interface V4 Luck, Tony
2014-06-12  6:11 ` [PATCH 5/7 REVISION] trace, RAS: Add eMCA trace event interface Chen, Gong
2014-06-12 13:28   ` Steven Rostedt
2014-06-13  2:19     ` Chen, Gong
2014-06-13  3:01       ` Steven Rostedt
2014-06-13  3:08         ` Steven Rostedt
2014-06-13  7:09           ` Chen, Gong
2014-06-17  2:09             ` Chen, Gong
2014-06-17  3:37               ` Steven Rostedt
2014-06-17 12:59               ` Steven Rostedt
2014-06-18  2:33               ` eMCA trace interface update Chen, Gong
2014-06-18  2:33                 ` [PATCH 5/7 REVISION v2] trace, RAS: Add eMCA trace event interface Chen, Gong
2014-06-20  2:06                   ` Chen, Gong
2014-06-20 23:01                     ` Tony Luck
2014-06-21  0:56                       ` Steven Rostedt
2014-06-22 16:48                   ` Borislav Petkov
2014-06-23 23:51                     ` Luck, Tony
2014-06-24 10:20                       ` Borislav Petkov
2014-06-24 17:38                         ` Luck, Tony

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.