From mboxrd@z Thu Jan 1 00:00:00 1970 From: Naoya Horiguchi Subject: [PATCH v2] trace, ras: move ras_event.h under include/trace/events Date: Wed, 20 May 2015 08:15:30 +0000 Message-ID: <20150520081530.GA12646@hori1.linux.bs1.fc.nec.co.jp> References: <20150518185226.23154d47@canb.auug.org.au> <555A0327.9060709@infradead.org> <20150519024933.GA1614@hori1.linux.bs1.fc.nec.co.jp> <20150519094636.67c9a4a3@gandalf.local.home> <20150520053614.GA6236@hori1.linux.bs1.fc.nec.co.jp> <20150520060119.GB27005@hori1.linux.bs1.fc.nec.co.jp> <20150520060336.GC27005@hori1.linux.bs1.fc.nec.co.jp> <20150520071625.GF27005@hori1.linux.bs1.fc.nec.co.jp> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-2022-jp" Content-Transfer-Encoding: 8BIT Return-path: Received: from TYO202.gate.nec.co.jp ([210.143.35.52]:62646 "EHLO tyo202.gate.nec.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751155AbbETISQ convert rfc822-to-8bit (ORCPT ); Wed, 20 May 2015 04:18:16 -0400 In-Reply-To: <20150520071625.GF27005@hori1.linux.bs1.fc.nec.co.jp> Content-Language: ja-JP Content-ID: <77EFBED4D106AF49A0B93242C194C877@gisp.nec.co.jp> Sender: linux-next-owner@vger.kernel.org List-ID: To: Steven Rostedt Cc: Randy Dunlap , Stephen Rothwell , "linux-next@vger.kernel.org" , "linux-kernel@vger.kernel.org" , Linux MM , Andrew Morton , Jim Davis , Chen Gong Here is the revised one. --- Most of the header files for tracepoints are located in include/trace/events or in their relevant subdirectories under drivers/. One exception is include/ras/ras_event.h, which looks inconsistent. So let's move it to the default place for such headers. 
Signed-off-by: Naoya Horiguchi --- ChangeLog v1->v2: - rename the header from ras_event.h to ras.h (then no need to define TRACE_INCLUDE_FILE) - remove "#define TRACE_INCLUDE_PATH ../../include/ras" from a/drivers/ras/ras.c --- drivers/acpi/acpi_extlog.c | 2 +- drivers/edac/edac_mc.c | 2 +- drivers/edac/ghes_edac.c | 2 +- drivers/pci/pcie/aer/aerdrv_errprint.c | 2 +- drivers/ras/ras.c | 3 +- include/ras/ras_event.h | 323 --------------------------------- include/trace/events/ras.h | 322 ++++++++++++++++++++++++++++++++ mm/memory-failure.c | 2 +- 8 files changed, 328 insertions(+), 330 deletions(-) delete mode 100644 include/ras/ras_event.h create mode 100644 include/trace/events/ras.h diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index b3842ffc19ba..b04bfd9f14e3 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -17,7 +17,7 @@ #include #include "apei/apei-internal.h" -#include +#include #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index af3be1914dbb..c95ecb74cdc4 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -33,7 +33,7 @@ #include #include "edac_core.h" #include "edac_module.h" -#include +#include /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index b24681998740..223005766527 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -15,7 +15,7 @@ #include #include #include "edac_core.h" -#include +#include #define GHES_EDAC_REVISION " Ver: 1.0.0" diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 167fe411ce2e..b5d4c0d9778d 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -22,7 +22,7 @@ #include #include "aerdrv.h" -#include +#include #define AER_AGENT_RECEIVER 0 #define AER_AGENT_REQUESTER 1 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index b67dd362b7b6..d1557687ba18 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -9,8 +9,7 @@ #include #define CREATE_TRACE_POINTS -#define TRACE_INCLUDE_PATH ../../include/ras -#include +#include static int __init ras_init(void) { diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h deleted file mode 100644 index 1443d79e4fe6..000000000000 --- a/include/ras/ras_event.h +++ /dev/null @@ -1,323 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ras -#define TRACE_INCLUDE_FILE ras_event - -#if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_HW_EVENT_MC_H - -#include -#include -#include -#include -#include -#include -#include - -/* - * MCE Extended Error Log trace event - * - * These events are generated when hardware detects a corrected or - * uncorrected event. - */ - -/* memory trace event */ - -#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) -TRACE_EVENT(extlog_mem_event, - TP_PROTO(struct cper_sec_mem_err *mem, - u32 err_seq, - const uuid_le *fru_id, - const char *fru_text, - u8 sev), - - TP_ARGS(mem, err_seq, fru_id, fru_text, sev), - - TP_STRUCT__entry( - __field(u32, err_seq) - __field(u8, etype) - __field(u8, sev) - __field(u64, pa) - __field(u8, pa_mask_lsb) - __field_struct(uuid_le, fru_id) - __string(fru_text, fru_text) - __field_struct(struct cper_mem_err_compact, data) - ), - - TP_fast_assign( - __entry->err_seq = err_seq; - if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) - __entry->etype = mem->error_type; - else - __entry->etype = ~0; - __entry->sev = sev; - if (mem->validation_bits & CPER_MEM_VALID_PA) - __entry->pa = mem->physical_addr; - else - __entry->pa = ~0ull; - - if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) - __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); - else - __entry->pa_mask_lsb = ~0; - __entry->fru_id = *fru_id; - __assign_str(fru_text, fru_text); - cper_mem_err_pack(mem, 
&__entry->data); - ), - - TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", - __entry->err_seq, - cper_severity_str(__entry->sev), - cper_mem_err_type_str(__entry->etype), - __entry->pa, - __entry->pa_mask_lsb, - cper_mem_err_unpack(p, &__entry->data), - &__entry->fru_id, - __get_str(fru_text)) -); -#endif - -/* - * Hardware Events Report - * - * Those events are generated when hardware detected a corrected or - * uncorrected event, and are meant to replace the current API to report - * errors defined on both EDAC and MCE subsystems. - * - * FIXME: Add events for handling memory errors originated from the - * MCE subsystem. - */ - -/* - * Hardware-independent Memory Controller specific events - */ - -/* - * Default error mechanisms for Memory Controller errors (CE and UE) - */ -TRACE_EVENT(mc_event, - - TP_PROTO(const unsigned int err_type, - const char *error_msg, - const char *label, - const int error_count, - const u8 mc_index, - const s8 top_layer, - const s8 mid_layer, - const s8 low_layer, - unsigned long address, - const u8 grain_bits, - unsigned long syndrome, - const char *driver_detail), - - TP_ARGS(err_type, error_msg, label, error_count, mc_index, - top_layer, mid_layer, low_layer, address, grain_bits, - syndrome, driver_detail), - - TP_STRUCT__entry( - __field( unsigned int, error_type ) - __string( msg, error_msg ) - __string( label, label ) - __field( u16, error_count ) - __field( u8, mc_index ) - __field( s8, top_layer ) - __field( s8, middle_layer ) - __field( s8, lower_layer ) - __field( long, address ) - __field( u8, grain_bits ) - __field( long, syndrome ) - __string( driver_detail, driver_detail ) - ), - - TP_fast_assign( - __entry->error_type = err_type; - __assign_str(msg, error_msg); - __assign_str(label, label); - __entry->error_count = error_count; - __entry->mc_index = mc_index; - __entry->top_layer = top_layer; - __entry->middle_layer = mid_layer; - __entry->lower_layer = low_layer; - __entry->address 
= address; - __entry->grain_bits = grain_bits; - __entry->syndrome = syndrome; - __assign_str(driver_detail, driver_detail); - ), - - TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", - __entry->error_count, - mc_event_error_type(__entry->error_type), - __entry->error_count > 1 ? "s" : "", - ((char *)__get_str(msg))[0] ? " " : "", - __get_str(msg), - __get_str(label), - __entry->mc_index, - __entry->top_layer, - __entry->middle_layer, - __entry->lower_layer, - __entry->address, - 1 << __entry->grain_bits, - __entry->syndrome, - ((char *)__get_str(driver_detail))[0] ? " " : "", - __get_str(driver_detail)) -); - -/* - * PCIe AER Trace event - * - * These events are generated when hardware detects a corrected or - * uncorrected event on a PCIe device. The event report has - * the following structure: - * - * char * dev_name - The name of the slot where the device resides - * ([domain:]bus:device.function). - * u32 status - Either the correctable or uncorrectable register - * indicating what error or errors have been seen - * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED - */ - -#define aer_correctable_errors \ - {PCI_ERR_COR_RCVR, "Receiver Error"}, \ - {PCI_ERR_COR_BAD_TLP, "Bad TLP"}, \ - {PCI_ERR_COR_BAD_DLLP, "Bad DLLP"}, \ - {PCI_ERR_COR_REP_ROLL, "RELAY_NUM Rollover"}, \ - {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"}, \ - {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal Error"}, \ - {PCI_ERR_COR_INTERNAL, "Corrected Internal Error"}, \ - {PCI_ERR_COR_LOG_OVER, "Header Log Overflow"} - -#define aer_uncorrectable_errors \ - {PCI_ERR_UNC_UND, "Undefined"}, \ - {PCI_ERR_UNC_DLP, "Data Link Protocol Error"}, \ - {PCI_ERR_UNC_SURPDN, "Surprise Down Error"}, \ - {PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"}, \ - {PCI_ERR_UNC_FCP, "Flow Control Protocol Error"}, \ - {PCI_ERR_UNC_COMP_TIME, "Completion Timeout"}, \ - {PCI_ERR_UNC_COMP_ABORT,"Completer Abort"}, \ - {PCI_ERR_UNC_UNX_COMP, "Unexpected 
Completion"}, \ - {PCI_ERR_UNC_RX_OVER, "Receiver Overflow"}, \ - {PCI_ERR_UNC_MALF_TLP, "Malformed TLP"}, \ - {PCI_ERR_UNC_ECRC, "ECRC Error"}, \ - {PCI_ERR_UNC_UNSUP, "Unsupported Request Error"}, \ - {PCI_ERR_UNC_ACSV, "ACS Violation"}, \ - {PCI_ERR_UNC_INTN, "Uncorrectable Internal Error"},\ - {PCI_ERR_UNC_MCBTLP, "MC Blocked TLP"}, \ - {PCI_ERR_UNC_ATOMEG, "AtomicOp Egress Blocked"}, \ - {PCI_ERR_UNC_TLPPRE, "TLP Prefix Blocked Error"} - -TRACE_EVENT(aer_event, - TP_PROTO(const char *dev_name, - const u32 status, - const u8 severity), - - TP_ARGS(dev_name, status, severity), - - TP_STRUCT__entry( - __string( dev_name, dev_name ) - __field( u32, status ) - __field( u8, severity ) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name); - __entry->status = status; - __entry->severity = severity; - ), - - TP_printk("%s PCIe Bus Error: severity=%s, %s\n", - __get_str(dev_name), - __entry->severity == AER_CORRECTABLE ? "Corrected" : - __entry->severity == AER_FATAL ? - "Fatal" : "Uncorrected, non-fatal", - __entry->severity == AER_CORRECTABLE ? 
- __print_flags(__entry->status, "|", aer_correctable_errors) : - __print_flags(__entry->status, "|", aer_uncorrectable_errors)) -); - -/* - * memory-failure recovery action result event - * - * unsigned long pfn - Page Frame Number of the corrupted page - * int type - Page types of the corrupted page - * int result - Result of recovery action - */ - -#ifdef CONFIG_MEMORY_FAILURE -#define MF_ACTION_RESULT \ - EM ( MF_IGNORED, "Ignored" ) \ - EM ( MF_FAILED, "Failed" ) \ - EM ( MF_DELAYED, "Delayed" ) \ - EMe ( MF_RECOVERED, "Recovered" ) - -#define MF_PAGE_TYPE \ - EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ - EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ - EM ( MF_MSG_SLAB, "kernel slab page" ) \ - EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ - EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \ - EM ( MF_MSG_HUGE, "huge page" ) \ - EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ - EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ - EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ - EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ - EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ - EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ - EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ - EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ - EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ - EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ - EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ - EM ( MF_MSG_BUDDY, "free buddy page" ) \ - EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ - EMe ( MF_MSG_UNKNOWN, "unknown page" ) - -/* - * First define the enums in MM_ACTION_RESULT to be exported to userspace - * via TRACE_DEFINE_ENUM(). 
- */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -MF_ACTION_RESULT -MF_PAGE_TYPE - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. - */ -#undef EM -#undef EMe -#define EM(a, b) { a, b }, -#define EMe(a, b) { a, b } - -TRACE_EVENT(memory_failure_event, - TP_PROTO(unsigned long pfn, - int type, - int result), - - TP_ARGS(pfn, type, result), - - TP_STRUCT__entry( - __field(unsigned long, pfn) - __field(int, type) - __field(int, result) - ), - - TP_fast_assign( - __entry->pfn = pfn; - __entry->type = type; - __entry->result = result; - ), - - TP_printk("pfn %#lx: recovery action for %s: %s", - __entry->pfn, - __print_symbolic(__entry->type, MF_PAGE_TYPE), - __print_symbolic(__entry->result, MF_ACTION_RESULT) - ) -); -#endif /* CONFIG_MEMORY_FAILURE */ -#endif /* _TRACE_HW_EVENT_MC_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h new file mode 100644 index 000000000000..e5cf7629b364 --- /dev/null +++ b/include/trace/events/ras.h @@ -0,0 +1,322 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ras + +#if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HW_EVENT_MC_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. 
+ */ + +/* memory trace event */ + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +TRACE_EVENT(extlog_mem_event, + TP_PROTO(struct cper_sec_mem_err *mem, + u32 err_seq, + const uuid_le *fru_id, + const char *fru_text, + u8 sev), + + TP_ARGS(mem, err_seq, fru_id, fru_text, sev), + + TP_STRUCT__entry( + __field(u32, err_seq) + __field(u8, etype) + __field(u8, sev) + __field(u64, pa) + __field(u8, pa_mask_lsb) + __field_struct(uuid_le, fru_id) + __string(fru_text, fru_text) + __field_struct(struct cper_mem_err_compact, data) + ), + + TP_fast_assign( + __entry->err_seq = err_seq; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + __entry->etype = mem->error_type; + else + __entry->etype = ~0; + __entry->sev = sev; + if (mem->validation_bits & CPER_MEM_VALID_PA) + __entry->pa = mem->physical_addr; + else + __entry->pa = ~0ull; + + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); + else + __entry->pa_mask_lsb = ~0; + __entry->fru_id = *fru_id; + __assign_str(fru_text, fru_text); + cper_mem_err_pack(mem, &__entry->data); + ), + + TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", + __entry->err_seq, + cper_severity_str(__entry->sev), + cper_mem_err_type_str(__entry->etype), + __entry->pa, + __entry->pa_mask_lsb, + cper_mem_err_unpack(p, &__entry->data), + &__entry->fru_id, + __get_str(fru_text)) +); +#endif + +/* + * Hardware Events Report + * + * Those events are generated when hardware detected a corrected or + * uncorrected event, and are meant to replace the current API to report + * errors defined on both EDAC and MCE subsystems. + * + * FIXME: Add events for handling memory errors originated from the + * MCE subsystem. 
+ */ + +/* + * Hardware-independent Memory Controller specific events + */ + +/* + * Default error mechanisms for Memory Controller errors (CE and UE) + */ +TRACE_EVENT(mc_event, + + TP_PROTO(const unsigned int err_type, + const char *error_msg, + const char *label, + const int error_count, + const u8 mc_index, + const s8 top_layer, + const s8 mid_layer, + const s8 low_layer, + unsigned long address, + const u8 grain_bits, + unsigned long syndrome, + const char *driver_detail), + + TP_ARGS(err_type, error_msg, label, error_count, mc_index, + top_layer, mid_layer, low_layer, address, grain_bits, + syndrome, driver_detail), + + TP_STRUCT__entry( + __field( unsigned int, error_type ) + __string( msg, error_msg ) + __string( label, label ) + __field( u16, error_count ) + __field( u8, mc_index ) + __field( s8, top_layer ) + __field( s8, middle_layer ) + __field( s8, lower_layer ) + __field( long, address ) + __field( u8, grain_bits ) + __field( long, syndrome ) + __string( driver_detail, driver_detail ) + ), + + TP_fast_assign( + __entry->error_type = err_type; + __assign_str(msg, error_msg); + __assign_str(label, label); + __entry->error_count = error_count; + __entry->mc_index = mc_index; + __entry->top_layer = top_layer; + __entry->middle_layer = mid_layer; + __entry->lower_layer = low_layer; + __entry->address = address; + __entry->grain_bits = grain_bits; + __entry->syndrome = syndrome; + __assign_str(driver_detail, driver_detail); + ), + + TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", + __entry->error_count, + mc_event_error_type(__entry->error_type), + __entry->error_count > 1 ? "s" : "", + ((char *)__get_str(msg))[0] ? " " : "", + __get_str(msg), + __get_str(label), + __entry->mc_index, + __entry->top_layer, + __entry->middle_layer, + __entry->lower_layer, + __entry->address, + 1 << __entry->grain_bits, + __entry->syndrome, + ((char *)__get_str(driver_detail))[0] ? 
" " : "", + __get_str(driver_detail)) +); + +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). + * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {PCI_ERR_COR_RCVR, "Receiver Error"}, \ + {PCI_ERR_COR_BAD_TLP, "Bad TLP"}, \ + {PCI_ERR_COR_BAD_DLLP, "Bad DLLP"}, \ + {PCI_ERR_COR_REP_ROLL, "RELAY_NUM Rollover"}, \ + {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"}, \ + {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal Error"}, \ + {PCI_ERR_COR_INTERNAL, "Corrected Internal Error"}, \ + {PCI_ERR_COR_LOG_OVER, "Header Log Overflow"} + +#define aer_uncorrectable_errors \ + {PCI_ERR_UNC_UND, "Undefined"}, \ + {PCI_ERR_UNC_DLP, "Data Link Protocol Error"}, \ + {PCI_ERR_UNC_SURPDN, "Surprise Down Error"}, \ + {PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"}, \ + {PCI_ERR_UNC_FCP, "Flow Control Protocol Error"}, \ + {PCI_ERR_UNC_COMP_TIME, "Completion Timeout"}, \ + {PCI_ERR_UNC_COMP_ABORT,"Completer Abort"}, \ + {PCI_ERR_UNC_UNX_COMP, "Unexpected Completion"}, \ + {PCI_ERR_UNC_RX_OVER, "Receiver Overflow"}, \ + {PCI_ERR_UNC_MALF_TLP, "Malformed TLP"}, \ + {PCI_ERR_UNC_ECRC, "ECRC Error"}, \ + {PCI_ERR_UNC_UNSUP, "Unsupported Request Error"}, \ + {PCI_ERR_UNC_ACSV, "ACS Violation"}, \ + {PCI_ERR_UNC_INTN, "Uncorrectable Internal Error"},\ + {PCI_ERR_UNC_MCBTLP, "MC Blocked TLP"}, \ + {PCI_ERR_UNC_ATOMEG, "AtomicOp Egress Blocked"}, \ + {PCI_ERR_UNC_TLPPRE, "TLP Prefix Blocked Error"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) 
+ __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == AER_CORRECTABLE ? "Corrected" : + __entry->severity == AER_FATAL ? + "Fatal" : "Uncorrected, non-fatal", + __entry->severity == AER_CORRECTABLE ? + __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + +/* + * memory-failure recovery action result event + * + * unsigned long pfn - Page Frame Number of the corrupted page + * int type - Page types of the corrupted page + * int result - Result of recovery action + */ + +#ifdef CONFIG_MEMORY_FAILURE +#define MF_ACTION_RESULT \ + EM ( MF_IGNORED, "Ignored" ) \ + EM ( MF_FAILED, "Failed" ) \ + EM ( MF_DELAYED, "Delayed" ) \ + EMe ( MF_RECOVERED, "Recovered" ) + +#define MF_PAGE_TYPE \ + EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ + EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ + EM ( MF_MSG_SLAB, "kernel slab page" ) \ + EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ + EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \ + EM ( MF_MSG_HUGE, "huge page" ) \ + EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ + EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ + EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ + EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ + EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ + EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ + EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ + EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ + EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ + EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ + EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ + EM ( MF_MSG_BUDDY, "free 
buddy page" ) \ + EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \ + EMe ( MF_MSG_UNKNOWN, "unknown page" ) + +/* + * First define the enums in MM_ACTION_RESULT to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +MF_ACTION_RESULT +MF_PAGE_TYPE + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(memory_failure_event, + TP_PROTO(unsigned long pfn, + int type, + int result), + + TP_ARGS(pfn, type, result), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(int, type) + __field(int, result) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->type = type; + __entry->result = result; + ), + + TP_printk("pfn %#lx: recovery action for %s: %s", + __entry->pfn, + __print_symbolic(__entry->type, MF_PAGE_TYPE), + __print_symbolic(__entry->result, MF_ACTION_RESULT) + ) +); +#endif /* CONFIG_MEMORY_FAILURE */ +#endif /* _TRACE_HW_EVENT_MC_H */ + +/* This part must be outside protection */ +#include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8cbe23ac1056..55f07ccb9056 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -57,7 +57,7 @@ #include #include #include "internal.h" -#include "ras/ras_event.h" +#include int sysctl_memory_failure_early_kill __read_mostly = 0; -- 2.1.0