* [PATCH v3 1/6] efi / ras: CCIX Memory error reporting
2019-11-12 16:52 [PATCH v3 0/6] CCIX Protocol error reporting Jonathan Cameron
@ 2019-11-12 16:52 ` Jonathan Cameron
2019-11-13 6:43 ` kbuild test robot
2019-11-13 8:20 ` kbuild test robot
2019-11-12 16:52 ` [PATCH v3 2/6] efi / ras: CCIX Cache " Jonathan Cameron
` (4 subsequent siblings)
5 siblings, 2 replies; 12+ messages in thread
From: Jonathan Cameron @ 2019-11-12 16:52 UTC (permalink / raw)
To: linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse
Cc: rjw, tony.luck, linuxarm, ard.biesheuvel, nariman.poushin,
Thanu Rangarajan, Jonathan Cameron
CCIX defines a number of different error types
(See CCIX spec 1.0) and UEFI 2.8 defines a CPER record to allow
for them to be reported when firmware first handling is in use.
The last part of that record is a copy of the CCIX protocol
error record which can provide very detailed information.
This patch introduces infrastructure and support for one of those
error types, CCIX Memory Errors. Later patches will supply
equivalent support for the other error types.
The variable length and content of the different messages makes
a single tracepoint impractical. As such the current RAS
tracepoint only covers the memory error. Additional trace points
will be introduced for other error types along with their
cper handling in a follow up series.
RAS daemon support to follow shortly. qemu injection patches
also available but not currently planning to upstream those.
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
drivers/acpi/apei/Kconfig | 8 +
drivers/acpi/apei/ghes.c | 39 ++++
drivers/firmware/efi/Kconfig | 5 +
drivers/firmware/efi/Makefile | 1 +
drivers/firmware/efi/cper-ccix.c | 359 +++++++++++++++++++++++++++++++
drivers/firmware/efi/cper.c | 6 +
include/linux/cper.h | 118 ++++++++++
include/ras/ras_event.h | 77 +++++++
8 files changed, 613 insertions(+)
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 6b18f8bc7be3..e687b18dee34 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -68,3 +68,11 @@ config ACPI_APEI_ERST_DEBUG
error information to and from a persistent store. Enable this
if you want to debugging and testing the ERST kernel support
and firmware implementation.
+
+config ACPI_APEI_CCIX
+ bool "APEI CCIX error recovery support"
+ depends on ACPI_APEI && MEMORY_FAILURE
+ help
+ CCIX has a number of defined error types. This option enables
+ the handling of CPER records generated by a firmware performing
+ firmware first error handling of these CCIX errors.
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 777f6f7122b4..75a177ae9de3 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -490,6 +490,42 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
+/*
+ * ghes_handle_ccix_per - emit a RAS trace event for a CCIX protocol error
+ * @gdata: generic error data entry whose section type is CPER_SEC_CCIX
+ * @sev: severity value forwarded unchanged to the trace event
+ *
+ * The payload is a CCIX CPER header followed by the CCIX PER log.  Nothing
+ * is traced when the payload is too short for those headers, when the PER
+ * log is flagged as absent, or when a memory error record is shorter than
+ * the lengths it declares: the tracepoint reads the record, including its
+ * vendor data, using those lengths.
+ *
+ * Compiles to an empty function when CONFIG_ACPI_APEI_CCIX is not set.
+ */
+static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
+{
+#ifdef CONFIG_ACPI_APEI_CCIX
+	struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
+	struct cper_ccix_mem_error *mem_err;
+	enum ccix_per_type per_type;
+	static u32 err_seq;
+	u16 ven_len = 0;
+	__u32 *dw;
+
+	/* Check if space for CCIX CPER header and 8 DW of a PER log header */
+	if (gdata->error_data_length <
+	    sizeof(*header) + CCIX_PER_LOG_HEADER_DWS * sizeof(__u32))
+		return;
+
+	if ((header->validation_bits & CPER_CCIX_VALID_PER_LOG) == 0)
+		return;
+
+	/* The PER log header dwords follow the CPER header directly */
+	dw = (__u32 *)(header + 1);
+
+	/* per_type is vendor defined if VEN is set, so it cannot be decoded */
+	if (FIELD_GET(CCIX_PER_LOG_DW1_VEN_VAL_M, dw[1])) {
+		pr_info("CCIX error of unknown or vendor defined type\n");
+		err_seq++;
+		return;
+	}
+
+	per_type = FIELD_GET(CCIX_PER_LOG_DW1_PER_TYPE_M, dw[1]);
+
+	switch (per_type) {
+	case CCIX_MEMORY_ERROR:
+		mem_err = acpi_hest_get_payload(gdata);
+
+		/* The fixed part of the memory error record must be present */
+		if (gdata->error_data_length < sizeof(*mem_err))
+			goto malformed;
+
+		if (mem_err->mem_record.validation_bits &
+		    CCIX_MEM_ERR_VENDOR_DATA_VALID) {
+			/* The first vendor DW carries the vendor data length */
+			if (gdata->error_data_length <
+			    sizeof(*mem_err) + sizeof(__u32))
+				goto malformed;
+
+			ven_len = ccix_mem_err_ven_len_get(mem_err);
+
+			/*
+			 * The length counts its own DW, so it can never be
+			 * below 4, and all of it must lie inside the payload.
+			 */
+			if (ven_len < sizeof(__u32) ||
+			    gdata->error_data_length <
+			    sizeof(*mem_err) + ven_len)
+				goto malformed;
+		}
+
+		trace_ccix_memory_error_event(mem_err, err_seq, sev, ven_len);
+		break;
+	default:
+		/* Unknown error type */
+		pr_info("CCIX error of unknown or vendor defined type\n");
+		break;
+	}
+	err_seq++;
+	return;
+
+malformed:
+	pr_info("CCIX memory error record too short, not traced\n");
+#endif
+}
+
static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -520,6 +556,9 @@ static void ghes_do_proc(struct ghes *ghes,
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
}
+ else if (guid_equal(sec_type, &CPER_SEC_CCIX)) {
+ ghes_handle_ccix_per(gdata, estatus->error_severity);
+ }
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index b248870a9806..096e693a9522 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -209,6 +209,11 @@ config UEFI_CPER_X86
depends on UEFI_CPER && X86
default y
+config UEFI_CPER_CCIX
+ bool
+ depends on UEFI_CPER
+ default y
+
config EFI_DEV_PATH_PARSER
bool
depends on ACPI
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 4ac2de4dfa72..9a52c7d01e94 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -34,3 +34,4 @@ obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o
obj-$(CONFIG_EFI_EARLYCON) += earlycon.o
obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o
obj-$(CONFIG_UEFI_CPER_X86) += cper-x86.o
+obj-$(CONFIG_UEFI_CPER_CCIX) += cper-ccix.o
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
new file mode 100644
index 000000000000..53ab5d650479
--- /dev/null
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * UEFI Common Platform Error Record (CPER) support for CCIX
+ * protocol errors.
+ *
+ * Copyright (C) 2019, Huawei
+ * Author: Jonathan Cameron <jonathan.cameron@huawei.com>
+ *
+ * The CCIX® trademark and CCIX trade name are owned solely by
+ * CCIX CONSORTIUM, INC. and all rights are reserved therein.
+ *
+ * CPER is the format used to describe platform hardware error by
+ * various tables, such as ERST, BERT and HEST etc.
+ *
+ * For more information about CPER, please refer to Appendix N of UEFI
+ * Specification version 2.8.
+ *
+ * CCIX defines a number of Protocol Error Messages which form the
+ * main body of the CCIX CPER records. These are defined in the
+ * CCIX Specification 1.0.
+ */
+
+#include <acpi/ghes.h>
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cper.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <ras/ras_event.h>
+
+/* Scratch buffer holding the decoded location text before it is traced */
+static char rcd_decode_str[CPER_REC_LEN];
+
+/* CCIX component names, indexed by the component type of the PER log */
+static const char * const ccix_comp_type_strs[] = {
+	[0] = "Request Agent",
+	[1] = "Home Agent",
+	[2] = "Slave Agent",
+	[3] = "Port",
+	[4] = "Link",
+};
+
+/*
+ * cper_ccix_comp_type_str - name of a CCIX component type
+ * @comp_type: component type value
+ *
+ * Return: the matching name, or "Reserved" for values beyond the table.
+ */
+const char *cper_ccix_comp_type_str(u8 comp_type)
+{
+	if (comp_type >= ARRAY_SIZE(ccix_comp_type_strs))
+		return "Reserved";
+
+	return ccix_comp_type_strs[comp_type];
+}
+
+static const char * const ccix_per_type_strs[] = {
+ "Memory Error",
+ "Cache Error",
+ "ATC Error",
+ "Port Error",
+ "Link Error",
+ "Agent Internal",
+};
+
+/* Generic memory pool type names, indexed by the pool generic type value */
+static const char * const ccix_mem_pool_gen_type_strs[] = {
+	[0] = "Other, Non-specified",
+	[1] = "ROM",
+	[2] = "Volatile",
+	[3] = "Non-volatile",
+	[4] = "Device",
+};
+
+/*
+ * Name of a generic memory pool type: a table entry for known values,
+ * "Vendor" for 0x80 and above, "Reserved" for everything in between.
+ */
+static const char *cper_ccix_mem_err_generic_type_str(u16 type)
+{
+	if (type < ARRAY_SIZE(ccix_mem_pool_gen_type_strs))
+		return ccix_mem_pool_gen_type_strs[type];
+
+	return type >= 0x80 ? "Vendor" : "Reserved";
+}
+
+/* Memory operation names, indexed by the operation type value */
+static const char * const ccix_mem_op_type_strs[] = {
+	[0] = "Generic",
+	[1] = "Read",
+	[2] = "Write",
+	[3] = "Reserved",
+	[4] = "Scrub",
+};
+
+/* Name of a memory operation type; values beyond the table are "Reserved" */
+static const char *cper_ccix_mem_err_op_str(u8 op_type)
+{
+	if (op_type >= ARRAY_SIZE(ccix_mem_op_type_strs))
+		return "Reserved";
+
+	return ccix_mem_op_type_strs[op_type];
+}
+
+/*
+ * Memory error type names, indexed by the memory error type value.
+ * Slightly different from the generic version.
+ */
+static const char * const ccix_mem_err_type_strs[] = {
+	[0]  = "Unknown",
+	[1]  = "No Error",
+	[2]  = "Single-bit ECC",
+	[3]  = "Multi-bit ECC",
+	[4]  = "Single-symbol ChipKill ECC",
+	[5]  = "Multi-symbol ChipKill ECC",
+	[6]  = "Master Abort",
+	[7]  = "Target Abort",
+	[8]  = "Parity Error",
+	[9]  = "Watchdog Timeout",
+	[10] = "Invalid Address",
+	[11] = "Mirror Broken",
+	[12] = "Memory Sparing",
+	[13] = "Scrub",
+	[14] = "Physical Memory Map-out Event",
+};
+
+/*
+ * cper_ccix_mem_err_type_str - name of a CCIX memory error type
+ * @error_type: memory error type value
+ *
+ * Return: the matching name, or "Reserved" for values beyond the table.
+ */
+const char *cper_ccix_mem_err_type_str(unsigned int error_type)
+{
+	if (error_type >= ARRAY_SIZE(ccix_mem_err_type_strs))
+		return "Reserved";
+
+	return ccix_mem_err_type_strs[error_type];
+}
+
+/* Specific memory pool type names, indexed by the pool specific type value */
+static const char * const ccix_mem_spec_type_strs[] = {
+	[0] = "Other, Not-specified",
+	[1] = "SRAM",
+	[2] = "DDR",
+	[3] = "NVDIMM-F",
+	[4] = "NVDIMM-N",
+	[5] = "HBM",
+	[6] = "Flash",
+};
+
+/*
+ * Name of a specific memory pool type: a table entry for known values,
+ * "Vendor" for 0x80 and above, "Reserved" for everything in between.
+ */
+static const char *cper_ccix_mem_err_spec_type_str(u8 specific_type)
+{
+	if (specific_type < ARRAY_SIZE(ccix_mem_spec_type_strs))
+		return ccix_mem_spec_type_strs[specific_type];
+
+	return specific_type >= 0x80 ? "Vendor" : "Reserved";
+}
+
+/*
+ * cper_ccix_mem_err_pack - copy a memory error record into its compact form
+ * @mem_record: memory error record to read
+ * @cmem_err: compact record to fill in
+ * @vendor_data_len: number of vendor data bytes to copy
+ * @vendor_data: destination for the vendor data bytes
+ *
+ * Only fields of the memory error record itself are packed.  Values that
+ * come from the PER log header (source, component, severity, address) are
+ * not part of the compact form.  The vendor data copy starts at the second
+ * vendor dword, skipping the dword that carries the length.
+ */
+void cper_ccix_mem_err_pack(const struct cper_sec_ccix_mem_error *mem_record,
+			    struct cper_ccix_mem_err_compact *cmem_err,
+			    const u16 vendor_data_len,
+			    u8 *vendor_data)
+{
+	/* Which of the fields below are meaningful */
+	cmem_err->validation_bits = mem_record->validation_bits;
+
+	/* Classification of the error and of the affected memory pool */
+	cmem_err->mem_err_type = mem_record->memory_error_type;
+	cmem_err->op_type = mem_record->op_type;
+	cmem_err->pool_generic_type = mem_record->pool_generic_type;
+	cmem_err->pool_specific_type = mem_record->pool_specific_type;
+
+	/* Location of the error */
+	cmem_err->fru = mem_record->fru;
+	cmem_err->card = mem_record->card;
+	cmem_err->module = mem_record->module;
+	cmem_err->bank = mem_record->bank;
+	cmem_err->device = mem_record->device;
+	cmem_err->row = mem_record->row;
+	cmem_err->column = mem_record->column;
+	cmem_err->rank = mem_record->rank;
+	cmem_err->bit_pos = mem_record->bit_pos;
+	cmem_err->chip_id = mem_record->chip_id;
+
+	memcpy(vendor_data, &mem_record->vendor_data[1], vendor_data_len);
+}
+
+/*
+ * cper_ccix_err_location - format a compact CCIX memory error as text
+ * @cmem_err: compact memory error record to decode
+ * @msg: output buffer of at least CPER_REC_LEN bytes, may be NULL
+ *
+ * Appends one "Name: value " item for each field whose validation bit is
+ * set, then the FRU, which is always printed.  Output that does not fit is
+ * truncated; the result is always NUL terminated.
+ *
+ * Return: number of characters written, not counting the terminating NUL;
+ * 0 if @msg is NULL.
+ */
+static int cper_ccix_err_location(struct cper_ccix_mem_err_compact *cmem_err,
+				  char *msg)
+{
+	u32 len = CPER_REC_LEN - 1;
+	u32 n = 0;
+
+	if (!msg)
+		return 0;
+
+	/*
+	 * scnprintf() returns what was actually stored, so n never passes
+	 * len and "len - n" is always the space that is really left.
+	 */
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_GENERIC_MEM_VALID)
+		n += scnprintf(msg + n, len - n, "Pool Generic Type: %s ",
+			       cper_ccix_mem_err_generic_type_str(cmem_err->pool_generic_type));
+
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_MEM_ERR_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Err Type: %s ",
+			       cper_ccix_mem_err_type_str(cmem_err->mem_err_type));
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_OP_VALID)
+		n += scnprintf(msg + n, len - n, "Operation: %s ",
+			       cper_ccix_mem_err_op_str(cmem_err->op_type));
+
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_CARD_VALID)
+		n += scnprintf(msg + n, len - n, "Card: %u ", cmem_err->card);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_MOD_VALID)
+		n += scnprintf(msg + n, len - n, "Mod: %u ", cmem_err->module);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_BANK_VALID)
+		n += scnprintf(msg + n, len - n, "Bank: %u ", cmem_err->bank);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_DEVICE_VALID)
+		n += scnprintf(msg + n, len - n, "Device: %u ",
+			       cmem_err->device);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_ROW_VALID)
+		n += scnprintf(msg + n, len - n, "Row: %u ", cmem_err->row);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_COL_VALID)
+		n += scnprintf(msg + n, len - n, "Col: %u ", cmem_err->column);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_RANK_VALID)
+		n += scnprintf(msg + n, len - n, "Rank: %u ", cmem_err->rank);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_BIT_POS_VALID)
+		n += scnprintf(msg + n, len - n, "BitPos: %u ",
+			       cmem_err->bit_pos);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_CHIP_ID_VALID)
+		n += scnprintf(msg + n, len - n, "ChipID: %u ",
+			       cmem_err->chip_id);
+	if (cmem_err->validation_bits & CCIX_MEM_ERR_SPEC_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Pool Specific Type: %s ",
+			       cper_ccix_mem_err_spec_type_str(cmem_err->pool_specific_type));
+
+	/* The FRU has no validation bit, so it is reported unconditionally */
+	n += scnprintf(msg + n, len - n, "FRU: %u ", cmem_err->fru);
+
+	return n;
+}
+
+/*
+ * cper_ccix_mem_err_unpack - render a compact memory error into a trace_seq
+ * @p: trace sequence that receives the text
+ * @cmem_err: compact memory error record to decode
+ *
+ * The decoded text, if any, is written to @p followed by a NUL character.
+ *
+ * Return: the buffer position of @p as it was before anything was written.
+ */
+const char *cper_ccix_mem_err_unpack(struct trace_seq *p,
+				     struct cper_ccix_mem_err_compact *cmem_err)
+{
+	const char *start = trace_seq_buffer_ptr(p);
+	int written = cper_ccix_err_location(cmem_err, rcd_decode_str);
+
+	if (written != 0)
+		trace_seq_printf(p, "%s", rcd_decode_str);
+
+	trace_seq_putc(p, '\0');
+
+	return start;
+}
+
+/*
+ * cper_ccix_mem_err_details - print the memory error part of a CCIX record
+ * @pfx: prefix put in front of every printed line
+ * @gdata: generic error data entry holding the CCIX memory error
+ *
+ * Prints the FRU and record length, then every field whose validation bit
+ * is set, then the vendor data dwords if they are flagged as present.
+ *
+ * Return: 0 on success, -ENOSPC if the payload is shorter than the record
+ * or than the vendor data it declares.
+ */
+static int cper_ccix_mem_err_details(const char *pfx,
+				     struct acpi_hest_generic_data *gdata)
+{
+	struct cper_ccix_mem_error *full_mem_err;
+	struct cper_sec_ccix_mem_error *mem_err;
+	u16 vendor_data_len;
+	int nr_vendor_dw;
+	int i;
+
+	if (gdata->error_data_length < sizeof(*full_mem_err))
+		return -ENOSPC;
+
+	full_mem_err = acpi_hest_get_payload(gdata);
+	mem_err = &full_mem_err->mem_record;
+
+	printk("%s""FRU ID: %u, Length: %u\n", pfx,
+	       mem_err->fru, mem_err->length);
+
+	if (mem_err->validation_bits & CCIX_MEM_ERR_GENERIC_MEM_VALID) {
+		printk("%s""Pool Generic Type: %s\n", pfx,
+		       cper_ccix_mem_err_generic_type_str(mem_err->pool_generic_type));
+	}
+	if (mem_err->validation_bits & CCIX_MEM_ERR_OP_VALID) {
+		printk("%s""Operation: %s\n", pfx,
+		       cper_ccix_mem_err_op_str(mem_err->op_type));
+	}
+	if (mem_err->validation_bits & CCIX_MEM_ERR_MEM_ERR_TYPE_VALID) {
+		printk("%s""Mem Error Type: %s\n", pfx,
+		       cper_ccix_mem_err_type_str(mem_err->memory_error_type));
+	}
+	if (mem_err->validation_bits & CCIX_MEM_ERR_CARD_VALID)
+		printk("%s""Card: %u\n", pfx, mem_err->card);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_MOD_VALID)
+		printk("%s""Module: %u\n", pfx, mem_err->module);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_BANK_VALID)
+		printk("%s""Bank: %u\n", pfx, mem_err->bank);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_DEVICE_VALID)
+		printk("%s""Device: %u\n", pfx, mem_err->device);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_ROW_VALID)
+		printk("%s""Row: %u\n", pfx, mem_err->row);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_COL_VALID)
+		printk("%s""Column: %u\n", pfx, mem_err->column);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_RANK_VALID)
+		printk("%s""Rank: %u\n", pfx, mem_err->rank);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_BIT_POS_VALID)
+		printk("%s""Bit Pos: %u\n", pfx, mem_err->bit_pos);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_CHIP_ID_VALID)
+		printk("%s""Chip ID: %u\n", pfx, mem_err->chip_id);
+	if (mem_err->validation_bits & CCIX_MEM_ERR_SPEC_TYPE_VALID) {
+		printk("%s""Specific Type: %s\n", pfx,
+		       cper_ccix_mem_err_spec_type_str(mem_err->pool_specific_type));
+	}
+
+	/* Nothing more to print without vendor data */
+	if (!(mem_err->validation_bits & CCIX_MEM_ERR_VENDOR_DATA_VALID))
+		return 0;
+
+	/* The first vendor dword must be readable: it holds the length */
+	if (gdata->error_data_length < sizeof(*full_mem_err) + 4)
+		return -ENOSPC;
+
+	vendor_data_len = mem_err->vendor_data[0] & GENMASK(15, 0);
+	if (gdata->error_data_length < sizeof(*full_mem_err) + vendor_data_len)
+		return -ENOSPC;
+
+	/* Dwords after the length dword; negative when the length is below 4 */
+	nr_vendor_dw = vendor_data_len / 4 - 1;
+	for (i = 0; i < nr_vendor_dw; i++) {
+		printk("%s""Vendor%d: 0x%08x\n", pfx, i,
+		       mem_err->vendor_data[i + 1]);
+	}
+
+	return 0;
+}
+
+/*
+ * cper_print_ccix_per - print a CCIX protocol error CPER section
+ * @pfx: prefix put in front of every printed line
+ * @gdata: generic error data entry whose payload is a CCIX CPER section
+ *
+ * Prints the CPER header, then the PER log header if it is flagged as
+ * present, then the type specific details for the error types that have a
+ * decoder.  Vendor defined types are only identified by number.
+ *
+ * Return: 0 on success, -ENOSPC if the payload is too short for the parts
+ * that need to be read.
+ */
+int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
+{
+	struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
+	__u32 *dw;
+	__u32 comp_type;
+	enum ccix_per_type per_type;
+	bool vendor_per;
+
+	if (gdata->error_data_length < sizeof(*header))
+		return -ENOSPC;
+
+	printk("%s""CPER Length: %u\n", pfx, header->length);
+	if (header->validation_bits & CPER_CCIX_VALID_SOURCE_ID)
+		printk("%s""Source: %u\n", pfx, header->source_id);
+	if (header->validation_bits & CPER_CCIX_VALID_PORT_ID)
+		printk("%s""Port: %u\n", pfx, header->port_id);
+	/* Not much use if we don't have the per log, in theory it's optional */
+	if ((header->validation_bits & CPER_CCIX_VALID_PER_LOG) == 0)
+		return 0;
+
+	/* The per log header is a packed structure so needs breaking up */
+	if (gdata->error_data_length <
+	    sizeof(*header) + CCIX_PER_LOG_HEADER_DWS * sizeof(__u32))
+		return -ENOSPC;
+
+	/* The PER log header dwords follow the CPER header directly */
+	dw = (__u32 *)(header + 1);
+
+	printk("%s""PER Rev: %lu, Log Length: %lu\n", pfx,
+	       FIELD_GET(CCIX_PER_LOG_DW0_REV_M, dw[0]),
+	       FIELD_GET(CCIX_PER_LOG_DW0_LEN_M, dw[0]));
+	comp_type = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M, dw[1]);
+	printk("%s""Component: %s\n", pfx, cper_ccix_comp_type_str(comp_type));
+	printk("%s""ME: %lu, SevUE: %lu, SevNoComm: %lu, SevDegraded: %lu, SevDeferred: %lu\n",
+	       pfx,
+	       FIELD_GET(CCIX_PER_LOG_DW0_ME_M, dw[0]),
+	       FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M, dw[1]),
+	       FIELD_GET(CCIX_PER_LOG_DW1_SEV_NO_COMM_M, dw[1]),
+	       FIELD_GET(CCIX_PER_LOG_DW1_SEV_DEGRADED_M, dw[1]),
+	       FIELD_GET(CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M, dw[1]));
+
+	/* per_type is vendor defined if VEN is set */
+	vendor_per = FIELD_GET(CCIX_PER_LOG_DW1_VEN_VAL_M, dw[1]) ?
+		true : false;
+	per_type = FIELD_GET(CCIX_PER_LOG_DW1_PER_TYPE_M, dw[1]);
+	if (vendor_per)
+		printk("%s""Protocol Error Type: Vendor%u\n", pfx, per_type);
+	else
+		printk("%s""Protocol Error Type: %s\n", pfx,
+		       per_type < ARRAY_SIZE(ccix_per_type_strs) ?
+		       ccix_per_type_strs[per_type] : "Reserved");
+
+	/* Upper half in DW2, lower half in DW3 with its two low bits masked */
+	if (FIELD_GET(CCIX_PER_LOG_DW1_ADDR_VAL_M, dw[1]))
+		printk("%s""Address: 0x%llx\n", pfx,
+		       (((__u64)dw[2]) << 32) | (dw[3] & 0xFFFFFFFC));
+
+	/* Vendor defined PER message, perhaps we could print it out */
+	if (vendor_per)
+		return 0;
+
+	switch (per_type) {
+	case CCIX_MEMORY_ERROR:
+		return cper_ccix_mem_err_details(pfx, gdata);
+	default:
+		/* No decoder for this error type, only the header is shown */
+		break;
+	}
+	return 0;
+}
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index b1af0de2e100..03bb27db2e87 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -474,6 +474,12 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
cper_print_pcie(newpfx, pcie, gdata);
else
goto err_section_too_small;
+ } else if (guid_equal(sec_type, &CPER_SEC_CCIX)) {
+ int ret;
+ /* CCIX CPER entries are variable length */
+ ret = cper_print_ccix_per(newpfx, gdata);
+ if (ret)
+ goto err_section_too_small;
#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
} else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 4f005d95ce88..df7a34c3ba4f 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -174,6 +174,9 @@ enum {
#define CPER_SEC_PCIE \
GUID_INIT(0xD995E954, 0xBBC1, 0x430F, 0xAD, 0x91, 0xB4, 0x4D, \
0xCB, 0x3C, 0x6F, 0x35)
+#define CPER_SEC_CCIX \
+ GUID_INIT(0x91335EF6, 0xEBFB, 0x4478, 0xA6, 0xA6, 0x88, 0xB7, \
+ 0x28, 0xCF, 0x75, 0xD7)
/* Firmware Error Record Reference */
#define CPER_SEC_FW_ERR_REC_REF \
GUID_INIT(0x81212A96, 0x09ED, 0x4996, 0x94, 0x71, 0x8D, 0x72, \
@@ -242,6 +245,10 @@ enum {
#define CPER_PCIE_SLOT_SHIFT 3
+#define CPER_CCIX_VALID_SOURCE_ID BIT(0)
+#define CPER_CCIX_VALID_PORT_ID BIT(1)
+#define CPER_CCIX_VALID_PER_LOG BIT(2)
+
#define CPER_ARM_VALID_MPIDR BIT(0)
#define CPER_ARM_VALID_AFFINITY_LEVEL BIT(1)
#define CPER_ARM_VALID_RUNNING_STATE BIT(2)
@@ -521,6 +528,105 @@ struct cper_sec_pcie {
u8 aer_info[96];
};
+/* Header found at the start of a CCIX CPER section payload */
+struct cper_sec_ccix_header {
+ __u32 length;
+ /* CPER_CCIX_VALID_* flags saying which of the parts below are present */
+ __u64 validation_bits;
+ __u8 source_id;
+ __u8 port_id;
+ __u8 reserved[2];
+};
+
+/*
+ * Field masks for the first two dwords of the CCIX PER log header, for use
+ * with FIELD_GET(); DW0 and DW1 name the dword a mask applies to.
+ */
+#define CCIX_PER_LOG_DW0_REV_M GENMASK(7, 0)
+#define CCIX_PER_LOG_DW0_LEN_M GENMASK(14, 8)
+#define CCIX_PER_LOG_DW0_ME_M BIT(15)
+#define CCIX_PER_LOG_DW1_COMP_TYPE_M GENMASK(15, 12)
+#define CCIX_PER_LOG_DW1_SEV_UE_M BIT(16)
+#define CCIX_PER_LOG_DW1_SEV_NO_COMM_M BIT(17)
+#define CCIX_PER_LOG_DW1_SEV_DEGRADED_M BIT(18)
+#define CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M BIT(19)
+#define CCIX_PER_LOG_DW1_PER_TYPE_M GENMASK(27, 24)
+#define CCIX_PER_LOG_DW1_ADDR_VAL_M BIT(30)
+#define CCIX_PER_LOG_DW1_VEN_VAL_M BIT(31)
+/* Values of the PER type field (CCIX_PER_LOG_DW1_PER_TYPE_M) */
+enum ccix_per_type {
+ CCIX_MEMORY_ERROR = 0,
+ CCIX_CACHE_ERROR = 1,
+ CCIX_ATC_ERROR = 2,
+ CCIX_PORT_ERROR = 3,
+ CCIX_LINK_ERROR = 4,
+ CCIX_AGENT_INTERNAL_ERROR = 5,
+};
+
+#define CCIX_PER_LOG_HEADER_DWS 8
+
+/*
+ * CCIX memory error record, placed after the PER log header.  The
+ * CCIX_MEM_ERR_*_VALID flags in validation_bits say which fields are
+ * meaningful.
+ */
+struct cper_sec_ccix_mem_error {
+ __u32 validation_bits;
+#define CCIX_MEM_ERR_GENERIC_MEM_VALID BIT(0)
+#define CCIX_MEM_ERR_OP_VALID BIT(1)
+#define CCIX_MEM_ERR_MEM_ERR_TYPE_VALID BIT(2)
+#define CCIX_MEM_ERR_CARD_VALID BIT(3)
+#define CCIX_MEM_ERR_BANK_VALID BIT(4)
+#define CCIX_MEM_ERR_DEVICE_VALID BIT(5)
+#define CCIX_MEM_ERR_ROW_VALID BIT(6)
+#define CCIX_MEM_ERR_COL_VALID BIT(7)
+#define CCIX_MEM_ERR_RANK_VALID BIT(8)
+#define CCIX_MEM_ERR_BIT_POS_VALID BIT(9)
+#define CCIX_MEM_ERR_CHIP_ID_VALID BIT(10)
+#define CCIX_MEM_ERR_VENDOR_DATA_VALID BIT(11)
+#define CCIX_MEM_ERR_MOD_VALID BIT(12)
+#define CCIX_MEM_ERR_SPEC_TYPE_VALID BIT(13)
+
+ __u8 fru;
+ __u8 reserved;
+ __u16 length; /* Includes vendor specific log info */
+ __u8 pool_generic_type;
+ __u8 op_type;
+ __u8 memory_error_type;
+ __u8 card;
+ __u16 module;
+ __u16 bank;
+ __u32 device;
+ __u32 row;
+ __u32 column;
+ __u32 rank;
+ __u8 bit_pos;
+ __u8 chip_id;
+ __u8 pool_specific_type;
+ /* Low 16 bits of the first dword hold the vendor data length in bytes */
+ __u32 vendor_data[];
+};
+
+/*
+ * Complete CCIX memory error section payload: CPER header, the dwords of
+ * the PER log header, then the memory error record.
+ */
+struct cper_ccix_mem_error {
+ struct cper_sec_ccix_header header;
+ __u32 ccix_header[CCIX_PER_LOG_HEADER_DWS];
+ struct cper_sec_ccix_mem_error mem_record;
+};
+
+/*
+ * ccix_mem_err_ven_len_get - vendor data length declared by a memory error
+ * @mem_err: complete CCIX memory error payload
+ *
+ * Return: the low 16 bits of the first vendor data dword, or 0 if the
+ * record does not flag its vendor data as valid.
+ */
+static inline u16 ccix_mem_err_ven_len_get(struct cper_ccix_mem_error *mem_err)
+{
+	u32 valid = mem_err->mem_record.validation_bits;
+
+	if (!(valid & CCIX_MEM_ERR_VENDOR_DATA_VALID))
+		return 0;
+
+	return mem_err->mem_record.vendor_data[0] & 0xFFFF;
+}
+
+/*
+ * Compact form of a CCIX memory error record, as stored in the
+ * ccix_memory_error_event trace entry.  Filled by cper_ccix_mem_err_pack();
+ * validation_bits uses the CCIX_MEM_ERR_*_VALID flags.
+ */
+struct cper_ccix_mem_err_compact {
+ __u32 validation_bits;
+ __u8 mem_err_type;
+ __u8 pool_generic_type;
+ __u8 pool_specific_type;
+ __u8 op_type;
+ __u8 card;
+ __u16 module;
+ __u16 bank;
+ __u32 device;
+ __u32 row;
+ __u32 column;
+ __u32 rank;
+ __u8 bit_pos;
+ __u8 chip_id;
+ __u8 fru;
+};
+
/* Reset to default packing */
#pragma pack()
@@ -535,6 +641,18 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *,
struct cper_mem_err_compact *);
const char *cper_mem_err_unpack(struct trace_seq *,
struct cper_mem_err_compact *);
+void cper_ccix_mem_err_pack(const struct cper_sec_ccix_mem_error *mem_record,
+ struct cper_ccix_mem_err_compact *cmem_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data);
+const char *cper_ccix_mem_err_unpack(struct trace_seq *p,
+ struct cper_ccix_mem_err_compact *cmem_err);
+const char *cper_ccix_mem_err_type_str(unsigned int error_type);
+const char *cper_ccix_comp_type_str(u8 comp_type);
+struct acpi_hest_generic_data;
+int cper_print_ccix_per(const char *pfx,
+ struct acpi_hest_generic_data *gdata);
+
void cper_print_proc_arm(const char *pfx,
const struct cper_sec_proc_arm *proc);
void cper_print_proc_ia(const char *pfx,
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 36c5c5e38c1d..128728eaeef4 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -14,6 +14,7 @@
#include <linux/cper.h>
#include <linux/mm.h>
+#include <linux/bitfield.h>
/*
* MCE Extended Error Log trace event
*
@@ -338,6 +339,82 @@ TRACE_EVENT(aer_event,
"Not available")
);
+/*
+ * CCIX PER log memory error trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ *
+ * Some elements of the record are not included
+ * - PER version (tracepoint should remain compatible across versions)
+ * - Multiple Error
+ */
+TRACE_EVENT(ccix_memory_error_event,
+ TP_PROTO(struct cper_ccix_mem_error *mem,
+ u32 err_seq,
+ u8 sev,
+ u16 ven_len),
+
+ TP_ARGS(mem, err_seq, sev, ven_len),
+
+ TP_STRUCT__entry(
+ __field(u32, err_seq)
+ __field(u8, sev)
+ __field(u8, sevdetail)
+ __field(u8, source)
+ __field(u8, component)
+ __field(u64, pa)
+ __field(u8, pa_mask_lsb)
+ __field_struct(struct cper_ccix_mem_err_compact, data)
+ __field(u16, vendor_data_length)
+ __dynamic_array(u8, vendor_data, ven_len)
+ ),
+
+ TP_fast_assign(
+ __entry->err_seq = err_seq;
+ __entry->sev = sev;
+ __entry->sevdetail =
+ FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+ CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+ CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+ CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+ mem->ccix_header[1]);
+ if (mem->header.validation_bits & 0x1)
+ __entry->source = mem->header.source_id;
+ else
+ __entry->source = ~0;
+ __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+ mem->ccix_header[1]);
+ if (mem->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+ __entry->pa = (u64)mem->ccix_header[2] << 32 |
+ (mem->ccix_header[3] & 0xfffffffc);
+ __entry->pa_mask_lsb = mem->ccix_header[4] & 0xff;
+ } else {
+ __entry->pa = ~0ull;
+ __entry->pa_mask_lsb = ~0;
+ }
+ __entry->vendor_data_length = ven_len ? ven_len - 4 : 0;
+ cper_ccix_mem_err_pack(&mem->mem_record, &__entry->data,
+ __entry->vendor_data_length,
+ __get_dynamic_array(vendor_data));
+ ),
+
+ TP_printk("{%d} %s CCIX PER Memory Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
+ __entry->err_seq,
+ cper_severity_str(__entry->sev),
+ cper_ccix_comp_type_str(__entry->component),
+ __entry->sevdetail & BIT(0) ? 1 : 0,
+ __entry->sevdetail & BIT(1) ? 1 : 0,
+ __entry->sevdetail & BIT(2) ? 1 : 0,
+ __entry->sevdetail & BIT(3) ? 1 : 0,
+ __entry->pa,
+ __entry->pa_mask_lsb,
+ cper_ccix_mem_err_unpack(p, &__entry->data),
+ __print_hex(__get_dynamic_array(vendor_data),
+ __entry->vendor_data_length)
+ )
+);
+
/*
* memory-failure recovery action result event
*
--
2.20.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/6] efi / ras: CCIX Memory error reporting
2019-11-12 16:52 ` [PATCH v3 1/6] efi / ras: CCIX Memory " Jonathan Cameron
@ 2019-11-13 6:43 ` kbuild test robot
2019-11-13 8:20 ` kbuild test robot
1 sibling, 0 replies; 12+ messages in thread
From: kbuild test robot @ 2019-11-13 6:43 UTC (permalink / raw)
To: Jonathan Cameron
Cc: kbuild-all, linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse, rjw, tony.luck, linuxarm,
ard.biesheuvel, nariman.poushin, Thanu Rangarajan,
Jonathan Cameron
[-- Attachment #1: Type: text/plain, Size: 1653 bytes --]
Hi Jonathan,
I love your patch! Yet something to improve:
[auto build test ERROR on efi/next]
[cannot apply to v5.4-rc7 next-20191112]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Jonathan-Cameron/efi-ras-CCIX-Memory-error-reporting/20191113-133322
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
config: i386-defconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
ld: drivers/ras/ras.o: in function `perf_trace_ccix_memory_error_event':
>> ras.c:(.text+0x794): undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_memory_error_event':
>> ras.c:(.text+0x83d): undefined reference to `cper_ccix_mem_err_unpack'
>> ld: ras.c:(.text+0x863): undefined reference to `cper_ccix_comp_type_str'
>> ld: ras.c:(.text+0x86f): undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_memory_error_event':
ras.c:(.text+0x10e9): undefined reference to `cper_ccix_mem_err_pack'
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28148 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/6] efi / ras: CCIX Memory error reporting
2019-11-12 16:52 ` [PATCH v3 1/6] efi / ras: CCIX Memory " Jonathan Cameron
2019-11-13 6:43 ` kbuild test robot
@ 2019-11-13 8:20 ` kbuild test robot
1 sibling, 0 replies; 12+ messages in thread
From: kbuild test robot @ 2019-11-13 8:20 UTC (permalink / raw)
To: Jonathan Cameron
Cc: kbuild-all, linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse, rjw, tony.luck, linuxarm,
ard.biesheuvel, nariman.poushin, Thanu Rangarajan,
Jonathan Cameron
[-- Attachment #1: Type: text/plain, Size: 4742 bytes --]
Hi Jonathan,
I love your patch! Yet something to improve:
[auto build test ERROR on efi/next]
[cannot apply to v5.4-rc7 next-20191112]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Jonathan-Cameron/efi-ras-CCIX-Memory-error-reporting/20191113-133322
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
config: x86_64-randconfig-a004-201945 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
ld: drivers/ras/ras.o: in function `perf_trace_ccix_memory_error_event':
>> include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_memory_error_event':
>> include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_memory_error_event':
>> include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_unpack'
>> ld: include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_comp_type_str'
>> ld: include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_severity_str'
vim +352 include/trace/../../include/ras/ras_event.h
341
342 /*
343 * CCIX PER log memory error trace event
344 *
345 * These events are generated when hardware detects a corrected or
346 * uncorrected event.
347 *
348 * Some elements of the record are not included
349 * - PER version (tracepoint should remain compatible across versions)
350 * - Multiple Error
351 */
> 352 TRACE_EVENT(ccix_memory_error_event,
353 TP_PROTO(struct cper_ccix_mem_error *mem,
354 u32 err_seq,
355 u8 sev,
356 u16 ven_len),
357
358 TP_ARGS(mem, err_seq, sev, ven_len),
359
360 TP_STRUCT__entry(
361 __field(u32, err_seq)
362 __field(u8, sev)
363 __field(u8, sevdetail)
364 __field(u8, source)
365 __field(u8, component)
366 __field(u64, pa)
367 __field(u8, pa_mask_lsb)
368 __field_struct(struct cper_ccix_mem_err_compact, data)
369 __field(u16, vendor_data_length)
370 __dynamic_array(u8, vendor_data, ven_len)
371 ),
372
373 TP_fast_assign(
374 __entry->err_seq = err_seq;
375 __entry->sev = sev;
376 __entry->sevdetail =
377 FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
378 CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
379 CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
380 CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
381 mem->ccix_header[1]);
382 if (mem->header.validation_bits & 0x1)
383 __entry->source = mem->header.source_id;
384 else
385 __entry->source = ~0;
386 __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
387 mem->ccix_header[1]);
388 if (mem->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
389 __entry->pa = (u64)mem->ccix_header[2] << 32 |
390 (mem->ccix_header[3] & 0xfffffffc);
391 __entry->pa_mask_lsb = mem->ccix_header[4] & 0xff;
392 } else {
393 __entry->pa = ~0ull;
394 __entry->pa_mask_lsb = ~0;
395 }
396 __entry->vendor_data_length = ven_len ? ven_len - 4 : 0;
397 cper_ccix_mem_err_pack(&mem->mem_record, &__entry->data,
398 __entry->vendor_data_length,
399 __get_dynamic_array(vendor_data));
400 ),
401
402 TP_printk("{%d} %s CCIX PER Memory Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
403 __entry->err_seq,
404 cper_severity_str(__entry->sev),
405 cper_ccix_comp_type_str(__entry->component),
406 __entry->sevdetail & BIT(0) ? 1 : 0,
407 __entry->sevdetail & BIT(1) ? 1 : 0,
408 __entry->sevdetail & BIT(2) ? 1 : 0,
409 __entry->sevdetail & BIT(3) ? 1 : 0,
410 __entry->pa,
411 __entry->pa_mask_lsb,
412 cper_ccix_mem_err_unpack(p, &__entry->data),
413 __print_hex(__get_dynamic_array(vendor_data),
414 __entry->vendor_data_length)
415 )
416 );
417
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32398 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 2/6] efi / ras: CCIX Cache error reporting
2019-11-12 16:52 [PATCH v3 0/6] CCIX Protocol error reporting Jonathan Cameron
2019-11-12 16:52 ` [PATCH v3 1/6] efi / ras: CCIX Memory " Jonathan Cameron
@ 2019-11-12 16:52 ` Jonathan Cameron
2019-11-13 9:10 ` kbuild test robot
2019-11-13 9:32 ` kbuild test robot
2019-11-12 16:52 ` [PATCH v3 3/6] efi / ras: CCIX Address Translation " Jonathan Cameron
` (3 subsequent siblings)
5 siblings, 2 replies; 12+ messages in thread
From: Jonathan Cameron @ 2019-11-12 16:52 UTC (permalink / raw)
To: linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse
Cc: rjw, tony.luck, linuxarm, ard.biesheuvel, nariman.poushin,
Thanu Rangarajan, Jonathan Cameron
As CCIX Request Agents have fully cache coherent caches,
the CCIX 1.0 Base Specification defines detailed error
reporting for these caches.
A CCIX cache error is reported via a CPER record as defined in the
UEFI 2.8 specification. The PER log section is defined in the
CCIX 1.0 Base Specification.
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
drivers/acpi/apei/ghes.c | 4 +
drivers/firmware/efi/cper-ccix.c | 170 +++++++++++++++++++++++++++++++
include/linux/cper.h | 57 +++++++++++
include/ras/ras_event.h | 66 ++++++++++++
4 files changed, 297 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 75a177ae9de3..c99a4216b67d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -517,6 +517,10 @@ static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
trace_ccix_memory_error_event(payload, err_seq, sev,
ccix_mem_err_ven_len_get(payload));
break;
+ case CCIX_CACHE_ERROR:
+ trace_ccix_cache_error_event(payload, err_seq, sev,
+ ccix_cache_err_ven_len_get(payload));
+ break;
default:
/* Unknown error type */
pr_info("CCIX error of unknown or vendor defined type\n");
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
index 53ab5d650479..14d3f5b8ceab 100644
--- a/drivers/firmware/efi/cper-ccix.c
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -290,6 +290,110 @@ static int cper_ccix_mem_err_details(const char *pfx,
return 0;
}
+static const char * const ccix_cache_type_strs[] = {
+ "Instruction Cache",
+ "Data Cache",
+ "Generic / Unified Cache",
+ "Snoop Filter Directory",
+};
+
+static const char *cper_ccix_cache_type_str(__u8 type)
+{
+ return type < ARRAY_SIZE(ccix_cache_type_strs) ?
+ ccix_cache_type_strs[type] : "Reserved";
+}
+
+static const char * const ccix_cache_err_type_strs[] = {
+ "Data",
+ "Tag",
+ "Timeout",
+ "Hang",
+ "Data Lost",
+ "Invalid Address",
+};
+
+const char *cper_ccix_cache_err_type_str(__u8 error_type)
+{
+ return error_type < ARRAY_SIZE(ccix_cache_err_type_strs) ?
+ ccix_cache_err_type_strs[error_type] : "Reserved";
+}
+
+static const char * const ccix_cache_err_op_strs[] = {
+ "Generic",
+ "Generic Read",
+ "Generic Write",
+ "Data Read",
+ "Data Write",
+ "Instruction Fetch",
+ "Prefetch",
+ "Eviction",
+ "Snooping",
+ "Snooped",
+ "Management / Command Error",
+};
+
+static const char *cper_ccix_cache_err_op_str(__u8 op)
+{
+ return op < ARRAY_SIZE(ccix_cache_err_op_strs) ?
+ ccix_cache_err_op_strs[op] : "Reserved";
+}
+
+/*
+ * Print valid CCIX cache error fields and vendor data; -ENOSPC if truncated.
+ */
+static int cper_ccix_cache_err_details(const char *pfx,
+				       struct acpi_hest_generic_data *gdata)
+{
+	struct cper_ccix_cache_error *full_cache_err;
+	struct cper_sec_ccix_cache_error *cache_err;
+	u16 vendor_data_len;
+	int i;
+
+	if (gdata->error_data_length < sizeof(*full_cache_err))
+		return -ENOSPC;
+	full_cache_err = acpi_hest_get_payload(gdata);
+	cache_err = &full_cache_err->cache_record;
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_TYPE_VALID)
+		printk("%s""Cache Type: %s\n", pfx,
+		       cper_ccix_cache_type_str(cache_err->cache_type));
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_OP_VALID)
+		printk("%s""Operation: %s\n", pfx,
+		       cper_ccix_cache_err_op_str(cache_err->op_type));
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_CACHE_ERR_TYPE_VALID)
+		printk("%s""Cache Error Type: %s\n", pfx,
+		       cper_ccix_cache_err_type_str(cache_err->cache_error_type));
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_LEVEL_VALID)
+		printk("%s""Level: %d\n", pfx, cache_err->cache_level);
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_SET_VALID)
+		printk("%s""Set: %u\n", pfx, cache_err->set); /* __u32 field */
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_WAY_VALID)
+		printk("%s""Way: %u\n", pfx, cache_err->way); /* __u32 field */
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_INSTANCE_ID_VALID)
+		printk("%s""Instance ID: %d\n", pfx, cache_err->instance);
+
+	if (cache_err->validation_bits & CCIX_CACHE_ERR_VENDOR_DATA_VALID) {
+		if (gdata->error_data_length < sizeof(*full_cache_err) + 4)
+			return -ENOSPC;
+		vendor_data_len = cache_err->vendor_data[0] & GENMASK(15, 0);
+		if (gdata->error_data_length <
+		    sizeof(*full_cache_err) + vendor_data_len)
+			return -ENOSPC;
+
+		for (i = 0; i < vendor_data_len / 4 - 1; i++)
+			printk("%s""Vendor%d: 0x%08x\n", pfx, i,
+			       cache_err->vendor_data[i + 1]);
+	}
+
+	return 0;
+}
+
int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
{
struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
@@ -351,9 +455,75 @@ int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
switch (per_type) {
case CCIX_MEMORY_ERROR:
return cper_ccix_mem_err_details(pfx, gdata);
+ case CCIX_CACHE_ERROR:
+ return cper_ccix_cache_err_details(pfx, gdata);
default:
/* Vendor defined so no formatting be done */
break;
}
return 0;
}
+
+void cper_ccix_cache_err_pack(const struct cper_sec_ccix_cache_error *cache_record,
+ struct cper_ccix_cache_err_compact *ccache_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data)
+{
+ ccache_err->validation_bits = cache_record->validation_bits;
+ ccache_err->set = cache_record->set;
+ ccache_err->way = cache_record->way;
+ ccache_err->cache_type = cache_record->cache_type;
+ ccache_err->op_type = cache_record->op_type;
+ ccache_err->cache_error_type = cache_record->cache_error_type;
+ ccache_err->cache_level = cache_record->cache_level;
+ ccache_err->instance = cache_record->instance;
+ memcpy(vendor_data, &cache_record->vendor_data[1], vendor_data_len);
+}
+
+/*
+ * Format the valid fields of a compact cache error record into msg.
+ * Returns the number of characters written, or 0 if msg is NULL.
+ */
+static int cper_ccix_err_cache_location(struct cper_ccix_cache_err_compact *ccache_err,
+					char *msg)
+{
+	u32 len = CPER_REC_LEN - 1;
+	u32 n = 0;
+
+	if (!msg)
+		return 0;
+	/* Pass only the space left; scnprintf() returns what it really wrote. */
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_CACHE_ERR_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Error: %s ",
+			cper_ccix_cache_err_type_str(ccache_err->cache_error_type));
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Type: %s ",
+			cper_ccix_cache_type_str(ccache_err->cache_type));
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_OP_VALID)
+		n += scnprintf(msg + n, len - n, "Op: %s ",
+			cper_ccix_cache_err_op_str(ccache_err->op_type));
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_LEVEL_VALID)
+		n += scnprintf(msg + n, len - n, "Level: %d ",
+			ccache_err->cache_level);
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_SET_VALID)
+		n += scnprintf(msg + n, len - n, "Set: %u ", ccache_err->set);
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_WAY_VALID)
+		n += scnprintf(msg + n, len - n, "Way: %u ", ccache_err->way);
+	if (ccache_err->validation_bits & CCIX_CACHE_ERR_INSTANCE_ID_VALID)
+		n += scnprintf(msg + n, len - n, "Instance: %d ",
+			ccache_err->instance);
+	return n;
+}
+
+const char *cper_ccix_cache_err_unpack(struct trace_seq *p,
+ struct cper_ccix_cache_err_compact *ccache_err)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (cper_ccix_err_cache_location(ccache_err, rcd_decode_str))
+ trace_seq_printf(p, "%s", rcd_decode_str);
+
+ trace_seq_putc(p, '\0');
+
+ return ret;
+}
diff --git a/include/linux/cper.h b/include/linux/cper.h
index df7a34c3ba4f..eef254b8b8b7 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -627,6 +627,54 @@ struct cper_ccix_mem_err_compact {
__u8 fru;
};
+struct cper_sec_ccix_cache_error {
+ __u32 validation_bits;
+#define CCIX_CACHE_ERR_TYPE_VALID BIT(0)
+#define CCIX_CACHE_ERR_OP_VALID BIT(1)
+#define CCIX_CACHE_ERR_CACHE_ERR_TYPE_VALID BIT(2)
+#define CCIX_CACHE_ERR_LEVEL_VALID BIT(3)
+#define CCIX_CACHE_ERR_SET_VALID BIT(4)
+#define CCIX_CACHE_ERR_WAY_VALID BIT(5)
+#define CCIX_CACHE_ERR_INSTANCE_ID_VALID BIT(6)
+#define CCIX_CACHE_ERR_VENDOR_DATA_VALID BIT(7)
+ __u16 length; /* Includes vendor specific log info */
+ __u8 cache_type;
+ __u8 op_type;
+ __u8 cache_error_type;
+ __u8 cache_level;
+ __u32 set;
+ __u32 way;
+ __u8 instance;
+ __u8 reserved;
+ __u32 vendor_data[];
+};
+
+struct cper_ccix_cache_error {
+ struct cper_sec_ccix_header header;
+ __u32 ccix_header[CCIX_PER_LOG_HEADER_DWS];
+ struct cper_sec_ccix_cache_error cache_record;
+};
+
+static inline u16 ccix_cache_err_ven_len_get(struct cper_ccix_cache_error *cache_err)
+{
+ if (cache_err->cache_record.validation_bits &
+ CCIX_CACHE_ERR_VENDOR_DATA_VALID)
+ return cache_err->cache_record.vendor_data[0] & 0xFFFF;
+ else
+ return 0;
+}
+
+struct cper_ccix_cache_err_compact {
+ __u32 validation_bits;
+ __u32 set;
+ __u32 way;
+ __u8 cache_type;
+ __u8 op_type;
+ __u8 cache_error_type;
+ __u8 cache_level;
+ __u8 instance;
+};
+
/* Reset to default packing */
#pragma pack()
@@ -649,6 +697,15 @@ const char *cper_ccix_mem_err_unpack(struct trace_seq *p,
struct cper_ccix_mem_err_compact *cmem_err);
const char *cper_ccix_mem_err_type_str(unsigned int error_type);
const char *cper_ccix_comp_type_str(u8 comp_type);
+
+void cper_ccix_cache_err_pack(const struct cper_sec_ccix_cache_error *cache_record,
+ struct cper_ccix_cache_err_compact *ccache_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data);
+const char *cper_ccix_cache_err_unpack(struct trace_seq *p,
+ struct cper_ccix_cache_err_compact *ccache_err);
+const char *cper_ccix_cache_err_type_str(__u8 error_type);
+
struct acpi_hest_generic_data;
int cper_print_ccix_per(const char *pfx,
struct acpi_hest_generic_data *gdata);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 128728eaeef4..55f2c1900c54 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -415,6 +415,72 @@ TRACE_EVENT(ccix_memory_error_event,
)
);
+/* CCIX PER log cache error trace event */
+TRACE_EVENT(ccix_cache_error_event,
+	TP_PROTO(struct cper_ccix_cache_error *err,
+		 u32 err_seq,
+		 u8 sev,
+		 u16 ven_len),
+
+	TP_ARGS(err, err_seq, sev, ven_len),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, sev)
+		__field(u8, sevdetail)
+		__field(u8, source)
+		__field(u8, component)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__field_struct(struct cper_ccix_cache_err_compact, data)
+		__field(u16, vendor_data_length)
+		__dynamic_array(u8, vendor_data, ven_len)
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+		__entry->sev = sev;
+		__entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+					       CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+					       CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+					       CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+					       err->ccix_header[1]);
+		if (err->header.validation_bits & 0x1)
+			__entry->source = err->header.source_id;
+		else
+			__entry->source = ~0;
+		__entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+					       err->ccix_header[1]);
+		if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+			__entry->pa = (u64)err->ccix_header[2] << 32 |
+				(err->ccix_header[3] & 0xfffffffc);
+			__entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
+		} else {
+			__entry->pa = ~0ull;
+			__entry->pa_mask_lsb = ~0;
+		}
+		/* ven_len includes the 4 byte length DW; shorter means no data */
+		__entry->vendor_data_length = ven_len > 4 ? ven_len - 4 : 0;
+		cper_ccix_cache_err_pack(&err->cache_record, &__entry->data,
+					 __entry->vendor_data_length,
+					 __get_dynamic_array(vendor_data));
+	),
+
+	TP_printk("{%d} %s CCIX PER Cache Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
+		__entry->err_seq,
+		cper_severity_str(__entry->sev),
+		cper_ccix_comp_type_str(__entry->component),
+		__entry->sevdetail & BIT(0) ? 1 : 0,
+		__entry->sevdetail & BIT(1) ? 1 : 0,
+		__entry->sevdetail & BIT(2) ? 1 : 0,
+		__entry->sevdetail & BIT(3) ? 1 : 0,
+		__entry->pa,
+		__entry->pa_mask_lsb,
+		cper_ccix_cache_err_unpack(p, &__entry->data),
+		__print_hex(__get_dynamic_array(vendor_data),
+			    __entry->vendor_data_length)
+	)
+);
/*
* memory-failure recovery action result event
*
--
2.20.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 2/6] efi / ras: CCIX Cache error reporting
2019-11-12 16:52 ` [PATCH v3 2/6] efi / ras: CCIX Cache " Jonathan Cameron
@ 2019-11-13 9:10 ` kbuild test robot
2019-11-13 9:32 ` kbuild test robot
1 sibling, 0 replies; 12+ messages in thread
From: kbuild test robot @ 2019-11-13 9:10 UTC (permalink / raw)
To: Jonathan Cameron
Cc: kbuild-all, linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse, rjw, tony.luck, linuxarm,
ard.biesheuvel, nariman.poushin, Thanu Rangarajan,
Jonathan Cameron
[-- Attachment #1: Type: text/plain, Size: 8253 bytes --]
Hi Jonathan,
I love your patch! Yet something to improve:
[auto build test ERROR on efi/next]
[cannot apply to v5.4-rc7 next-20191112]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Jonathan-Cameron/efi-ras-CCIX-Memory-error-reporting/20191113-133322
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
config: x86_64-randconfig-a004-201945 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
ld: drivers/ras/ras.o: in function `perf_trace_ccix_memory_error_event':
include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_memory_error_event':
include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_memory_error_event':
include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_unpack'
ld: include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_comp_type_str'
ld: include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `perf_trace_ccix_cache_error_event':
>> include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_cache_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_cache_error_event':
>> include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_cache_err_unpack'
ld: include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_comp_type_str'
ld: include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_cache_error_event':
>> include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_cache_err_pack'
vim +418 include/trace/../../include/ras/ras_event.h
341
342 /*
343 * CCIX PER log memory error trace event
344 *
345 * These events are generated when hardware detects a corrected or
346 * uncorrected event.
347 *
348 * Some elements of the record are not included
349 * - PER version (tracepoint should remain compatible across versions)
350 * - Multiple Error
351 */
> 352 TRACE_EVENT(ccix_memory_error_event,
353 TP_PROTO(struct cper_ccix_mem_error *mem,
354 u32 err_seq,
355 u8 sev,
356 u16 ven_len),
357
358 TP_ARGS(mem, err_seq, sev, ven_len),
359
360 TP_STRUCT__entry(
361 __field(u32, err_seq)
362 __field(u8, sev)
363 __field(u8, sevdetail)
364 __field(u8, source)
365 __field(u8, component)
366 __field(u64, pa)
367 __field(u8, pa_mask_lsb)
368 __field_struct(struct cper_ccix_mem_err_compact, data)
369 __field(u16, vendor_data_length)
370 __dynamic_array(u8, vendor_data, ven_len)
371 ),
372
373 TP_fast_assign(
374 __entry->err_seq = err_seq;
375 __entry->sev = sev;
376 __entry->sevdetail =
377 FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
378 CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
379 CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
380 CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
381 mem->ccix_header[1]);
382 if (mem->header.validation_bits & 0x1)
383 __entry->source = mem->header.source_id;
384 else
385 __entry->source = ~0;
386 __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
387 mem->ccix_header[1]);
388 if (mem->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
389 __entry->pa = (u64)mem->ccix_header[2] << 32 |
390 (mem->ccix_header[3] & 0xfffffffc);
391 __entry->pa_mask_lsb = mem->ccix_header[4] & 0xff;
392 } else {
393 __entry->pa = ~0ull;
394 __entry->pa_mask_lsb = ~0;
395 }
396 __entry->vendor_data_length = ven_len ? ven_len - 4 : 0;
397 cper_ccix_mem_err_pack(&mem->mem_record, &__entry->data,
398 __entry->vendor_data_length,
399 __get_dynamic_array(vendor_data));
400 ),
401
402 TP_printk("{%d} %s CCIX PER Memory Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
403 __entry->err_seq,
404 cper_severity_str(__entry->sev),
405 cper_ccix_comp_type_str(__entry->component),
406 __entry->sevdetail & BIT(0) ? 1 : 0,
407 __entry->sevdetail & BIT(1) ? 1 : 0,
408 __entry->sevdetail & BIT(2) ? 1 : 0,
409 __entry->sevdetail & BIT(3) ? 1 : 0,
410 __entry->pa,
411 __entry->pa_mask_lsb,
412 cper_ccix_mem_err_unpack(p, &__entry->data),
413 __print_hex(__get_dynamic_array(vendor_data),
414 __entry->vendor_data_length)
415 )
416 );
417
> 418 TRACE_EVENT(ccix_cache_error_event,
419 TP_PROTO(struct cper_ccix_cache_error *err,
420 u32 err_seq,
421 u8 sev,
422 u16 ven_len),
423
424 TP_ARGS(err, err_seq, sev, ven_len),
425
426 TP_STRUCT__entry(
427 __field(u32, err_seq)
428 __field(u8, sev)
429 __field(u8, sevdetail)
430 __field(u8, source)
431 __field(u8, component)
432 __field(u64, pa)
433 __field(u8, pa_mask_lsb)
434 __field_struct(struct cper_ccix_cache_err_compact, data)
435 __field(u16, vendor_data_length)
436 __dynamic_array(u8, vendor_data, ven_len)
437 ),
438
439 TP_fast_assign(
440 __entry->err_seq = err_seq;
441
442 __entry->sev = sev;
443 __entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
444 CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
445 CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
446 CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
447 err->ccix_header[1]);
448 if (err->header.validation_bits & 0x1)
449 __entry->source = err->header.source_id;
450 else
451 __entry->source = ~0;
452 __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
453 err->ccix_header[1]);
454 if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
455 __entry->pa = (u64)err->ccix_header[2] << 32 |
456 (err->ccix_header[3] & 0xfffffffc);
457 __entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
458 } else {
459 __entry->pa = ~0ull;
460 __entry->pa_mask_lsb = ~0;
461 }
462
463 __entry->vendor_data_length = ven_len ? ven_len - 4 : 0;
464 cper_ccix_cache_err_pack(&err->cache_record, &__entry->data,
465 __entry->vendor_data_length,
466 __get_dynamic_array(vendor_data));
467 ),
468
469 TP_printk("{%d} %s CCIX PER Cache Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
470 __entry->err_seq,
471 cper_severity_str(__entry->sev),
472 cper_ccix_comp_type_str(__entry->component),
473 __entry->sevdetail & BIT(0) ? 1 : 0,
474 __entry->sevdetail & BIT(1) ? 1 : 0,
475 __entry->sevdetail & BIT(2) ? 1 : 0,
476 __entry->sevdetail & BIT(3) ? 1 : 0,
477 __entry->pa,
478 __entry->pa_mask_lsb,
479 cper_ccix_cache_err_unpack(p, &__entry->data),
480 __print_hex(__get_dynamic_array(vendor_data),
481 __entry->vendor_data_length)
482 )
483 );
484 /*
485 * memory-failure recovery action result event
486 *
487 * unsigned long pfn - Page Frame Number of the corrupted page
488 * int type - Page types of the corrupted page
489 * int result - Result of recovery action
490 */
491
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32398 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 2/6] efi / ras: CCIX Cache error reporting
2019-11-12 16:52 ` [PATCH v3 2/6] efi / ras: CCIX Cache " Jonathan Cameron
2019-11-13 9:10 ` kbuild test robot
@ 2019-11-13 9:32 ` kbuild test robot
1 sibling, 0 replies; 12+ messages in thread
From: kbuild test robot @ 2019-11-13 9:32 UTC (permalink / raw)
To: Jonathan Cameron
Cc: kbuild-all, linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse, rjw, tony.luck, linuxarm,
ard.biesheuvel, nariman.poushin, Thanu Rangarajan,
Jonathan Cameron
[-- Attachment #1: Type: text/plain, Size: 2277 bytes --]
Hi Jonathan,
I love your patch! Yet something to improve:
[auto build test ERROR on efi/next]
[also build test ERROR on v5.4-rc7 next-20191112]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Jonathan-Cameron/efi-ras-CCIX-Memory-error-reporting/20191113-133322
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
config: i386-defconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
ld: drivers/ras/ras.o: in function `perf_trace_ccix_memory_error_event':
ras.c:(.text+0x794): undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_memory_error_event':
ras.c:(.text+0x83d): undefined reference to `cper_ccix_mem_err_unpack'
ld: ras.c:(.text+0x863): undefined reference to `cper_ccix_comp_type_str'
ld: ras.c:(.text+0x86f): undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `perf_trace_ccix_cache_error_event':
>> ras.c:(.text+0x9f4): undefined reference to `cper_ccix_cache_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_cache_error_event':
>> ras.c:(.text+0xa9d): undefined reference to `cper_ccix_cache_err_unpack'
ld: ras.c:(.text+0xac3): undefined reference to `cper_ccix_comp_type_str'
ld: ras.c:(.text+0xacf): undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_memory_error_event':
ras.c:(.text+0x1349): undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_cache_error_event':
ras.c:(.text+0x14b9): undefined reference to `cper_ccix_cache_err_pack'
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28148 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 3/6] efi / ras: CCIX Address Translation Cache error reporting
2019-11-12 16:52 [PATCH v3 0/6] CCIX Protocol error reporting Jonathan Cameron
2019-11-12 16:52 ` [PATCH v3 1/6] efi / ras: CCIX Memory " Jonathan Cameron
2019-11-12 16:52 ` [PATCH v3 2/6] efi / ras: CCIX Cache " Jonathan Cameron
@ 2019-11-12 16:52 ` Jonathan Cameron
2019-11-13 9:55 ` kbuild test robot
2019-11-12 16:52 ` [PATCH v3 4/6] efi / ras: CCIX Port " Jonathan Cameron
` (2 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Jonathan Cameron @ 2019-11-12 16:52 UTC (permalink / raw)
To: linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse
Cc: rjw, tony.luck, linuxarm, ard.biesheuvel, nariman.poushin,
Thanu Rangarajan, Jonathan Cameron
CCIX devices tend to make heavy use of ATCs. The CCIX base
specification defines a protocol error message (PER) that
describes errors reported by such caches. The UEFI 2.8
specification includes a CCIX CPER record for firmware first
handling to report these errors to the operating system.
This patch is very similar to the support previously added
for CCIX Memory Errors and provides both logging and a RAS
tracepoint for this error class.
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
drivers/acpi/apei/ghes.c | 4 ++
drivers/firmware/efi/cper-ccix.c | 84 ++++++++++++++++++++++++++++++++
include/linux/cper.h | 39 +++++++++++++++
include/ras/ras_event.h | 67 +++++++++++++++++++++++++
4 files changed, 194 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index c99a4216b67d..ba73d3a5d564 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -521,6 +521,10 @@ static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
trace_ccix_cache_error_event(payload, err_seq, sev,
ccix_cache_err_ven_len_get(payload));
break;
+ case CCIX_ATC_ERROR:
+ trace_ccix_atc_error_event(payload, err_seq, sev,
+ ccix_atc_err_ven_len_get(payload));
+ break;
default:
/* Unknown error type */
pr_info("CCIX error of unknown or vendor defined type\n");
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
index 14d3f5b8ceab..a33683d057e9 100644
--- a/drivers/firmware/efi/cper-ccix.c
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -394,6 +394,43 @@ static int cper_ccix_cache_err_details(const char *pfx,
return 0;
}
+static int cper_ccix_atc_err_details(const char *pfx,
+ struct acpi_hest_generic_data *gdata)
+{
+ struct cper_ccix_atc_error *full_atc_err;
+ struct cper_sec_ccix_atc_error *atc_err;
+ u16 vendor_data_len;
+ int i;
+
+ if (gdata->error_data_length < sizeof(*full_atc_err))
+ return -ENOSPC;
+
+ full_atc_err = acpi_hest_get_payload(gdata);
+
+ atc_err = &full_atc_err->atc_record;
+
+ if (atc_err->validation_bits & CCIX_ATC_ERR_OP_VALID)
+ printk("%s""Operation: %s\n", pfx,
+ cper_ccix_cache_err_op_str(atc_err->op_type));
+
+ if (atc_err->validation_bits & CCIX_ATC_ERR_INSTANCE_ID_VALID)
+ printk("%s""Instance ID: %d\n", pfx, atc_err->instance);
+
+ if (atc_err->validation_bits & CCIX_ATC_ERR_VENDOR_DATA_VALID) {
+ if (gdata->error_data_length < sizeof(*full_atc_err) + 4)
+ return -ENOSPC;
+ vendor_data_len = atc_err->vendor_data[0] & GENMASK(15, 0);
+ if (gdata->error_data_length < sizeof(*full_atc_err) + vendor_data_len)
+ return -ENOSPC;
+
+ for (i = 0; i < vendor_data_len / 4 - 1; i++)
+ printk("%s""Vendor%d: 0x%08x\n", pfx, i,
+ atc_err->vendor_data[i + 1]);
+ }
+
+ return 0;
+}
+
int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
{
struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
@@ -457,6 +494,8 @@ int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
return cper_ccix_mem_err_details(pfx, gdata);
case CCIX_CACHE_ERROR:
return cper_ccix_cache_err_details(pfx, gdata);
+ case CCIX_ATC_ERROR:
+ return cper_ccix_atc_err_details(pfx, gdata);
default:
/* Vendor defined so no formatting be done */
break;
@@ -527,3 +566,48 @@ const char *cper_ccix_cache_err_unpack(struct trace_seq *p,
return ret;
}
+
+void cper_ccix_atc_err_pack(const struct cper_sec_ccix_atc_error *atc_record,
+ struct cper_ccix_atc_err_compact *catc_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data)
+{
+ catc_err->validation_bits = atc_record->validation_bits;
+ catc_err->op_type = atc_record->op_type;
+ catc_err->instance = atc_record->instance;
+ memcpy(vendor_data, &atc_record->vendor_data[1], vendor_data_len);
+}
+
+/* Format the valid ATC error fields into msg; returns characters written. */
+static int cper_ccix_err_atc_location(struct cper_ccix_atc_err_compact *catc_err,
+				      char *msg)
+{
+	u32 len = CPER_REC_LEN - 1;
+	u32 n = 0;
+
+	if (!msg)
+		return 0;
+	/* Pass only the space left; scnprintf() returns what it really wrote. */
+	if (catc_err->validation_bits & CCIX_ATC_ERR_OP_VALID)
+		n += scnprintf(msg + n, len - n, "Op: %s ",
+			cper_ccix_cache_err_op_str(catc_err->op_type));
+	if (catc_err->validation_bits & CCIX_ATC_ERR_INSTANCE_ID_VALID)
+		n += scnprintf(msg + n, len - n, "Instance: %d ",
+			catc_err->instance);
+
+	return n;
+}
+
+const char *cper_ccix_atc_err_unpack(struct trace_seq *p,
+ struct cper_ccix_atc_err_compact *catc_err)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (cper_ccix_err_atc_location(catc_err, rcd_decode_str))
+ trace_seq_printf(p, "%s", rcd_decode_str);
+
+ trace_seq_putc(p, '\0');
+
+ return ret;
+}
+
diff --git a/include/linux/cper.h b/include/linux/cper.h
index eef254b8b8b7..6bb603e9a97a 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -675,6 +675,38 @@ struct cper_ccix_cache_err_compact {
__u8 instance;
};
+struct cper_sec_ccix_atc_error {
+ __u32 validation_bits;
+#define CCIX_ATC_ERR_OP_VALID BIT(0)
+#define CCIX_ATC_ERR_INSTANCE_ID_VALID BIT(1)
+#define CCIX_ATC_ERR_VENDOR_DATA_VALID BIT(2)
+ __u16 length; /* Includes vendor specific log info */
+ __u8 op_type;
+ __u8 instance;
+ __u32 reserved;
+ __u32 vendor_data[];
+};
+
+struct cper_ccix_atc_error {
+ struct cper_sec_ccix_header header;
+ __u32 ccix_header[CCIX_PER_LOG_HEADER_DWS];
+ struct cper_sec_ccix_atc_error atc_record;
+};
+
+static inline u16 ccix_atc_err_ven_len_get(struct cper_ccix_atc_error *atc_err)
+{
+ if (atc_err->atc_record.validation_bits & CCIX_ATC_ERR_VENDOR_DATA_VALID)
+ return atc_err->atc_record.vendor_data[0] & 0xFFFF;
+ else
+ return 0;
+}
+
+struct cper_ccix_atc_err_compact {
+ __u32 validation_bits;
+ __u8 op_type;
+ __u8 instance;
+};
+
/* Reset to default packing */
#pragma pack()
@@ -706,6 +738,13 @@ const char *cper_ccix_cache_err_unpack(struct trace_seq *p,
struct cper_ccix_cache_err_compact *ccache_err);
const char *cper_ccix_cache_err_type_str(__u8 error_type);
+void cper_ccix_atc_err_pack(const struct cper_sec_ccix_atc_error *atc_record,
+ struct cper_ccix_atc_err_compact *catc_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data);
+const char *cper_ccix_atc_err_unpack(struct trace_seq *p,
+ struct cper_ccix_atc_err_compact *catc_err);
+
struct acpi_hest_generic_data;
int cper_print_ccix_per(const char *pfx,
struct acpi_hest_generic_data *gdata);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 55f2c1900c54..bab49e297551 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -481,6 +481,73 @@ TRACE_EVENT(ccix_cache_error_event,
__entry->vendor_data_length)
)
);
+
+/* CCIX PER log Address Translation Cache error trace event */
+TRACE_EVENT(ccix_atc_error_event,
+	TP_PROTO(struct cper_ccix_atc_error *err,
+		 u32 err_seq,
+		 u8 sev,
+		 u16 ven_len),
+
+	TP_ARGS(err, err_seq, sev, ven_len),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, sev)
+		__field(u8, sevdetail)
+		__field(u8, source)
+		__field(u8, component)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__field_struct(struct cper_ccix_atc_err_compact, data)
+		__field(u16, vendor_data_length)
+		__dynamic_array(u8, vendor_data, ven_len)
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+		__entry->sev = sev;
+		__entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+					       CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+					       CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+					       CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+					       err->ccix_header[1]);
+		if (err->header.validation_bits & 0x1)
+			__entry->source = err->header.source_id;
+		else
+			__entry->source = ~0;
+		__entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+					       err->ccix_header[1]);
+		if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+			__entry->pa = (u64)err->ccix_header[2] << 32 |
+				(err->ccix_header[3] & 0xfffffffc);
+			__entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
+		} else {
+			__entry->pa = ~0ull;
+			__entry->pa_mask_lsb = ~0;
+		}
+		/* ven_len includes the 4 byte length DW; shorter means no data */
+		__entry->vendor_data_length = ven_len > 4 ? ven_len - 4 : 0;
+		cper_ccix_atc_err_pack(&err->atc_record, &__entry->data,
+				       __entry->vendor_data_length,
+				       __get_dynamic_array(vendor_data));
+	),
+
+	TP_printk("{%d} %s CCIX PER ATC Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
+		__entry->err_seq,
+		cper_severity_str(__entry->sev),
+		cper_ccix_comp_type_str(__entry->component),
+		__entry->sevdetail & BIT(0) ? 1 : 0,
+		__entry->sevdetail & BIT(1) ? 1 : 0,
+		__entry->sevdetail & BIT(2) ? 1 : 0,
+		__entry->sevdetail & BIT(3) ? 1 : 0,
+		__entry->pa,
+		__entry->pa_mask_lsb,
+		cper_ccix_atc_err_unpack(p, &__entry->data),
+		__print_hex(__get_dynamic_array(vendor_data), __entry->vendor_data_length)
+	)
+);
+
/*
* memory-failure recovery action result event
*
--
2.20.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 3/6] efi / ras: CCIX Address Translation Cache error reporting
2019-11-12 16:52 ` [PATCH v3 3/6] efi / ras: CCIX Address Translation " Jonathan Cameron
@ 2019-11-13 9:55 ` kbuild test robot
0 siblings, 0 replies; 12+ messages in thread
From: kbuild test robot @ 2019-11-13 9:55 UTC (permalink / raw)
To: Jonathan Cameron
Cc: kbuild-all, linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse, rjw, tony.luck, linuxarm,
ard.biesheuvel, nariman.poushin, Thanu Rangarajan,
Jonathan Cameron
[-- Attachment #1: Type: text/plain, Size: 8348 bytes --]
Hi Jonathan,
I love your patch! Yet something to improve:
[auto build test ERROR on efi/next]
[cannot apply to v5.4-rc7 next-20191112]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Jonathan-Cameron/efi-ras-CCIX-Memory-error-reporting/20191113-133322
base: https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
config: x86_64-randconfig-a004-201945 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
ld: drivers/ras/ras.o: in function `perf_trace_ccix_memory_error_event':
include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_memory_error_event':
include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_unpack'
ld: include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_comp_type_str'
ld: include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `perf_trace_ccix_cache_error_event':
include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_cache_err_pack'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_cache_error_event':
include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_cache_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_cache_error_event':
include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_cache_err_unpack'
ld: include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_ccix_comp_type_str'
ld: include/trace/../../include/ras/ras_event.h:418: undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `perf_trace_ccix_atc_error_event':
>> include/trace/../../include/ras/ras_event.h:485: undefined reference to `cper_ccix_atc_err_pack'
ld: drivers/ras/ras.o: in function `trace_raw_output_ccix_atc_error_event':
>> include/trace/../../include/ras/ras_event.h:485: undefined reference to `cper_ccix_atc_err_unpack'
ld: include/trace/../../include/ras/ras_event.h:485: undefined reference to `cper_ccix_comp_type_str'
ld: include/trace/../../include/ras/ras_event.h:485: undefined reference to `cper_severity_str'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_atc_error_event':
>> include/trace/../../include/ras/ras_event.h:485: undefined reference to `cper_ccix_atc_err_pack'
ld: drivers/ras/ras.o: in function `trace_event_raw_event_ccix_memory_error_event':
include/trace/../../include/ras/ras_event.h:352: undefined reference to `cper_ccix_mem_err_pack'
vim +485 include/trace/../../include/ras/ras_event.h
417
> 418 TRACE_EVENT(ccix_cache_error_event,
419 TP_PROTO(struct cper_ccix_cache_error *err,
420 u32 err_seq,
421 u8 sev,
422 u16 ven_len),
423
424 TP_ARGS(err, err_seq, sev, ven_len),
425
426 TP_STRUCT__entry(
427 __field(u32, err_seq)
428 __field(u8, sev)
429 __field(u8, sevdetail)
430 __field(u8, source)
431 __field(u8, component)
432 __field(u64, pa)
433 __field(u8, pa_mask_lsb)
434 __field_struct(struct cper_ccix_cache_err_compact, data)
435 __field(u16, vendor_data_length)
436 __dynamic_array(u8, vendor_data, ven_len)
437 ),
438
439 TP_fast_assign(
440 __entry->err_seq = err_seq;
441
442 __entry->sev = sev;
443 __entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
444 CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
445 CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
446 CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
447 err->ccix_header[1]);
448 if (err->header.validation_bits & 0x1)
449 __entry->source = err->header.source_id;
450 else
451 __entry->source = ~0;
452 __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
453 err->ccix_header[1]);
454 if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
455 __entry->pa = (u64)err->ccix_header[2] << 32 |
456 (err->ccix_header[3] & 0xfffffffc);
457 __entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
458 } else {
459 __entry->pa = ~0ull;
460 __entry->pa_mask_lsb = ~0;
461 }
462
463 __entry->vendor_data_length = ven_len ? ven_len - 4 : 0;
464 cper_ccix_cache_err_pack(&err->cache_record, &__entry->data,
465 __entry->vendor_data_length,
466 __get_dynamic_array(vendor_data));
467 ),
468
469 TP_printk("{%d} %s CCIX PER Cache Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
470 __entry->err_seq,
471 cper_severity_str(__entry->sev),
472 cper_ccix_comp_type_str(__entry->component),
473 __entry->sevdetail & BIT(0) ? 1 : 0,
474 __entry->sevdetail & BIT(1) ? 1 : 0,
475 __entry->sevdetail & BIT(2) ? 1 : 0,
476 __entry->sevdetail & BIT(3) ? 1 : 0,
477 __entry->pa,
478 __entry->pa_mask_lsb,
479 cper_ccix_cache_err_unpack(p, &__entry->data),
480 __print_hex(__get_dynamic_array(vendor_data),
481 __entry->vendor_data_length)
482 )
483 );
484
> 485 TRACE_EVENT(ccix_atc_error_event,
486 TP_PROTO(struct cper_ccix_atc_error *err,
487 u32 err_seq,
488 u8 sev,
489 u16 ven_len),
490
491 TP_ARGS(err, err_seq, sev, ven_len),
492
493 TP_STRUCT__entry(
494 __field(u32, err_seq)
495 __field(u8, sev)
496 __field(u8, sevdetail)
497 __field(u8, source)
498 __field(u8, component)
499 __field(u64, pa)
500 __field(u8, pa_mask_lsb)
501 __field_struct(struct cper_ccix_atc_err_compact, data)
502 __field(u16, vendor_data_length)
503 __dynamic_array(u8, vendor_data, ven_len)
504 ),
505
506 TP_fast_assign(
507 __entry->err_seq = err_seq;
508
509 __entry->sev = sev;
510 __entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
511 CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
512 CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
513 CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
514 err->ccix_header[1]);
515 if (err->header.validation_bits & 0x1)
516 __entry->source = err->header.source_id;
517 else
518 __entry->source = ~0;
519 __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
520 err->ccix_header[1]);
521 if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
522 __entry->pa = (u64)err->ccix_header[2] << 32 |
523 (err->ccix_header[3] & 0xfffffffc);
524 __entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
525 } else {
526 __entry->pa = ~0ull;
527 __entry->pa_mask_lsb = ~0;
528 }
529
530 __entry->vendor_data_length = ven_len ? ven_len - 4 : 0;
531 cper_ccix_atc_err_pack(&err->atc_record, &__entry->data,
532 __entry->vendor_data_length,
533 __get_dynamic_array(vendor_data));
534 ),
535
536 TP_printk("{%d} %s CCIX PER ATC Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
537 __entry->err_seq,
538 cper_severity_str(__entry->sev),
539 cper_ccix_comp_type_str(__entry->component),
540 __entry->sevdetail & BIT(0) ? 1 : 0,
541 __entry->sevdetail & BIT(1) ? 1 : 0,
542 __entry->sevdetail & BIT(2) ? 1 : 0,
543 __entry->sevdetail & BIT(3) ? 1 : 0,
544 __entry->pa,
545 __entry->pa_mask_lsb,
546 cper_ccix_atc_err_unpack(p, &__entry->data),
547 __print_hex(__get_dynamic_array(vendor_data), __entry->vendor_data_length)
548 )
549 );
550
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32398 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 4/6] efi / ras: CCIX Port error reporting
2019-11-12 16:52 [PATCH v3 0/6] CCIX Protocol error reporting Jonathan Cameron
` (2 preceding siblings ...)
2019-11-12 16:52 ` [PATCH v3 3/6] efi / ras: CCIX Address Translation " Jonathan Cameron
@ 2019-11-12 16:52 ` Jonathan Cameron
2019-11-12 16:52 ` [PATCH v3 5/6] efi / ras: CCIX Link " Jonathan Cameron
2019-11-12 16:52 ` [PATCH v3 6/6] efi / ras: CCIX Agent internal " Jonathan Cameron
5 siblings, 0 replies; 12+ messages in thread
From: Jonathan Cameron @ 2019-11-12 16:52 UTC (permalink / raw)
To: linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse
Cc: rjw, tony.luck, linuxarm, ard.biesheuvel, nariman.poushin,
Thanu Rangarajan, Jonathan Cameron
The CCIX 1.0 base specification defines a CCIX protocol layer port.
The specification provides a mechanism for detailed error logging
for these ports. The UEFI 2.8 specification includes a CCIX CPER
record for firmware first handling to report these errors to the
operating system.
This patch is very similar to the support previously added
for CCIX Memory Errors and provides both logging and RAS tracepoint
for this error class.
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
drivers/acpi/apei/ghes.c | 4 +
drivers/firmware/efi/cper-ccix.c | 123 +++++++++++++++++++++++++++++++
include/linux/cper.h | 42 +++++++++++
include/ras/ras_event.h | 66 +++++++++++++++++
4 files changed, 235 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ba73d3a5d564..7455db97319c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -525,6 +525,10 @@ static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
trace_ccix_atc_error_event(payload, err_seq, sev,
ccix_atc_err_ven_len_get(payload));
break;
+ case CCIX_PORT_ERROR:
+ trace_ccix_port_error_event(payload, err_seq, sev,
+ ccix_port_err_ven_len_get(payload));
+ break;
default:
/* Unknown error type */
pr_info("CCIX error of unknown or vendor defined type\n");
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
index a33683d057e9..b936661b3db3 100644
--- a/drivers/firmware/efi/cper-ccix.c
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -431,6 +431,81 @@ static int cper_ccix_atc_err_details(const char *pfx,
return 0;
}
+/* Port error type names; the array index is the error type value */
+static const char * const ccix_port_err_type_strs[] = {
+ "Generic Bus / Slave Error",
+ "Bus Parity / ECC Error",
+ "BDF Not Present",
+ "Invalid Address",
+ "Invalid AgentID",
+ "Bus Timeout",
+ "Hang",
+ "Egress Blocked",
+};
+
+/*
+ * Map a port error type to its name, or "Reserved" when the value has no
+ * entry in the table above.
+ * NOTE(review): the parameter is named op, but the callers in this patch
+ * pass err_type.
+ */
+static const char *cper_ccix_port_err_type_str(__u8 op)
+{
+ return op < ARRAY_SIZE(ccix_port_err_type_strs) ?
+ ccix_port_err_type_strs[op] : "Reserved";
+}
+
+/* Operation type names; the array index is the op_type value */
+static const char * const ccix_port_err_op_strs[] = {
+ "Command",
+ "Read",
+ "Write",
+};
+
+/* Map an operation type to its name, or "Reserved" when out of range */
+static const char *cper_ccix_port_err_op_str(__u8 op)
+{
+ return op < ARRAY_SIZE(ccix_port_err_op_strs) ?
+ ccix_port_err_op_strs[op] : "Reserved";
+}
+
+/*
+ * Print the port error specific part of a CCIX PER record, one line per
+ * field that is flagged valid, each prefixed with pfx.
+ *
+ * Returns 0 on success, or -ENOSPC when error_data_length is smaller than
+ * the fixed record or than the vendor data the record claims to carry.
+ */
+static int cper_ccix_port_err_details(const char *pfx,
+ struct acpi_hest_generic_data *gdata)
+{
+ struct cper_ccix_port_error *full_port_err;
+ struct cper_sec_ccix_port_error *port_err;
+ u16 vendor_data_len;
+ int i;
+
+ if (gdata->error_data_length < sizeof(*full_port_err))
+ return -ENOSPC;
+
+ full_port_err = acpi_hest_get_payload(gdata);
+
+ port_err = &full_port_err->port_record;
+
+ if (port_err->validation_bits & CCIX_PORT_ERR_TYPE_VALID)
+ printk("%s""Error Type: %s\n", pfx,
+ cper_ccix_port_err_type_str(port_err->err_type));
+
+ if (port_err->validation_bits & CCIX_PORT_ERR_OP_VALID)
+ printk("%s""Operation: %s\n", pfx,
+ cper_ccix_port_err_op_str(port_err->op_type));
+
+ /* CHECK THE AER EQUIVALENT */
+ if (port_err->validation_bits & CCIX_PORT_ERR_MESSAGE_VALID) {
+ for (i = 0; i < ARRAY_SIZE(port_err->message); i++)
+ printk("%s""Message%d: 0x%08x\n", pfx, i,
+ port_err->message[i]);
+ }
+
+ if (port_err->validation_bits & CCIX_PORT_ERR_VENDOR_DATA_VALID) {
+ /* vendor_data[0] must be present before its length can be read */
+ if (gdata->error_data_length < sizeof(*full_port_err) + 4)
+ return -ENOSPC;
+
+ vendor_data_len = port_err->vendor_data[0] & GENMASK(15, 0);
+ if (gdata->error_data_length < sizeof(*full_port_err) + vendor_data_len)
+ return -ENOSPC;
+
+ /* The length counts vendor_data[0] itself, so print from [1] on */
+ for (i = 0; i < vendor_data_len / 4 - 1; i++)
+ printk("%s""Vendor%d: 0x%08x\n", pfx, i,
+ port_err->vendor_data[i + 1]);
+ }
+
+ return 0;
+}
+
int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
{
struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
@@ -496,6 +571,8 @@ int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
return cper_ccix_cache_err_details(pfx, gdata);
case CCIX_ATC_ERROR:
return cper_ccix_atc_err_details(pfx, gdata);
+ case CCIX_PORT_ERROR:
+ return cper_ccix_port_err_details(pfx, gdata);
default:
/* Vendor defined so no formatting be done */
break;
@@ -611,3 +688,49 @@ const char *cper_ccix_atc_err_unpack(struct trace_seq *p,
return ret;
}
+/*
+ * Copy the validation bits, error type, operation type and message dwords
+ * of a port error record into its compact form, then copy vendor_data_len
+ * bytes of vendor data, starting after vendor_data[0], into vendor_data.
+ * NOTE(review): no bounds are checked here; the caller must provide a
+ * destination of at least vendor_data_len bytes.
+ */
+void cper_ccix_port_err_pack(const struct cper_sec_ccix_port_error *port_record,
+ struct cper_ccix_port_err_compact *cport_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data)
+{
+ cport_err->validation_bits = port_record->validation_bits;
+ cport_err->err_type = port_record->err_type;
+ cport_err->op_type = port_record->op_type;
+ memcpy(cport_err->message, port_record->message,
+ sizeof(cport_err->message));
+ memcpy(vendor_data, &port_record->vendor_data[1], vendor_data_len);
+}
+
+/*
+ * Decode the valid port error fields into msg as "Name: value " pairs.
+ *
+ * Returns the number of characters written, 0 if msg is NULL or no field
+ * is flagged valid (msg is left untouched in that case).
+ * msg is assumed to hold at least CPER_REC_LEN bytes - the caller in this
+ * patch passes rcd_decode_str; TODO confirm its size.
+ */
+static int cper_ccix_err_port_location(struct cper_ccix_port_err_compact *cport_err,
+				       char *msg)
+{
+	u32 len = CPER_REC_LEN - 1;
+	u32 n = 0;
+
+	if (!msg)
+		return 0;
+
+	/*
+	 * Bound every append by the space that is left (len - n) rather than
+	 * by the whole buffer, and use scnprintf() so that n only ever counts
+	 * characters that were really written.
+	 */
+	if (cport_err->validation_bits & CCIX_PORT_ERR_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Error Type: %s ",
+			       cper_ccix_port_err_type_str(cport_err->err_type));
+
+	if (cport_err->validation_bits & CCIX_PORT_ERR_OP_VALID)
+		n += scnprintf(msg + n, len - n, "Op: %s ",
+			       cper_ccix_port_err_op_str(cport_err->op_type));
+
+	return n;
+}
+
+/*
+ * Append the decoded port error fields to the trace sequence p, followed
+ * by a terminating NUL, and return the trace_seq_buffer_ptr() value taken
+ * before anything was appended.
+ * rcd_decode_str is not defined in this hunk; presumably it is a scratch
+ * buffer shared within this file - TODO confirm.
+ */
+const char *cper_ccix_port_err_unpack(struct trace_seq *p,
+ struct cper_ccix_port_err_compact *cport_err)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (cper_ccix_err_port_location(cport_err, rcd_decode_str))
+ trace_seq_printf(p, "%s", rcd_decode_str);
+
+ trace_seq_putc(p, '\0');
+
+ return ret;
+}
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 6bb603e9a97a..5e315afc210e 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -707,6 +707,41 @@ struct cper_ccix_atc_err_compact {
__u8 instance;
};
+
+/*
+ * Port error specific part of a CCIX PER record. The validation bits say
+ * which of the following fields carry meaningful data.
+ */
+struct cper_sec_ccix_port_error {
+ __u32 validation_bits;
+#define CCIX_PORT_ERR_OP_VALID BIT(0)
+#define CCIX_PORT_ERR_TYPE_VALID BIT(1)
+#define CCIX_PORT_ERR_MESSAGE_VALID BIT(2)
+#define CCIX_PORT_ERR_VENDOR_DATA_VALID BIT(3)
+ __u16 length; /* Includes vendor specific log info */
+ __u8 op_type;
+ __u8 err_type;
+ __u32 message[8];
+ /* Low 16 bits of vendor_data[0] are read as the vendor data length */
+ __u32 vendor_data[];
+};
+
+/*
+ * Complete payload for a CCIX port error: the CCIX section header, the
+ * CCIX PER log header dwords, then the port specific record.
+ */
+struct cper_ccix_port_error {
+ struct cper_sec_ccix_header header;
+ __u32 ccix_header[CCIX_PER_LOG_HEADER_DWS];
+ struct cper_sec_ccix_port_error port_record;
+};
+
+/*
+ * Return the low 16 bits of vendor_data[0] when vendor data is flagged
+ * valid, otherwise 0.
+ * NOTE(review): vendor_data[0] is read without checking that the record is
+ * long enough to contain it; the caller has to guarantee that.
+ */
+static inline u16 ccix_port_err_ven_len_get(struct cper_ccix_port_error *port_err)
+{
+ if (port_err->port_record.validation_bits & CCIX_PORT_ERR_VENDOR_DATA_VALID)
+ return port_err->port_record.vendor_data[0] & 0xFFFF;
+ else
+ return 0;
+}
+
+/*
+ * Fields of struct cper_sec_ccix_port_error kept in the trace event;
+ * filled in by cper_ccix_port_err_pack().
+ */
+struct cper_ccix_port_err_compact {
+ __u32 validation_bits;
+ __u32 message[8];
+ __u8 err_type;
+ __u8 op_type;
+};
+
/* Reset to default packing */
#pragma pack()
@@ -745,6 +780,13 @@ void cper_ccix_atc_err_pack(const struct cper_sec_ccix_atc_error *atc_record,
const char *cper_ccix_atc_err_unpack(struct trace_seq *p,
struct cper_ccix_atc_err_compact *catc_err);
+void cper_ccix_port_err_pack(const struct cper_sec_ccix_port_error *port_record,
+ struct cper_ccix_port_err_compact *cport_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data);
+const char *cper_ccix_port_err_unpack(struct trace_seq *p,
+ struct cper_ccix_port_err_compact *cport_err);
+
struct acpi_hest_generic_data;
int cper_print_ccix_per(const char *pfx,
struct acpi_hest_generic_data *gdata);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index bab49e297551..52ac3f2d4c11 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -548,6 +548,72 @@ TRACE_EVENT(ccix_atc_error_event,
)
);
+/*
+ * CCIX port error tracepoint.
+ *
+ * @err:     full CCIX port error payload
+ * @err_seq: sequence number printed in braces at the start of the event
+ * @sev:     severity, printed via cper_severity_str()
+ * @ven_len: vendor data length, including its 4 byte length header
+ */
+TRACE_EVENT(ccix_port_error_event,
+	TP_PROTO(struct cper_ccix_port_error *err,
+		 u32 err_seq,
+		 u8 sev,
+		 u16 ven_len),
+
+	TP_ARGS(err, err_seq, sev, ven_len),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, sev)
+		__field(u8, sevdetail)
+		__field(u8, source)
+		__field(u8, component)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__field_struct(struct cper_ccix_port_err_compact, data)
+		__field(u16, vendor_data_length)
+		__dynamic_array(u8, vendor_data, ven_len)
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+
+		__entry->sev = sev;
+		__entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+			CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+			CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+			CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+			err->ccix_header[1]);
+		/* All ones marks a source id that is not valid */
+		if (err->header.validation_bits & 0x1)
+			__entry->source = err->header.source_id;
+		else
+			__entry->source = ~0;
+
+		__entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+					       err->ccix_header[1]);
+		if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+			__entry->pa = (u64)err->ccix_header[2] << 32 |
+				(err->ccix_header[3] & 0xfffffffc);
+			__entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
+		} else {
+			__entry->pa = ~0ull;
+			__entry->pa_mask_lsb = ~0;
+		}
+
+		/*
+		 * Do not store the vendor data length header. A reported
+		 * length shorter than that header must give 0 rather than
+		 * wrap around in the u16 and overrun the dynamic array.
+		 */
+		__entry->vendor_data_length = ven_len > 4 ? ven_len - 4 : 0;
+		cper_ccix_port_err_pack(&err->port_record, &__entry->data,
+					__entry->vendor_data_length,
+					__get_dynamic_array(vendor_data));
+	),
+
+	TP_printk("{%d} %s CCIX PER Port Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
+		__entry->err_seq,
+		cper_severity_str(__entry->sev),
+		cper_ccix_comp_type_str(__entry->component),
+		__entry->sevdetail & BIT(0) ? 1 : 0,
+		__entry->sevdetail & BIT(1) ? 1 : 0,
+		__entry->sevdetail & BIT(2) ? 1 : 0,
+		__entry->sevdetail & BIT(3) ? 1 : 0,
+		__entry->pa,
+		__entry->pa_mask_lsb,
+		cper_ccix_port_err_unpack(p, &__entry->data),
+		__print_hex(__get_dynamic_array(vendor_data), __entry->vendor_data_length)
+	)
+);
+
/*
* memory-failure recovery action result event
*
--
2.20.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 5/6] efi / ras: CCIX Link error reporting
2019-11-12 16:52 [PATCH v3 0/6] CCIX Protocol error reporting Jonathan Cameron
` (3 preceding siblings ...)
2019-11-12 16:52 ` [PATCH v3 4/6] efi / ras: CCIX Port " Jonathan Cameron
@ 2019-11-12 16:52 ` Jonathan Cameron
2019-11-12 16:52 ` [PATCH v3 6/6] efi / ras: CCIX Agent internal " Jonathan Cameron
5 siblings, 0 replies; 12+ messages in thread
From: Jonathan Cameron @ 2019-11-12 16:52 UTC (permalink / raw)
To: linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse
Cc: rjw, tony.luck, linuxarm, ard.biesheuvel, nariman.poushin,
Thanu Rangarajan, Jonathan Cameron
The CCIX 1.0 Base Specification defines a protocol layer
CCIX link and related error reporting mechanism.
The UEFI 2.8 specification includes a CCIX CPER record for
firmware first handling to report these errors to the
operating system.
This patch is very similar to the support previously added
for CCIX Memory Errors and provides both logging and RAS
tracepoint for this error class.
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
drivers/acpi/apei/ghes.c | 4 +
drivers/firmware/efi/cper-ccix.c | 140 +++++++++++++++++++++++++++++++
include/linux/cper.h | 48 +++++++++++
include/ras/ras_event.h | 65 ++++++++++++++
4 files changed, 257 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 7455db97319c..22df8c14ec13 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -529,6 +529,10 @@ static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
trace_ccix_port_error_event(payload, err_seq, sev,
ccix_port_err_ven_len_get(payload));
break;
+ case CCIX_LINK_ERROR:
+ trace_ccix_link_error_event(payload, err_seq, sev,
+ ccix_link_err_ven_len_get(payload));
+ break;
default:
/* Unknown error type */
pr_info("CCIX error of unknown or vendor defined type\n");
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
index b936661b3db3..03630f4abaab 100644
--- a/drivers/firmware/efi/cper-ccix.c
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -506,6 +506,86 @@ static int cper_ccix_port_err_details(const char *pfx,
return 0;
}
+/* Link error type names; the array index is the error type value */
+static const char * const ccix_link_err_type_strs[] = {
+ "Generic Error",
+ "Credit Underflow",
+ "Credit Overflow",
+ "Unusable Credit Received",
+ "Link Credit Timeout",
+};
+
+/*
+ * Map a link error type to its name, or "Reserved" when out of range.
+ * NOTE(review): the parameter is named op, but the callers in this patch
+ * pass err_type.
+ */
+static const char *cper_ccix_link_err_type_str(__u8 op)
+{
+ return op < ARRAY_SIZE(ccix_link_err_type_strs) ?
+ ccix_link_err_type_strs[op] : "Reserved";
+}
+
+/* Link credit type names; the array index is the credit_type value */
+static const char * const ccix_link_credit_type_strs[] = {
+ "Memory",
+ "Snoop",
+ "Data",
+ "Misc",
+};
+
+/* Map a credit type to its name, or "Reserved" when out of range */
+static const char *cper_ccix_link_credit_type_str(__u8 op)
+{
+ return op < ARRAY_SIZE(ccix_link_credit_type_strs) ?
+ ccix_link_credit_type_strs[op] : "Reserved";
+}
+
+/*
+ * Print the link error specific part of a CCIX PER record, one line per
+ * field that is flagged valid, each prefixed with pfx.
+ *
+ * Returns 0 on success, or -ENOSPC when error_data_length is smaller than
+ * the fixed record or than the vendor data the record claims to carry.
+ */
+static int cper_ccix_link_err_details(const char *pfx,
+ struct acpi_hest_generic_data *gdata)
+{
+ struct cper_ccix_link_error *full_link_err;
+ struct cper_sec_ccix_link_error *link_err;
+ u16 vendor_data_len;
+ int i;
+
+ if (gdata->error_data_length < sizeof(*full_link_err))
+ return -ENOSPC;
+
+ full_link_err = acpi_hest_get_payload(gdata);
+
+ link_err = &full_link_err->link_record;
+
+ if (link_err->validation_bits & CCIX_LINK_ERR_TYPE_VALID)
+ printk("%s""Error Type: %s\n", pfx,
+ cper_ccix_link_err_type_str(link_err->err_type));
+
+ /* The operation names are shared with the port error decoding */
+ if (link_err->validation_bits & CCIX_LINK_ERR_OP_VALID)
+ printk("%s""Operation: %s\n", pfx,
+ cper_ccix_port_err_op_str(link_err->op_type));
+
+ if (link_err->validation_bits & CCIX_LINK_ERR_LINK_ID_VALID)
+ printk("%s""Link ID: %d\n", pfx, link_err->link_id);
+
+ if (link_err->validation_bits & CCIX_LINK_ERR_CREDIT_TYPE_VALID)
+ printk("%s""Credit Type: %s\n", pfx,
+ cper_ccix_link_credit_type_str(link_err->credit_type));
+
+ /* CHECK THE AER EQUIVALENT */
+ if (link_err->validation_bits & CCIX_LINK_ERR_MESSAGE_VALID) {
+ for (i = 0; i < ARRAY_SIZE(link_err->message); i++)
+ printk("%s""Message%d: 0x%08x\n", pfx, i, link_err->message[i]);
+ }
+
+ if (link_err->validation_bits & CCIX_LINK_ERR_VENDOR_DATA_VALID) {
+ /* vendor_data[0] must be present before its length can be read */
+ if (gdata->error_data_length < sizeof(*full_link_err) + 4)
+ return -ENOSPC;
+
+ vendor_data_len = link_err->vendor_data[0] & GENMASK(15, 0);
+ if (gdata->error_data_length < sizeof(*full_link_err) + vendor_data_len)
+ return -ENOSPC;
+
+ /* The length counts vendor_data[0] itself, so print from [1] on */
+ for (i = 0; i < vendor_data_len/4 - 1; i++)
+ printk("%s""Vendor%d: 0x%08x\n", pfx, i,
+ link_err->vendor_data[i + 1]);
+
+ }
+
+ return 0;
+}
+
int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
{
struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
@@ -573,6 +653,8 @@ int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
return cper_ccix_atc_err_details(pfx, gdata);
case CCIX_PORT_ERROR:
return cper_ccix_port_err_details(pfx, gdata);
+ case CCIX_LINK_ERROR:
+ return cper_ccix_link_err_details(pfx, gdata);
default:
/* Vendor defined so no formatting be done */
break;
@@ -734,3 +816,61 @@ const char *cper_ccix_port_err_unpack(struct trace_seq *p,
return ret;
}
+
+/*
+ * Copy the validation bits, error type, operation type, link id, credit
+ * type and message dwords of a link error record into its compact form,
+ * then copy vendor_data_len bytes of vendor data, starting after
+ * vendor_data[0], into vendor_data.
+ * NOTE(review): no bounds are checked here; the caller must provide a
+ * destination of at least vendor_data_len bytes.
+ */
+void cper_ccix_link_err_pack(const struct cper_sec_ccix_link_error *link_record,
+ struct cper_ccix_link_err_compact *clink_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data)
+{
+ clink_err->validation_bits = link_record->validation_bits;
+ clink_err->err_type = link_record->err_type;
+ clink_err->op_type = link_record->op_type;
+ clink_err->link_id = link_record->link_id;
+ clink_err->credit_type = link_record->credit_type;
+ memcpy(clink_err->message, link_record->message,
+ sizeof(clink_err->message));
+
+ memcpy(vendor_data, &link_record->vendor_data[1], vendor_data_len);
+}
+
+/*
+ * Decode the valid link error fields into msg as "Name: value " pairs.
+ *
+ * Returns the number of characters written, 0 if msg is NULL or no field
+ * is flagged valid (msg is left untouched in that case).
+ * msg is assumed to hold at least CPER_REC_LEN bytes - the caller in this
+ * patch passes rcd_decode_str; TODO confirm its size.
+ */
+static int cper_ccix_err_link_location(struct cper_ccix_link_err_compact *clink_err,
+				       char *msg)
+{
+	u32 len = CPER_REC_LEN - 1;
+	u32 n = 0;
+
+	if (!msg)
+		return 0;
+
+	/*
+	 * Bound every append by the space that is left (len - n) rather than
+	 * by the whole buffer, and use scnprintf() so that n only ever counts
+	 * characters that were really written.
+	 */
+	if (clink_err->validation_bits & CCIX_LINK_ERR_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Error Type: %s ",
+			       cper_ccix_link_err_type_str(clink_err->err_type));
+
+	if (clink_err->validation_bits & CCIX_LINK_ERR_OP_VALID)
+		n += scnprintf(msg + n, len - n, "Op: %s ",
+			       cper_ccix_port_err_op_str(clink_err->op_type));
+
+	if (clink_err->validation_bits & CCIX_LINK_ERR_LINK_ID_VALID)
+		n += scnprintf(msg + n, len - n, "Link ID: %d ",
+			       clink_err->link_id);
+
+	if (clink_err->validation_bits & CCIX_LINK_ERR_CREDIT_TYPE_VALID)
+		n += scnprintf(msg + n, len - n, "Credit Type: %s ",
+			       cper_ccix_link_credit_type_str(clink_err->credit_type));
+
+	/* TODO: the message dwords are not decoded here */
+	return n;
+}
+
+/*
+ * Append the decoded link error fields to the trace sequence p, followed
+ * by a terminating NUL, and return the trace_seq_buffer_ptr() value taken
+ * before anything was appended.
+ * rcd_decode_str is not defined in this hunk; presumably it is a scratch
+ * buffer shared within this file - TODO confirm.
+ */
+const char *cper_ccix_link_err_unpack(struct trace_seq *p,
+ struct cper_ccix_link_err_compact *clink_err)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (cper_ccix_err_link_location(clink_err, rcd_decode_str))
+ trace_seq_printf(p, "%s", rcd_decode_str);
+
+ trace_seq_putc(p, '\0');
+
+ return ret;
+}
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 5e315afc210e..d35be55351e3 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -742,6 +742,47 @@ struct cper_ccix_port_err_compact {
__u8 op_type;
};
+/*
+ * Link error specific part of a CCIX PER record. The validation bits say
+ * which of the following fields carry meaningful data.
+ */
+struct cper_sec_ccix_link_error {
+ __u32 validation_bits;
+#define CCIX_LINK_ERR_OP_VALID BIT(0)
+#define CCIX_LINK_ERR_TYPE_VALID BIT(1)
+#define CCIX_LINK_ERR_LINK_ID_VALID BIT(2)
+#define CCIX_LINK_ERR_CREDIT_TYPE_VALID BIT(3)
+#define CCIX_LINK_ERR_MESSAGE_VALID BIT(4)
+#define CCIX_LINK_ERR_VENDOR_DATA_VALID BIT(5)
+ __u16 length; /* Includes vendor specific log info */
+ __u8 op_type;
+ __u8 err_type;
+ __u8 link_id;
+ __u8 credit_type;
+ __u16 reserved;
+ __u32 message[8];
+ /* Low 16 bits of vendor_data[0] are read as the vendor data length */
+ __u32 vendor_data[];
+};
+
+/*
+ * Complete payload for a CCIX link error: the CCIX section header, the
+ * CCIX PER log header dwords, then the link specific record.
+ */
+struct cper_ccix_link_error {
+ struct cper_sec_ccix_header header;
+ __u32 ccix_header[CCIX_PER_LOG_HEADER_DWS];
+ struct cper_sec_ccix_link_error link_record;
+};
+
+/*
+ * Return the low 16 bits of vendor_data[0] when vendor data is flagged
+ * valid, otherwise 0.
+ * NOTE(review): vendor_data[0] is read without checking that the record is
+ * long enough to contain it; the caller has to guarantee that.
+ */
+static inline u16 ccix_link_err_ven_len_get(struct cper_ccix_link_error *link_err)
+{
+ if (link_err->link_record.validation_bits & CCIX_LINK_ERR_VENDOR_DATA_VALID)
+ return link_err->link_record.vendor_data[0] & 0xFFFF;
+ else
+ return 0;
+}
+
+/*
+ * Fields of struct cper_sec_ccix_link_error kept in the trace event;
+ * filled in by cper_ccix_link_err_pack().
+ */
+struct cper_ccix_link_err_compact {
+ __u32 validation_bits;
+ __u32 message[8];
+ __u8 err_type;
+ __u8 op_type;
+ __u8 link_id;
+ __u8 credit_type;
+};
+
/* Reset to default packing */
#pragma pack()
@@ -787,6 +828,13 @@ void cper_ccix_port_err_pack(const struct cper_sec_ccix_port_error *port_record,
const char *cper_ccix_port_err_unpack(struct trace_seq *p,
struct cper_ccix_port_err_compact *cport_err);
+void cper_ccix_link_err_pack(const struct cper_sec_ccix_link_error *link_record,
+ struct cper_ccix_link_err_compact *clink_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data);
+const char *cper_ccix_link_err_unpack(struct trace_seq *p,
+ struct cper_ccix_link_err_compact *clink_err);
+
struct acpi_hest_generic_data;
int cper_print_ccix_per(const char *pfx,
struct acpi_hest_generic_data *gdata);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 52ac3f2d4c11..bfe1c64b9db0 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -614,6 +614,71 @@ TRACE_EVENT(ccix_port_error_event,
)
);
+/*
+ * CCIX link error tracepoint.
+ *
+ * @err:     full CCIX link error payload
+ * @err_seq: sequence number printed in braces at the start of the event
+ * @sev:     severity, printed via cper_severity_str()
+ * @ven_len: vendor data length, including its 4 byte length header
+ */
+TRACE_EVENT(ccix_link_error_event,
+	TP_PROTO(struct cper_ccix_link_error *err,
+		 u32 err_seq,
+		 u8 sev, u16 ven_len),
+
+	TP_ARGS(err, err_seq, sev, ven_len),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, sev)
+		__field(u8, sevdetail)
+		__field(u8, source)
+		__field(u8, component)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__field_struct(struct cper_ccix_link_err_compact, data)
+		__field(u16, vendor_data_length)
+		__dynamic_array(u8, vendor_data, ven_len)
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+
+		__entry->sev = sev;
+		__entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+			CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+			CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+			CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+			err->ccix_header[1]);
+		/* All ones marks a source id that is not valid */
+		if (err->header.validation_bits & 0x1)
+			__entry->source = err->header.source_id;
+		else
+			__entry->source = ~0;
+		__entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+					       err->ccix_header[1]);
+		if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+			__entry->pa = (u64)err->ccix_header[2] << 32 |
+				(err->ccix_header[3] & 0xfffffffc);
+			__entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
+		} else {
+			__entry->pa = ~0ull;
+			__entry->pa_mask_lsb = ~0;
+		}
+		/*
+		 * Do not store the vendor data header length. A reported
+		 * length shorter than that header must give 0 rather than
+		 * wrap around in the u16 and overrun the dynamic array.
+		 */
+		__entry->vendor_data_length = ven_len > 4 ? ven_len - 4 : 0;
+		cper_ccix_link_err_pack(&err->link_record, &__entry->data,
+					__entry->vendor_data_length,
+					__get_dynamic_array(vendor_data));
+	),
+
+	TP_printk("{%d} %s CCIX PER Link Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
+		__entry->err_seq,
+		cper_severity_str(__entry->sev),
+		cper_ccix_comp_type_str(__entry->component),
+		__entry->sevdetail & BIT(0) ? 1 : 0,
+		__entry->sevdetail & BIT(1) ? 1 : 0,
+		__entry->sevdetail & BIT(2) ? 1 : 0,
+		__entry->sevdetail & BIT(3) ? 1 : 0,
+		__entry->pa,
+		__entry->pa_mask_lsb,
+		cper_ccix_link_err_unpack(p, &__entry->data),
+		__print_hex(__get_dynamic_array(vendor_data), __entry->vendor_data_length)
+	)
+);
+
/*
* memory-failure recovery action result event
*
--
2.20.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 6/6] efi / ras: CCIX Agent internal error reporting
2019-11-12 16:52 [PATCH v3 0/6] CCIX Protocol error reporting Jonathan Cameron
` (4 preceding siblings ...)
2019-11-12 16:52 ` [PATCH v3 5/6] efi / ras: CCIX Link " Jonathan Cameron
@ 2019-11-12 16:52 ` Jonathan Cameron
5 siblings, 0 replies; 12+ messages in thread
From: Jonathan Cameron @ 2019-11-12 16:52 UTC (permalink / raw)
To: linux-edac, linux-acpi, linux-efi, Borislav Petkov,
Mauro Carvalho Chehab, james.morse
Cc: rjw, tony.luck, linuxarm, ard.biesheuvel, nariman.poushin,
Thanu Rangarajan, Jonathan Cameron
The CCIX 1.0 Base specification defines an internal agent error,
for which the specific data present after the header is vendor
defined.
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
drivers/acpi/apei/ghes.c | 4 ++
drivers/firmware/efi/cper-ccix.c | 43 +++++++++++++++++++++
include/linux/cper.h | 29 +++++++++++++++
include/ras/ras_event.h | 64 ++++++++++++++++++++++++++++++++
4 files changed, 140 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 22df8c14ec13..5a75fb0374dd 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -533,6 +533,10 @@ static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
trace_ccix_link_error_event(payload, err_seq, sev,
ccix_link_err_ven_len_get(payload));
break;
+ case CCIX_AGENT_INTERNAL_ERROR:
+ trace_ccix_agent_error_event(payload, err_seq, sev,
+ ccix_agent_err_ven_len_get(payload));
+ break;
default:
/* Unknown error type */
pr_info("CCIX error of unknown or vendor defined type\n");
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
index 03630f4abaab..250857146077 100644
--- a/drivers/firmware/efi/cper-ccix.c
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -586,6 +586,38 @@ static int cper_ccix_link_err_details(const char *pfx,
return 0;
}
+/*
+ * Print the vendor data dwords of a CCIX agent internal error record,
+ * each line prefixed with pfx. Nothing is printed unless the vendor data
+ * is flagged valid.
+ *
+ * Returns 0 on success, or -ENOSPC when error_data_length is smaller than
+ * the fixed record or than the vendor data the record claims to carry.
+ */
+static int cper_ccix_agent_err_details(const char *pfx,
+ struct acpi_hest_generic_data *gdata)
+{
+ struct cper_ccix_agent_err *full_agent_err;
+ struct cper_sec_ccix_agent_err *agent_err;
+ u16 vendor_data_len;
+ int i;
+
+ if (gdata->error_data_length < sizeof(*full_agent_err))
+ return -ENOSPC;
+
+ full_agent_err = acpi_hest_get_payload(gdata);
+
+ agent_err = &full_agent_err->agent_record;
+
+ if (agent_err->validation_bits & CCIX_AGENT_INTERNAL_ERR_VENDOR_DATA_VALID) {
+ /* vendor_data[0] must be present before its length can be read */
+ if (gdata->error_data_length < sizeof(*full_agent_err) + 4)
+ return -ENOSPC;
+
+ vendor_data_len = agent_err->vendor_data[0] & GENMASK(15, 0);
+ if (gdata->error_data_length <
+ sizeof(*full_agent_err) + vendor_data_len)
+ return -ENOSPC;
+
+ /* The length counts vendor_data[0] itself, so print from [1] on */
+ for (i = 0; i < vendor_data_len/4 - 1; i++)
+ printk("%s""Vendor%d: 0x%08x\n", pfx, i,
+ agent_err->vendor_data[i + 1]);
+ }
+
+ return 0;
+}
+
int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
{
struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
@@ -655,6 +687,8 @@ int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
return cper_ccix_port_err_details(pfx, gdata);
case CCIX_LINK_ERROR:
return cper_ccix_link_err_details(pfx, gdata);
+ case CCIX_AGENT_INTERNAL_ERROR:
+ return cper_ccix_agent_err_details(pfx, gdata);
default:
/* Vendor defined so no formatting be done */
break;
@@ -874,3 +908,12 @@ const char *cper_ccix_link_err_unpack(struct trace_seq *p,
return ret;
}
+
+void cper_ccix_agent_err_pack(const struct cper_sec_ccix_agent_err *agent_record,
+ struct cper_ccix_agent_err_compact *cagent_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data)
+{
+ cagent_err->validation_bits = agent_record->validation_bits;
+ memcpy(vendor_data, agent_record->vendor_data + 1, vendor_data_len); /* skip dword 0 */
+}
diff --git a/include/linux/cper.h b/include/linux/cper.h
index d35be55351e3..373c1d387a70 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -783,6 +783,30 @@ struct cper_ccix_link_err_compact {
__u8 credit_type;
};
+struct cper_sec_ccix_agent_err {
+ __u32 validation_bits; /* CCIX_AGENT_INTERNAL_ERR_* flag bits */
+#define CCIX_AGENT_INTERNAL_ERR_VENDOR_DATA_VALID BIT(0) /* vendor_data[] may be read */
+ __u32 vendor_data[]; /* dword 0 carries the length in bits 15:0 */
+};
+
+struct cper_ccix_agent_err {
+ struct cper_sec_ccix_header header; /* supplies validation_bits and source_id */
+ __u32 ccix_header[CCIX_PER_LOG_HEADER_DWS]; /* decoded with the CCIX_PER_LOG_DW1_* masks */
+ struct cper_sec_ccix_agent_err agent_record; /* agent internal error specific part */
+};
+
+static inline u16 ccix_agent_err_ven_len_get(struct cper_ccix_agent_err *agent_err)
+{
+ /* Without valid vendor data there is no length word to read: report 0. */
+ if (!(agent_err->agent_record.validation_bits & CCIX_AGENT_INTERNAL_ERR_VENDOR_DATA_VALID))
+ return 0;
+ return agent_err->agent_record.vendor_data[0] & 0xFFFF;
+}
+
+struct cper_ccix_agent_err_compact {
+ __u32 validation_bits; /* filled from cper_sec_ccix_agent_err by cper_ccix_agent_err_pack() */
+};
+
/* Reset to default packing */
#pragma pack()
@@ -835,6 +859,11 @@ void cper_ccix_link_err_pack(const struct cper_sec_ccix_link_error *link_record,
const char *cper_ccix_link_err_unpack(struct trace_seq *p,
struct cper_ccix_link_err_compact *clink_err);
+void cper_ccix_agent_err_pack(const struct cper_sec_ccix_agent_err *agent_record,
+ struct cper_ccix_agent_err_compact *cagent_err,
+ const u16 vendor_data_len,
+ u8 *vendor_data);
+
struct acpi_hest_generic_data;
int cper_print_ccix_per(const char *pfx,
struct acpi_hest_generic_data *gdata);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index bfe1c64b9db0..d09e5389a1e6 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -679,6 +679,70 @@ TRACE_EVENT(ccix_link_error_event,
)
);
+TRACE_EVENT(ccix_agent_error_event,
+ TP_PROTO(struct cper_ccix_agent_err *err,
+ u32 err_seq,
+ u8 sev, u16 ven_len),
+
+ TP_ARGS(err, err_seq, sev, ven_len),
+
+ TP_STRUCT__entry(
+ __field(u32, err_seq)
+ __field(u8, sev)
+ __field(u8, sevdetail)
+ __field(u8, source)
+ __field(u8, component)
+ __field(u64, pa)
+ __field(u8, pa_mask_lsb)
+ __field(u16, vendor_data_length) /* bytes stored in vendor_data, header excluded */
+ __field_struct(struct cper_ccix_agent_err_compact, data)
+ __dynamic_array(u8, vendor_data, ven_len)
+ ),
+
+ TP_fast_assign(
+ __entry->err_seq = err_seq;
+
+ __entry->sev = sev;
+ __entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+ CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+ CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+ CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+ err->ccix_header[1]);
+ if (err->header.validation_bits & 0x1)
+ __entry->source = err->header.source_id;
+ else
+ __entry->source = ~0; /* all-ones: source_id not flagged valid */
+ __entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+ err->ccix_header[1]);
+ if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+ __entry->pa = (u64)err->ccix_header[2] << 32 |
+ (err->ccix_header[3] & 0xfffffffc);
+ __entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
+ } else {
+ __entry->pa = ~0ull; /* all-ones: no address reported */
+ __entry->pa_mask_lsb = ~0;
+ }
+ /* Drop the 4-byte header; lengths of 4 or less store nothing instead of wrapping the u16 */
+ __entry->vendor_data_length = ven_len > 4 ? ven_len - 4 : 0;
+ cper_ccix_agent_err_pack(&err->agent_record, &__entry->data,
+ __entry->vendor_data_length,
+ __get_dynamic_array(vendor_data));
+ ),
+
+ TP_printk("{%d} %s CCIX PER Agent Internal Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) vendor:%s",
+ __entry->err_seq,
+ cper_severity_str(__entry->sev),
+ cper_ccix_comp_type_str(__entry->component),
+ __entry->sevdetail & BIT(0) ? 1 : 0,
+ __entry->sevdetail & BIT(1) ? 1 : 0,
+ __entry->sevdetail & BIT(2) ? 1 : 0,
+ __entry->sevdetail & BIT(3) ? 1 : 0,
+ __entry->pa,
+ __entry->pa_mask_lsb,
+ __print_hex(__get_dynamic_array(vendor_data), __entry->vendor_data_length)
+ )
+);
+
/*
* memory-failure recovery action result event
*
--
2.20.1
^ permalink raw reply related [flat|nested] 12+ messages in thread