Linux-EFI Archive on lore.kernel.org
 help / color / Atom feed
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
To: <linux-edac@vger.kernel.org>, <linux-acpi@vger.kernel.org>,
	<linux-efi@vger.kernel.org>, Borislav Petkov <bp@alien8.de>,
	"Mauro Carvalho Chehab" <mchehab@kernel.org>,
	<james.morse@arm.com>
Cc: <rjw@rjwysocki.net>, <tony.luck@intel.com>, <linuxarm@huawei.com>,
	<ard.biesheuvel@linaro.org>, <nariman.poushin@linaro.org>,
	<jcm@redhat.com>, <linux-kernel@vger.kernel.org>,
	<peter.maydell@linaro.org>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>
Subject: [PATCH 3/6 V2] efi / ras: CCIX Address Translation Cache error reporting
Date: Tue, 20 Aug 2019 22:47:29 +0800
Message-ID: <20190820144732.2370-4-Jonathan.Cameron@huawei.com> (raw)
In-Reply-To: <20190820144732.2370-1-Jonathan.Cameron@huawei.com>

CCIX devices tend to make heavy use of ATCs. The CCIX base
specification defines a protocol error message (PER) that
describes errors reported by such caches. The UEFI 2.8
specification includes a CCIX CPER record for firmware first
handling to report these errors to the operating system.

This patch is very similar to the support previously added
for CCIX Memory Errors and provides both logging and a RAS
tracepoint for this error class.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Changes since v1.
	Drop printing of vendor data to kernel log.
	
 drivers/acpi/apei/ghes.c         |  4 ++
 drivers/firmware/efi/cper-ccix.c | 72 ++++++++++++++++++++++++++++++++
 include/linux/cper.h             | 39 +++++++++++++++++
 include/ras/ras_event.h          | 67 +++++++++++++++++++++++++++++
 4 files changed, 182 insertions(+)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 5bda94e48b1b..a2ae9311ffee 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -508,6 +508,10 @@ static void ghes_handle_ccix_per(struct acpi_hest_generic_data *gdata, int sev)
 		trace_ccix_cache_error_event(payload, err_seq, sev,
 					     ccix_cache_err_ven_len_get(payload));
 		break;
+	case CCIX_ATC_ERROR:
+		trace_ccix_atc_error_event(payload, err_seq, sev,
+					   ccix_atc_err_ven_len_get(payload));
+		break;
 	default:
 		/* Unknown error type */
 		pr_info("CCIX error of unknown or vendor defined type\n");
diff --git a/drivers/firmware/efi/cper-ccix.c b/drivers/firmware/efi/cper-ccix.c
index fa3fafac402b..da8b7e1bb3a9 100644
--- a/drivers/firmware/efi/cper-ccix.c
+++ b/drivers/firmware/efi/cper-ccix.c
@@ -363,6 +363,31 @@ static int cper_ccix_cache_err_details(const char *pfx,
 	return 0;
 }
 
+static int cper_ccix_atc_err_details(const char *pfx,
+				     struct acpi_hest_generic_data *gdata)
+{
+	struct cper_ccix_atc_error *full_atc_err;
+	struct cper_sec_ccix_atc_error *atc_err;
+
+	if (gdata->error_data_length < sizeof(*full_atc_err))
+		return -ENOSPC;
+
+	full_atc_err = acpi_hest_get_payload(gdata);
+
+	atc_err = &full_atc_err->atc_record;
+
+	if (atc_err->validation_bits & CCIX_ATC_ERR_OP_VALID)
+		printk("%s""Operation: %s\n", pfx,
+		       cper_ccix_cache_err_op_str(atc_err->op_type));
+
+	if (atc_err->validation_bits & CCIX_ATC_ERR_INSTANCE_ID_VALID)
+		printk("%s""Instance ID: %d\n", pfx, atc_err->instance);
+
+	/* Vendor data is not printed to the kernel log */
+
+	return 0;
+}
+
 int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
 {
 	struct cper_sec_ccix_header *header = acpi_hest_get_payload(gdata);
@@ -426,6 +451,8 @@ int cper_print_ccix_per(const char *pfx, struct acpi_hest_generic_data *gdata)
 		return cper_ccix_mem_err_details(pfx, gdata);
 	case CCIX_CACHE_ERROR:
 		return cper_ccix_cache_err_details(pfx, gdata);
+	case CCIX_ATC_ERROR:
+		return cper_ccix_atc_err_details(pfx, gdata);
 	default:
 		/* Vendor defined so no formatting be done */
 		break;
@@ -496,3 +523,48 @@ const char *cper_ccix_cache_err_unpack(struct trace_seq *p,
 
 	return ret;
 }
+
+void cper_ccix_atc_err_pack(const struct cper_sec_ccix_atc_error *atc_record,
+			    struct cper_ccix_atc_err_compact *catc_err,
+			    const u16 vendor_data_len,
+			    u8 *vendor_data)
+{
+	catc_err->validation_bits = atc_record->validation_bits;
+	catc_err->op_type = atc_record->op_type;
+	catc_err->instance = atc_record->instance;
+	memcpy(vendor_data, &atc_record->vendor_data[1], vendor_data_len);
+}
+
+static int cper_ccix_err_atc_location(struct cper_ccix_atc_err_compact *catc_err,
+				      char *msg)
+{
+	u32 len = CPER_REC_LEN - 1;
+	u32 n = 0;
+
+	if (!msg)
+		return 0;
+	/* scnprintf() returns bytes written; len - n is the room left */
+	if (catc_err->validation_bits & CCIX_ATC_ERR_OP_VALID)
+		n += scnprintf(msg + n, len - n, "Op: %s ",
+			       cper_ccix_cache_err_op_str(catc_err->op_type));
+
+	if (catc_err->validation_bits & CCIX_ATC_ERR_INSTANCE_ID_VALID)
+		n += scnprintf(msg + n, len - n, "Instance: %d ",
+			       catc_err->instance);
+
+	return n;
+}
+
+const char *cper_ccix_atc_err_unpack(struct trace_seq *p,
+				     struct cper_ccix_atc_err_compact *catc_err)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	if (cper_ccix_err_atc_location(catc_err, rcd_decode_str))
+		trace_seq_printf(p, "%s", rcd_decode_str);
+
+	trace_seq_putc(p, '\0');
+
+	return ret;
+}
+
diff --git a/include/linux/cper.h b/include/linux/cper.h
index eef254b8b8b7..6bb603e9a97a 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -675,6 +675,38 @@ struct cper_ccix_cache_err_compact {
 	__u8	instance;
 };
 
+struct cper_sec_ccix_atc_error {
+	__u32	validation_bits;
+#define CCIX_ATC_ERR_OP_VALID			BIT(0)
+#define CCIX_ATC_ERR_INSTANCE_ID_VALID		BIT(1)
+#define CCIX_ATC_ERR_VENDOR_DATA_VALID		BIT(2)
+	__u16	length; /* Includes vendor specific log info */
+	__u8	op_type;
+	__u8	instance;
+	__u32	reserved;
+	__u32	vendor_data[];
+};
+
+struct cper_ccix_atc_error {
+	struct cper_sec_ccix_header header;
+	__u32 ccix_header[CCIX_PER_LOG_HEADER_DWS];
+	struct cper_sec_ccix_atc_error atc_record;
+};
+
+static inline u16 ccix_atc_err_ven_len_get(struct cper_ccix_atc_error *atc_err)
+{
+	if (atc_err->atc_record.validation_bits & CCIX_ATC_ERR_VENDOR_DATA_VALID)
+		return atc_err->atc_record.vendor_data[0] & 0xFFFF;
+	else
+		return 0;
+}
+
+struct cper_ccix_atc_err_compact {
+	__u32	validation_bits;
+	__u8	op_type;
+	__u8	instance;
+};
+
 /* Reset to default packing */
 #pragma pack()
 
@@ -706,6 +738,13 @@ const char *cper_ccix_cache_err_unpack(struct trace_seq *p,
 				       struct cper_ccix_cache_err_compact *ccache_err);
 const char *cper_ccix_cache_err_type_str(__u8 error_type);
 
+void cper_ccix_atc_err_pack(const struct cper_sec_ccix_atc_error *atc_record,
+			    struct cper_ccix_atc_err_compact *catc_err,
+			    const u16 vendor_data_len,
+			    u8 *vendor_data);
+const char *cper_ccix_atc_err_unpack(struct trace_seq *p,
+				     struct cper_ccix_atc_err_compact *catc_err);
+
 struct acpi_hest_generic_data;
 int cper_print_ccix_per(const char *pfx,
 			struct acpi_hest_generic_data *gdata);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 55f2c1900c54..bab49e297551 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -481,6 +481,73 @@ TRACE_EVENT(ccix_cache_error_event,
 			    __entry->vendor_data_length)
 	)
 );
+
+TRACE_EVENT(ccix_atc_error_event,
+	TP_PROTO(struct cper_ccix_atc_error *err,
+		 u32 err_seq,
+		 u8 sev,
+		 u16 ven_len),
+
+	TP_ARGS(err, err_seq, sev, ven_len),
+
+	TP_STRUCT__entry(
+		__field(u32, err_seq)
+		__field(u8, sev)
+		__field(u8, sevdetail)
+		__field(u8, source)
+		__field(u8, component)
+		__field(u64, pa)
+		__field(u8, pa_mask_lsb)
+		__field_struct(struct cper_ccix_atc_err_compact, data)
+		__field(u16, vendor_data_length)
+		__dynamic_array(u8, vendor_data, ven_len)
+	),
+
+	TP_fast_assign(
+		__entry->err_seq = err_seq;
+
+		__entry->sev = sev;
+		__entry->sevdetail = FIELD_GET(CCIX_PER_LOG_DW1_SEV_UE_M |
+			CCIX_PER_LOG_DW1_SEV_NO_COMM_M |
+			CCIX_PER_LOG_DW1_SEV_DEGRADED_M |
+			CCIX_PER_LOG_DW1_SEV_DEFFERABLE_M,
+			err->ccix_header[1]);
+		if (err->header.validation_bits & 0x1)
+			__entry->source = err->header.source_id;
+		else
+			__entry->source = ~0;
+		__entry->component = FIELD_GET(CCIX_PER_LOG_DW1_COMP_TYPE_M,
+					       err->ccix_header[1]);
+		if (err->ccix_header[1] & CCIX_PER_LOG_DW1_ADDR_VAL_M) {
+			__entry->pa = (u64)err->ccix_header[2] << 32 |
+				(err->ccix_header[3] & 0xfffffffc);
+			__entry->pa_mask_lsb = err->ccix_header[4] & 0xff;
+		} else {
+			__entry->pa = ~0ull;
+			__entry->pa_mask_lsb = ~0;
+		}
+		/* Payload excludes the 4-byte length dword; short ven_len => 0 */
+		__entry->vendor_data_length = ven_len > 4 ? ven_len - 4 : 0;
+		cper_ccix_atc_err_pack(&err->atc_record, &__entry->data,
+				       __entry->vendor_data_length,
+				       __get_dynamic_array(vendor_data));
+	),
+
+	TP_printk("{%d} %s CCIX PER ATC Error in %s SevUE:%d SevNoComm:%d SevDegraded:%d SevDeferred:%d physical addr: %016llx (mask: %x) %s vendor:%s",
+		__entry->err_seq,
+		cper_severity_str(__entry->sev),
+		cper_ccix_comp_type_str(__entry->component),
+		__entry->sevdetail & BIT(0) ? 1 : 0,
+		__entry->sevdetail & BIT(1) ? 1 : 0,
+		__entry->sevdetail & BIT(2) ? 1 : 0,
+		__entry->sevdetail & BIT(3) ? 1 : 0,
+		__entry->pa,
+		__entry->pa_mask_lsb,
+		cper_ccix_atc_err_unpack(p, &__entry->data),
+		__print_hex(__get_dynamic_array(vendor_data), __entry->vendor_data_length)
+	)
+);
+
 /*
  * memory-failure recovery action result event
  *
-- 
2.20.1


  parent reply index

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-20 14:47 [PATCH 0/6 V2] CCIX Protocol " Jonathan Cameron
2019-08-20 14:47 ` [PATCH 1/6 V2] efi / ras: CCIX Memory " Jonathan Cameron
2019-08-20 14:47 ` [PATCH 2/6 V2] efi / ras: CCIX Cache " Jonathan Cameron
2019-08-20 14:47 ` Jonathan Cameron [this message]
2019-08-20 14:47 ` [PATCH 4/6 V2] efi / ras: CCIX Port " Jonathan Cameron
2019-08-20 14:47 ` [PATCH 5/6 V2] efi / ras: CCIX Link " Jonathan Cameron
2019-08-20 14:47 ` [PATCH 6/6 V2] efi / ras: CCIX Agent internal " Jonathan Cameron
2019-08-27 13:44 ` [PATCH 0/6 V2] CCIX Protocol " Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190820144732.2370-4-Jonathan.Cameron@huawei.com \
    --to=jonathan.cameron@huawei.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bp@alien8.de \
    --cc=james.morse@arm.com \
    --cc=jcm@redhat.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    --cc=mchehab@kernel.org \
    --cc=nariman.poushin@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=rjw@rjwysocki.net \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-EFI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-efi/0 linux-efi/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-efi linux-efi/ https://lore.kernel.org/linux-efi \
		linux-efi@vger.kernel.org linux-efi@archiver.kernel.org
	public-inbox-index linux-efi

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-efi


AGPL code for this site: git clone https://public-inbox.org/ public-inbox