All of lore.kernel.org
 help / color / mirror / Atom feed
From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
To: <linux-efi@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-cxl@vger.kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Alison Schofield <alison.schofield@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	"Ira Weiny" <ira.weiny@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>,
	Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Subject: [PATCH v2 4/4] acpi/ghes, cxl/pci: Trace FW-First CXL Protocol Errors
Date: Tue, 9 Jan 2024 03:47:55 +0000	[thread overview]
Message-ID: <20240109034755.100555-5-Smita.KoralahalliChannabasappa@amd.com> (raw)
In-Reply-To: <20240109034755.100555-1-Smita.KoralahalliChannabasappa@amd.com>

When PCIe AER is in FW-First mode, the OS should process CXL Protocol
errors from CPER records. These CPER records, obtained from the GHES
module, rely on a registered callback to notify the CXL subsystem so
that they can be processed.

Call the existing cxl_cper_callback to notify the CXL subsystem of a
Protocol error.

The defined trace events cxl_aer_uncorrectable_error and
cxl_aer_correctable_error currently trace native CXL AER errors. Reuse
them to trace FW-First Protocol Errors.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
v2:
	Added warning for serial number check.
	Moved severity determination to previous patch.
---
 drivers/acpi/apei/ghes.c  |  5 +++++
 drivers/cxl/core/pci.c    | 29 +++++++++++++++++++++++++++++
 drivers/cxl/cxlpci.h      |  3 +++
 drivers/cxl/pci.c         |  5 +++++
 include/linux/cxl-event.h |  1 +
 5 files changed, 43 insertions(+)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 1d4f3d68a0bc..4318b602e797 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -716,10 +716,15 @@ static void cxl_cper_post_event(enum cxl_event_type event_type,
 
 void cxl_cper_handle_prot_err(struct acpi_hest_generic_data *gdata)
 {
+	enum cxl_event_type event_type = CXL_CPER_EVENT_PROT_ERR;
 	struct cxl_cper_event_info info;
 
 	if (cxl_cper_handle_prot_err_info(gdata, &info))
 		return;
+
+	guard(rwsem_read)(&cxl_cper_rw_sem);
+	if (cper_callback)
+		cper_callback(event_type, &info);
 }
 
 int cxl_cper_register_callback(cxl_cper_callback callback)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 37e1652afbc7..bde8ebf5e4b3 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -836,6 +836,35 @@ void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL);
 
+void cxl_prot_err_trace_record(struct cxl_dev_state *cxlds,
+			       struct cxl_cper_event_info *info)
+{
+	struct cper_cxl_event_sn *dev_serial_num =  &info->rec.hdr.dev_serial_num;
+	u32 status, fe;
+
+	if (((u64)dev_serial_num->upper_dw << 32 |
+	     dev_serial_num->lower_dw) != cxlds->serial)
+		pr_warn("The device serial number in CPER differs or isn't valid\n");
+
+	if (info->p_err.severity == CXL_AER_CORRECTABLE) {
+		status = info->p_err.cxl_ras.cor_status & ~info->p_err.cxl_ras.cor_mask;
+
+		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+	} else {
+		status = info->p_err.cxl_ras.uncor_status & ~info->p_err.cxl_ras.uncor_mask;
+
+		if (hweight32(status) > 1)
+			fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+					   info->p_err.cxl_ras.cap_control));
+		else
+			fe = status;
+
+		trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
+						  info->p_err.cxl_ras.header_log);
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_prot_err_trace_record, CXL);
+
 static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
 					  struct cxl_dport *dport)
 {
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 0fa4799ea316..216003d4aec1 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -93,4 +93,7 @@ void read_cdat_data(struct cxl_port *port);
 void cxl_cor_error_detected(struct pci_dev *pdev);
 pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
 				    pci_channel_state_t state);
+struct cxl_cper_event_info;
+void cxl_prot_err_trace_record(struct cxl_dev_state *cxlds,
+			       struct cxl_cper_event_info *info);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1ad240ead4fd..515983e7df10 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -995,6 +995,11 @@ static void cxl_cper_event_call(enum cxl_event_type ev_type,
 	if (!cxlds)
 		return;
 
+	if (ev_type == CXL_CPER_EVENT_PROT_ERR) {
+		cxl_prot_err_trace_record(cxlds, info);
+		return;
+	}
+
 	/* Fabricate a log type */
 	hdr_flags = get_unaligned_le24(info->rec.event.generic.hdr.flags);
 	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h
index 08e3979de9a3..96cc88aa04f3 100644
--- a/include/linux/cxl-event.h
+++ b/include/linux/cxl-event.h
@@ -113,6 +113,7 @@ enum cxl_event_type {
 	CXL_CPER_EVENT_GEN_MEDIA,
 	CXL_CPER_EVENT_DRAM,
 	CXL_CPER_EVENT_MEM_MODULE,
+	CXL_CPER_EVENT_PROT_ERR,
 };
 
 #define CPER_CXL_DEVICE_ID_VALID		BIT(0)
-- 
2.17.1


  parent reply	other threads:[~2024-01-09  3:48 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-09  3:47 [PATCH v2 0/4] acpi/ghes, cper, cxl: Trace FW-First CXL Protocol Errors Smita Koralahalli
2024-01-09  3:47 ` [PATCH v2 1/4] acpi/ghes, cxl: Create a common CXL struct to handle different CXL CPER records Smita Koralahalli
2024-02-15 11:56   ` Jonathan Cameron
2024-01-09  3:47 ` [PATCH v2 2/4] efi/cper, cxl: Make definitions and structures global Smita Koralahalli
2024-02-15 11:58   ` Jonathan Cameron
2024-02-15 14:47   ` Ard Biesheuvel
2024-01-09  3:47 ` [PATCH v2 3/4] acpi/ghes, efi/cper: Recognize and process CXL Protocol Errors Smita Koralahalli
2024-02-15 12:17   ` Jonathan Cameron
2024-01-09  3:47 ` Smita Koralahalli [this message]
2024-02-15 12:22   ` [PATCH v2 4/4] acpi/ghes, cxl/pci: Trace FW-First " Jonathan Cameron
2024-05-07  9:35 ` [PATCH v2 0/4] acpi/ghes, cper, cxl: " Fabio M. De Francesco
2024-05-16 17:59   ` Smita Koralahalli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240109034755.100555-5-Smita.KoralahalliChannabasappa@amd.com \
    --to=smita.koralahallichannabasappa@amd.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=alison.schofield@intel.com \
    --cc=ardb@kernel.org \
    --cc=dan.j.williams@intel.com \
    --cc=ira.weiny@intel.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=vishal.l.verma@intel.com \
    --cc=yazen.ghannam@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.