linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
To: <linux-efi@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Alison Schofield <alison.schofield@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	"Ira Weiny" <ira.weiny@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>,
	Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Subject: [PATCH 3/3] efi/cper, cxl: Decode CXL Component Events Memory Module Event Record
Date: Thu, 12 Oct 2023 23:03:01 +0000	[thread overview]
Message-ID: <20231012230301.58500-4-Smita.KoralahalliChannabasappa@amd.com> (raw)
In-Reply-To: <20231012230301.58500-1-Smita.KoralahalliChannabasappa@amd.com>

Add support for decoding CXL Component Events Memory Module Event Record
as defined in CXL rev 3.0 section 8.2.9.2.1.3.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
 drivers/firmware/efi/cper.c     |   8 +++
 drivers/firmware/efi/cper_cxl.c | 110 ++++++++++++++++++++++++++++++++
 drivers/firmware/efi/cper_cxl.h |  32 ++++++++++
 3 files changed, 150 insertions(+)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 1d182487fa13..5b45bf513512 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -623,6 +623,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
 			cper_print_dram(newpfx, dram);
 		else
 			goto err_section_too_small;
+	} else if (guid_equal(sec_type, &CPER_SEC_CXL_MM_MODULE)) {
+		struct cper_sec_comp_event *mm_module = acpi_hest_get_payload(gdata);
+
+		printk("%ssection_type: CXL Memory Module Event\n", newpfx);
+		if (gdata->error_data_length >= sizeof(*mm_module))
+			cper_print_mm_module(newpfx, mm_module);
+		else
+			goto err_section_too_small;
 	} else {
 		const void *err = acpi_hest_get_payload(gdata);
 
diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
index 3fba360b7dc6..5be10ca20c7c 100644
--- a/drivers/firmware/efi/cper_cxl.c
+++ b/drivers/firmware/efi/cper_cxl.c
@@ -39,6 +39,11 @@
 #define DRAM_VALID_COLUMN			BIT_ULL(6)
 #define DRAM_VALID_CORRECTION_MASK		BIT_ULL(7)
 
+#define DHI_AS_LIFE_USED(as)			((as) & GENMASK(1, 0))
+#define DHI_AS_DEV_TEMP(as)			(((as) & GENMASK(3, 2)) >> 2)
+#define DHI_AS_COR_VOL_ERR_CNT(as)		(((as) & GENMASK(4, 4)) >> 4)
+#define DHI_AS_COR_PER_ERR_CNT(as)		(((as) & GENMASK(5, 5)) >> 5)
+
 /* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */
 struct cxl_ras_capability_regs {
 	u32 uncor_status;
@@ -119,6 +124,45 @@ static const char * const dram_mem_type_strs[] = {
 	"data path error",
 };
 
+static const char * const mm_module_event_type_strs[] = {
+	[0] = "health status change",
+	[1] = "media status change",
+	[2] = "life used change",
+	[3] = "temperature change",
+	[4] = "data path error",
+	[5] = "lsa error",
+};
+
+static const char * const dhi_health_status_strs[] = {
+	[0] = "maintenance needed",
+	[1] = "performance degraded",
+	[2] = "hardware replacement needed",
+};
+
+static const char * const dhi_media_status_strs[] = {
+	[0] = "normal",
+	[1] = "not ready",
+	[2] = "write persistency lost",
+	[3] = "all data lost",
+	[4] = "write persistency loss in the event of power loss",
+	[5] = "write persistency loss in event of shutdown",
+	[6] = "write persistency loss imminent",
+	[7] = "all data loss in the event of power loss",
+	[8] = "all data loss in the event of shutdown",
+	[9] = "all data loss imminent",
+};
+
+static const char * const dhi_two_bit_status_strs[] = {
+	[0] = "normal",
+	[1] = "warning",
+	[2] = "critical",
+};
+
+static const char * const dhi_one_bit_status_strs[] = {
+	[0] = "normal",
+	[1] = "warning",
+};
+
 void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err)
 {
 	if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE)
@@ -409,3 +453,69 @@ void cper_print_dram(const char *pfx, const struct cper_sec_comp_event *event)
 			       dram->cor_mask, sizeof(dram->cor_mask), 0);
 	}
 }
+
+/* Print the Device Health Information (DHI) of a Memory Module Event, field by field. */
+static void cper_print_mm_module_dhi(const char *pfx, const struct dev_health_info *dhi)
+{
+	pr_info("%s health status: 0x%02x\n", pfx, dhi->health_status);
+	cper_print_bits(pfx, dhi->health_status, dhi_health_status_strs,
+			ARRAY_SIZE(dhi_health_status_strs));
+
+	pr_info("%s media status: %d, %s\n", pfx, dhi->media_status,
+		dhi->media_status < ARRAY_SIZE(dhi_media_status_strs)
+		? dhi_media_status_strs[dhi->media_status] : "unknown");
+
+	pr_info("%s current life used: %lu, %s\n", pfx,
+		DHI_AS_LIFE_USED(dhi->add_status),
+		DHI_AS_LIFE_USED(dhi->add_status) < ARRAY_SIZE(dhi_two_bit_status_strs)
+		? dhi_two_bit_status_strs[DHI_AS_LIFE_USED(dhi->add_status)]
+		: "unknown");
+
+	pr_info("%s current device temperature: %lu, %s\n", pfx,
+		DHI_AS_DEV_TEMP(dhi->add_status),
+		DHI_AS_DEV_TEMP(dhi->add_status) < ARRAY_SIZE(dhi_two_bit_status_strs)
+		? dhi_two_bit_status_strs[DHI_AS_DEV_TEMP(dhi->add_status)]
+		: "unknown");
+
+	pr_info("%s current corrected volatile err count: %lu, %s\n", pfx,
+		DHI_AS_COR_VOL_ERR_CNT(dhi->add_status),
+		DHI_AS_COR_VOL_ERR_CNT(dhi->add_status) < ARRAY_SIZE(dhi_one_bit_status_strs)
+		? dhi_one_bit_status_strs[DHI_AS_COR_VOL_ERR_CNT(dhi->add_status)]
+		: "unknown");
+
+	pr_info("%s current corrected persistent err count: %lu, %s\n", pfx,
+		DHI_AS_COR_PER_ERR_CNT(dhi->add_status),
+		DHI_AS_COR_PER_ERR_CNT(dhi->add_status) < ARRAY_SIZE(dhi_one_bit_status_strs)
+		? dhi_one_bit_status_strs[DHI_AS_COR_PER_ERR_CNT(dhi->add_status)]
+		: "unknown");
+
+	pr_info("%s life used percent: 0x%02x\n", pfx, dhi->life_used);
+	pr_info("%s device temperature degree celsius: 0x%04x\n", pfx,
+		dhi->device_temp);
+	pr_info("%s dirty shutdown count: 0x%08x\n", pfx, dhi->dirty_shutdown_cnt);
+	pr_info("%s total corrected volatile error count: 0x%08x\n", pfx,
+		dhi->cor_vol_err_cnt);
+	pr_info("%s total corrected persistent error count: 0x%08x\n", pfx,
+		dhi->cor_per_err_cnt);
+}
+
+/* Print a CXL Memory Module Event section; the record is decoded only if flagged valid. */
+void cper_print_mm_module(const char *pfx, const struct cper_sec_comp_event *event)
+{
+	const struct cper_sec_mm_module *mm_module;
+
+	cper_print_comp_event(pfx, event);
+
+	if (!(event->valid_bits & COMP_EVENT_VALID_EVENT_LOG))
+		return;
+
+	/* NOTE(review): record length is not checked here; confirm the caller does. */
+	mm_module = (const struct cper_sec_mm_module *)(event + 1);
+	cper_print_event_record(pfx, &mm_module->record);
+
+	pr_info("%s device event type: %d, %s\n", pfx, mm_module->event_type,
+		mm_module->event_type < ARRAY_SIZE(mm_module_event_type_strs)
+		? mm_module_event_type_strs[mm_module->event_type]
+		: "unknown");
+	cper_print_mm_module_dhi(pfx, &mm_module->dhi);
+}
diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h
index 967847b571cb..c37dd624a522 100644
--- a/drivers/firmware/efi/cper_cxl.h
+++ b/drivers/firmware/efi/cper_cxl.h
@@ -25,6 +25,11 @@
 	GUID_INIT(0x601DCBB3, 0x9C06, 0x4EAB, 0xB8, 0xAF, 0x4E, 0x9B,	\
 		  0xFB, 0x5C, 0x96, 0x24)
 
+/* CXL Memory Module Event Section, GUID fe927475-dd59-4339-a586-79bab113b774 */
+#define CPER_SEC_CXL_MM_MODULE						\
+	GUID_INIT(0xFE927475, 0xDD59, 0x4339, 0xA5, 0x86, 0x79, 0xBA,	\
+		  0xB1, 0x13, 0xB7, 0x74)
+
 #pragma pack(1)
 
 /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
@@ -147,10 +152,37 @@ struct cper_sec_dram {
 	u8 reserved[23];
 };
 
+/*
+ * CXL Memory Module Event
+ * Device Health Information - DHI
+ * CXL rev 3.0 sec 8.2.9.8.3.1; Table 8-100
+ */
+struct dev_health_info {
+	u8 health_status;	/* bit flags, named by dhi_health_status_strs[] */
+	u8 media_status;	/* enumerated, named by dhi_media_status_strs[] */
+	u8 add_status;		/* packed bit fields, split by DHI_AS_*() */
+	u8 life_used;		/* printed as "life used percent" */
+	u16 device_temp;	/* printed as "device temperature degree celsius" */
+	u32 dirty_shutdown_cnt;	/* printed as "dirty shutdown count" */
+	u32 cor_vol_err_cnt;	/* total corrected volatile error count */
+	u32 cor_per_err_cnt;	/* total corrected persistent error count */
+};
+
+/*
+ * CXL Memory Module Event Record, CXL rev 3.0 sec 8.2.9.2.1.3; Table 8-45
+ */
+struct cper_sec_mm_module {
+	struct common_event_record record;
+	u8 event_type;		/* named by mm_module_event_type_strs[] */
+	struct dev_health_info dhi;
+	u8 reserved[61];
+};
+
 #pragma pack()
 
 void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err);
 void cper_print_gen_media(const char *pfx, const struct cper_sec_comp_event *event);
 void cper_print_dram(const char *pfx, const struct cper_sec_comp_event *event);
+void cper_print_mm_module(const char *pfx, const struct cper_sec_comp_event *event);
 
 #endif //__CPER_CXL_
-- 
2.17.1


      parent reply	other threads:[~2023-10-12 23:04 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-12 23:02 [PATCH 0/3] efi/cper, cxl: Decode CXL Component Events CPER Smita Koralahalli
2023-10-12 23:02 ` [PATCH 1/3] efi/cper, cxl: Decode CXL Component Events General Media Event Record Smita Koralahalli
2023-10-12 23:26   ` Dan Williams
2023-10-17 20:45     ` Smita Koralahalli
2023-10-13  0:25   ` Ira Weiny
2023-10-17 20:52     ` Smita Koralahalli
2023-10-18  8:56       ` Ard Biesheuvel
2023-10-18 18:48         ` Dan Williams
2023-10-19 20:52           ` Ard Biesheuvel
2023-10-12 23:03 ` [PATCH 2/3] efi/cper, cxl: Decode CXL Component Events DRAM " Smita Koralahalli
2023-10-12 23:03 ` Smita Koralahalli [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231012230301.58500-4-Smita.KoralahalliChannabasappa@amd.com \
    --to=smita.koralahallichannabasappa@amd.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=alison.schofield@intel.com \
    --cc=ardb@kernel.org \
    --cc=dan.j.williams@intel.com \
    --cc=ira.weiny@intel.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=vishal.l.verma@intel.com \
    --cc=yazen.ghannam@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).