From: Ira Weiny <ira.weiny@intel.com>
To: Dan Williams <dan.j.williams@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>,
Alison Schofield <alison.schofield@intel.com>,
Vishal Verma <vishal.l.verma@intel.com>,
Ira Weiny <ira.weiny@intel.com>,
Davidlohr Bueso <dave@stgolabs.net>,
Jonathan Cameron <Jonathan.Cameron@huawei.com>,
Dave Jiang <dave.jiang@intel.com>,
Ben Widawsky <bwidawsk@kernel.org>,
Steven Rostedt <rostedt@goodmis.org>,
linux-kernel@vger.kernel.org, linux-pci@vger.kernel.org,
linux-acpi@vger.kernel.org, linux-cxl@vger.kernel.org
Subject: [PATCH v7 5/8] cxl/mem: Trace Memory Module Event Record
Date: Tue, 17 Jan 2023 21:53:40 -0800 [thread overview]
Message-ID: <20221216-cxl-ev-log-v7-5-2316a5c8f7d8@intel.com> (raw)
In-Reply-To: <20221216-cxl-ev-log-v7-0-2316a5c8f7d8@intel.com>
CXL rev 3.0 section 8.2.9.2.1.3 defines the Memory Module Event Record.
Determine if the event read is memory module record and if so trace the
record.
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
Changes in v7:
Jonathan
Fix LSA spelling
---
drivers/cxl/core/mbox.c | 13 +++++
drivers/cxl/core/trace.h | 143 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/cxl/cxlmem.h | 26 +++++++++
3 files changed, 182 insertions(+)
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 552d98bc9f93..6ed8e3654939 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -734,6 +734,14 @@ static const uuid_t dram_event_uuid =
UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+static const uuid_t mem_mod_event_uuid =
+ UUID_INIT(0xfe927475, 0xdd59, 0x4339,
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
+
static void cxl_event_trace_record(const struct device *dev,
enum cxl_event_log_type type,
struct cxl_event_record_raw *record)
@@ -749,6 +757,11 @@ static void cxl_event_trace_record(const struct device *dev,
struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
trace_cxl_dram(dev, type, rec);
+ } else if (uuid_equal(id, &mem_mod_event_uuid)) {
+ struct cxl_event_mem_module *rec =
+ (struct cxl_event_mem_module *)record;
+
+ trace_cxl_memory_module(dev, type, rec);
} else {
/* For unknown record types print just the header */
trace_cxl_generic_event(dev, type, record);
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index b6321cfb1d9f..c7e2af9d9c4d 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -439,6 +439,149 @@ TRACE_EVENT(cxl_dram,
)
);
+/*
+ * Memory Module Event Record - MMER
+ *
+ * CXL res 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00
+#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01
+#define CXL_MMER_LIFE_USED_CHANGE 0x02
+#define CXL_MMER_TEMP_CHANGE 0x03
+#define CXL_MMER_DATA_PATH_ERROR 0x04
+#define CXL_MMER_LSA_ERROR 0x05
+#define show_dev_evt_type(type) __print_symbolic(type, \
+ { CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
+ { CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
+ { CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
+ { CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
+ { CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
+ { CXL_MMER_LSA_ERROR, "LSA Error" } \
+)
+
+/*
+ * Device Health Information - DHI
+ *
+ * CXL res 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0)
+#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1)
+#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2)
+#define show_health_status_flags(flags) __print_flags(flags, "|", \
+ { CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
+ { CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
+ { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
+)
+
+#define CXL_DHI_MS_NORMAL 0x00
+#define CXL_DHI_MS_NOT_READY 0x01
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST 0x02
+#define CXL_DHI_MS_ALL_DATA_LOST 0x03
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS 0x04
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN 0x05
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT 0x06
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS 0x07
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN 0x08
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT 0x09
+#define show_media_status(ms) __print_symbolic(ms, \
+ { CXL_DHI_MS_NORMAL, \
+ "Normal" }, \
+ { CXL_DHI_MS_NOT_READY, \
+ "Not Ready" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOST, \
+ "Write Persistency Lost" }, \
+ { CXL_DHI_MS_ALL_DATA_LOST, \
+ "All Data Lost" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \
+ "Write Persistency Loss in the Event of Power Loss" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \
+ "Write Persistency Loss in Event of Shutdown" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \
+ "Write Persistency Loss Imminent" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \
+ "All Data Loss in Event of Power Loss" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \
+ "All Data loss in the Event of Shutdown" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \
+ "All Data Loss Imminent" } \
+)
+
+#define CXL_DHI_AS_NORMAL 0x0
+#define CXL_DHI_AS_WARNING 0x1
+#define CXL_DHI_AS_CRITICAL 0x2
+#define show_two_bit_status(as) __print_symbolic(as, \
+ { CXL_DHI_AS_NORMAL, "Normal" }, \
+ { CXL_DHI_AS_WARNING, "Warning" }, \
+ { CXL_DHI_AS_CRITICAL, "Critical" } \
+)
+#define show_one_bit_status(as) __print_symbolic(as, \
+ { CXL_DHI_AS_NORMAL, "Normal" }, \
+ { CXL_DHI_AS_WARNING, "Warning" } \
+)
+
+#define CXL_DHI_AS_LIFE_USED(as) (as & 0x3)
+#define CXL_DHI_AS_DEV_TEMP(as) ((as & 0xC) >> 2)
+#define CXL_DHI_AS_COR_VOL_ERR_CNT(as) ((as & 0x10) >> 4)
+#define CXL_DHI_AS_COR_PER_ERR_CNT(as) ((as & 0x20) >> 5)
+
+TRACE_EVENT(cxl_memory_module,
+
+ TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
+ struct cxl_event_mem_module *rec),
+
+ TP_ARGS(dev, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+
+ /* Memory Module Event */
+ __field(u8, event_type)
+
+ /* Device Health Info */
+ __field(u8, health_status)
+ __field(u8, media_status)
+ __field(u8, life_used)
+ __field(u32, dirty_shutdown_cnt)
+ __field(u32, cor_vol_err_cnt)
+ __field(u32, cor_per_err_cnt)
+ __field(s16, device_temp)
+ __field(u8, add_status)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
+
+ /* Memory Module Event */
+ __entry->event_type = rec->event_type;
+
+ /* Device Health Info */
+ __entry->health_status = rec->info.health_status;
+ __entry->media_status = rec->info.media_status;
+ __entry->life_used = rec->info.life_used;
+ __entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
+ __entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
+ __entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
+ __entry->device_temp = get_unaligned_le16(rec->info.device_temp);
+ __entry->add_status = rec->info.add_status;
+ ),
+
+ CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
+ "as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
+ "as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
+ "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
+ show_dev_evt_type(__entry->event_type),
+ show_health_status_flags(__entry->health_status),
+ show_media_status(__entry->media_status),
+ show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
+ show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
+ show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
+ show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
+ __entry->life_used, __entry->device_temp,
+ __entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
+ __entry->cor_per_err_cnt
+ )
+);
+
#endif /* _CXL_EVENTS_H */
#define TRACE_INCLUDE_FILE trace
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 82832dd74e4c..af4f0876fdef 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -486,6 +486,32 @@ struct cxl_event_dram {
u8 reserved[0x17];
} __packed;
+/*
+ * Get Health Info Record
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+struct cxl_get_health_info {
+ u8 health_status;
+ u8 media_status;
+ u8 add_status;
+ u8 life_used;
+ u8 device_temp[2];
+ u8 dirty_shutdown_cnt[4];
+ u8 cor_vol_err_cnt[4];
+ u8 cor_per_err_cnt[4];
+} __packed;
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+struct cxl_event_mem_module {
+ struct cxl_event_record_hdr hdr;
+ u8 event_type;
+ struct cxl_get_health_info info;
+ u8 reserved[0x3d];
+} __packed;
+
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
--
2.39.0
next prev parent reply other threads:[~2023-01-18 5:56 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-18 5:53 [PATCH v7 0/8] cxl: Process event logs Ira Weiny
2023-01-18 5:53 ` [PATCH v7 1/8] cxl/mem: Read, trace, and clear events on driver load Ira Weiny
2023-01-26 21:53 ` Dan Williams
2023-01-27 0:49 ` Dan Williams
2023-01-27 19:45 ` Ira Weiny
2023-05-05 9:47 ` Huai-Cheng
2023-05-08 2:41 ` Ira Weiny
2023-05-09 21:41 ` Dave Jiang
2023-01-18 5:53 ` [PATCH v7 2/8] cxl/mem: Wire up event interrupts ira.weiny
2023-01-26 22:00 ` Dan Williams
2023-05-09 21:42 ` Dave Jiang
[not found] ` <tencent_D9D9D358330CA573E23D490C6EE13E0DC105@qq.com>
2023-05-19 1:38 ` CXL memory device not created correctly Davidlohr Bueso
2023-05-19 15:10 ` Jonathan Cameron
2023-05-19 15:20 ` Ira Weiny
2023-05-19 15:37 ` Jonathan Cameron
2023-05-31 2:18 ` Luis Chamberlain
2023-06-01 3:12 ` Ira Weiny
2023-06-01 3:46 ` Davidlohr Bueso
2023-01-18 5:53 ` [PATCH v7 3/8] cxl/mem: Trace General Media Event Record Ira Weiny
2023-05-09 21:47 ` Dave Jiang
2023-01-18 5:53 ` [PATCH v7 4/8] cxl/mem: Trace DRAM " Ira Weiny
2023-05-09 21:48 ` Dave Jiang
2023-01-18 5:53 ` Ira Weiny [this message]
2023-05-09 21:52 ` [PATCH v7 5/8] cxl/mem: Trace Memory Module " Dave Jiang
2023-01-18 5:53 ` [PATCH v7 6/8] cxl/test: Add generic mock events Ira Weiny
2023-05-09 22:03 ` Dave Jiang
2023-01-18 5:53 ` [PATCH v7 7/8] cxl/test: Add specific events Ira Weiny
2023-05-09 22:08 ` Dave Jiang
2023-01-18 5:53 ` [PATCH v7 8/8] cxl/test: Simulate event log overflow Ira Weiny
2023-05-09 22:31 ` Dave Jiang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221216-cxl-ev-log-v7-5-2316a5c8f7d8@intel.com \
--to=ira.weiny@intel.com \
--cc=Jonathan.Cameron@huawei.com \
--cc=alison.schofield@intel.com \
--cc=bhelgaas@google.com \
--cc=bwidawsk@kernel.org \
--cc=dan.j.williams@intel.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=rostedt@goodmis.org \
--cc=vishal.l.verma@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).