All of lore.kernel.org
 help / color / mirror / Atom feed
From: ira.weiny@intel.com
To: Dan Williams <dan.j.williams@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>,
	Alison Schofield <alison.schofield@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	Ben Widawsky <bwidawsk@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	linux-kernel@vger.kernel.org, linux-cxl@vger.kernel.org
Subject: [RFC PATCH 6/9] cxl/mem: Trace Memory Module Event Record
Date: Fri, 12 Aug 2022 22:32:40 -0700	[thread overview]
Message-ID: <20220813053243.757363-7-ira.weiny@intel.com> (raw)
In-Reply-To: <20220813053243.757363-1-ira.weiny@intel.com>

From: Ira Weiny <ira.weiny@intel.com>

CXL v3.0 section 8.2.9.2.1.3 defines the Memory Module Event Record.

Determine if the event read is memory module record and if so trace the
record.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/cxl/core/mbox.c           |  16 +++
 drivers/cxl/cxlmem.h              |  25 +++++
 include/trace/events/cxl-events.h | 155 ++++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 6414588a3c7b..99b09bfeaff5 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -725,6 +725,14 @@ static const uuid_t dram_event_uuid =
 	UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
 		  0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
 
+/*
+ * Memory Module Event Record
+ * CXL v3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+static const uuid_t mem_mod_event_uuid =
+	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
+		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
+
 static void cxl_trace_event_record(const char *dev_name,
 				   enum cxl_event_log_type type,
 				   struct cxl_get_event_payload *payload)
@@ -747,6 +755,14 @@ static void cxl_trace_event_record(const char *dev_name,
 		return;
 	}
 
+	if (uuid_equal(id, &mem_mod_event_uuid)) {
+		struct cxl_evt_mem_mod_rec *rec =
+				(struct cxl_evt_mem_mod_rec *)&payload->record;
+
+		trace_cxl_mem_mod_event(dev_name, type, rec);
+		return;
+	}
+
 	/* For unknown record types print just the header */
 	trace_cxl_event(dev_name, type, &payload->record);
 }
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 50536c0a7850..a02a41dfd988 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -445,6 +445,31 @@ struct cxl_evt_dram_rec {
 	u8 correction_mask[CXL_EVT_DER_CORRECTION_MASK_SIZE];
 } __packed;
 
+/*
+ * Get Health Info Record
+ * CXL v3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+struct cxl_get_health_info {
+	u8 health_status;
+	u8 media_status;
+	u8 add_status;
+	u8 life_used;
+	u16 device_temp;
+	u32 dirty_shutdown_cnt;
+	u32 cor_vol_err_cnt;
+	u32 cor_per_err_cnt;
+} __packed;
+
+/*
+ * Memory Module Event Record
+ * CXL v3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+struct cxl_evt_mem_mod_rec {
+	struct cxl_event_record_hdr hdr;
+	u8 event_type;
+	struct cxl_get_health_info info;
+} __packed;
+
 struct cxl_mbox_get_partition_info {
 	__le64 active_volatile_cap;
 	__le64 active_persistent_cap;
diff --git a/include/trace/events/cxl-events.h b/include/trace/events/cxl-events.h
index db9b34ddd240..dbbe25fee25c 100644
--- a/include/trace/events/cxl-events.h
+++ b/include/trace/events/cxl-events.h
@@ -358,6 +358,161 @@ TRACE_EVENT(cxl_dram_event,
 		)
 );
 
+/*
+ * Memory Module Event Record - MMER
+ *
+ * CXL v2.0 section 8.2.9.1.1.3; Table 156, Table 181
+ *
+ * Device Health Information - DHI; Table 181
+ */
+#define CXL_MMER_HEALTH_STATUS_CHANGE		0x00
+#define CXL_MMER_MEDIA_STATUS_CHANGE		0x01
+#define CXL_MMER_LIFE_USED_CHANGE		0x02
+#define CXL_MMER_TEMP_CHANGE			0x03
+#define CXL_MMER_DATA_PATH_ERROR		0x04
+#define CXL_MMER_LAS_ERROR			0x05
+#define show_dev_evt_type(type)	__print_symbolic(type,			   \
+	{ CXL_MMER_HEALTH_STATUS_CHANGE,	"Health Status Change"	}, \
+	{ CXL_MMER_MEDIA_STATUS_CHANGE,		"Media Status Change"	}, \
+	{ CXL_MMER_LIFE_USED_CHANGE,		"Life Used Change"	}, \
+	{ CXL_MMER_TEMP_CHANGE,			"Temperature Change"	}, \
+	{ CXL_MMER_DATA_PATH_ERROR,		"Data Path Error"	}, \
+	{ CXL_MMER_LAS_ERROR,			"LSA Error"		}  \
+)
+
+#define CXL_DHI_HS_MAINTENANCE_NEEDED				BIT(0)
+#define CXL_DHI_HS_PERFORMANCE_DEGRADED				BIT(1)
+#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED			BIT(2)
+#define show_health_status_flags(flags)	__print_flags(flags, "|",	   \
+	{ CXL_DHI_HS_MAINTENANCE_NEEDED,	"Maintenance Needed"	}, \
+	{ CXL_DHI_HS_PERFORMANCE_DEGRADED,	"Performance Degraded"	}, \
+	{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED,	"Replacement Needed"	}  \
+)
+
+#define CXL_DHI_MS_NORMAL							0x00
+#define CXL_DHI_MS_NOT_READY							0x01
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST					0x02
+#define CXL_DHI_MS_ALL_DATA_LOST						0x03
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS			0x04
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN			0x05
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT				0x06
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS				0x07
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN				0x08
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT					0x09
+#define show_media_status(ms)	__print_symbolic(ms,			   \
+	{ CXL_DHI_MS_NORMAL,						   \
+		"Normal"						}, \
+	{ CXL_DHI_MS_NOT_READY,						   \
+		"Not Ready"						}, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOST,				   \
+		"Write Persistency Lost"				}, \
+	{ CXL_DHI_MS_ALL_DATA_LOST,					   \
+		"All Data Lost"						}, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS,		   \
+		"Write Persistency Loss in the Event of Power Loss"	}, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN,		   \
+		"Write Persistency Loss in Event of Shutdown"		}, \
+	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT,			   \
+		"Write Persistency Loss Imminent"			}, \
+	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS,		   \
+		"All Data Loss in Event of Power Loss"			}, \
+	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN,		   \
+		"All Data loss in the Event of Shutdown"		}, \
+	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT,			   \
+		"All Data Loss Imminent"				}  \
+)
+
+#define CXL_DHI_AS_NORMAL		0x0
+#define CXL_DHI_AS_WARNING		0x1
+#define CXL_DHI_AS_CRITICAL		0x2
+#define show_add_status(as) __print_symbolic(as,	   \
+	{ CXL_DHI_AS_NORMAL,		"Normal"	}, \
+	{ CXL_DHI_AS_WARNING,		"Warning"	}, \
+	{ CXL_DHI_AS_CRITICAL,		"Critical"	}  \
+)
+
+#define CXL_DHI_AS_LIFE_USED(as)			(as & 0x3)
+#define CXL_DHI_AS_DEV_TEMP(as)				((as & 0xC) >> 2)
+#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)			((as & 0x10) >> 4)
+#define CXL_DHI_AS_COR_PER_ERR_CNT(as)			((as & 0x20) >> 5)
+
+TRACE_EVENT(cxl_mem_mod_event,
+
+	TP_PROTO(const char *dev_name, enum cxl_event_log_type log,
+		 struct cxl_evt_mem_mod_rec *rec),
+
+	TP_ARGS(dev_name, log, rec),
+
+	TP_STRUCT__entry(
+		/* Common */
+		__string(dev_name, dev_name)
+		__field(int, log)
+		__array(u8, id, UUID_SIZE)
+		__field(u32, flags)
+		__field(u16, handle)
+		__field(u16, related_handle)
+		__field(u64, timestamp)
+
+		/* Memory Module Event */
+		__field(u8, event_type)
+
+		/* Device Health Info */
+		__field(u8, health_status)
+		__field(u8, media_status)
+		__field(u8, life_used)
+		__field(u32, dirty_shutdown_cnt)
+		__field(u32, cor_vol_err_cnt)
+		__field(u32, cor_per_err_cnt)
+		__field(s16, device_temp)
+		__field(u8, add_status)
+	),
+
+	TP_fast_assign(
+		/* Common */
+		__assign_str(dev_name, dev_name);
+		memcpy(__entry->id, &rec->hdr.id, UUID_SIZE);
+		__entry->log = log;
+		__entry->flags = le32_to_cpu(rec->hdr.flags_length) >> 8;
+		__entry->handle = le16_to_cpu(rec->hdr.handle);
+		__entry->related_handle = le16_to_cpu(rec->hdr.related_handle);
+		__entry->timestamp = le64_to_cpu(rec->hdr.timestamp);
+
+		/* Memory Module Event */
+		__entry->event_type = rec->event_type;
+
+		/* Device Health Info */
+		__entry->health_status = rec->info.health_status;
+		__entry->media_status = rec->info.media_status;
+		__entry->life_used = rec->info.life_used;
+		__entry->dirty_shutdown_cnt = le32_to_cpu(rec->info.dirty_shutdown_cnt);
+		__entry->cor_vol_err_cnt = le32_to_cpu(rec->info.cor_vol_err_cnt);
+		__entry->cor_per_err_cnt = le32_to_cpu(rec->info.cor_per_err_cnt);
+		__entry->device_temp = le16_to_cpu(rec->info.device_temp);
+		__entry->add_status = rec->info.add_status;
+	),
+
+	TP_printk("%s: %s time=%llu id=%pUl handle=%x related_handle=%x hdr_flags='%s': " \
+		  "evt_type='%s' health_status='%s' media_status='%s' as_life_used=%s " \
+		  "as_dev_temp=%s as_cor_vol_err_cnt=%s as_cor_per_err_cnt=%s " \
+		  "life_used=%u dev_temp=%d dirty_shutdown_cnt=%u cor_vol_err_cnt=%u " \
+		  "cor_per_err_cnt=%u",
+		__get_str(dev_name), show_log_type(__entry->log),
+		__entry->timestamp, __entry->id, __entry->handle,
+		__entry->related_handle, show_hdr_flags(__entry->flags),
+
+		show_dev_evt_type(__entry->event_type),
+		show_health_status_flags(__entry->health_status),
+		show_media_status(__entry->media_status),
+		show_add_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
+		show_add_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
+		show_add_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
+		show_add_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
+		__entry->life_used, __entry->device_temp,
+		__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
+		__entry->cor_per_err_cnt)
+);
+
+
 #endif /* _CXL_TRACE_EVENTS_H */
 
 /* This part must be outside protection */
-- 
2.35.3


  parent reply	other threads:[~2022-08-13  5:33 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-13  5:32 [RFC PATCH 0/9] CXL: Read and clear event logs ira.weiny
2022-08-13  5:32 ` [RFC PATCH 1/9] cxl/mem: Implement Get Event Records command ira.weiny
2022-08-16 16:39   ` Steven Rostedt
2022-08-16 16:41     ` Steven Rostedt
2022-08-16 23:11       ` Ira Weiny
2022-08-16 23:35     ` Ira Weiny
2022-08-17 22:54   ` Dave Jiang
2022-09-07  4:53     ` Ira Weiny
2022-08-24 15:50   ` Jonathan Cameron
2022-09-07  4:28     ` Ira Weiny
2022-09-08 12:52       ` Jonathan Cameron
2022-09-09 20:53         ` Ira Weiny
2022-09-20 15:49           ` Jonathan Cameron
2022-09-20 20:23             ` Dave Jiang
2022-09-20 22:10               ` Ira Weiny
2022-09-21 16:36                 ` Jonathan Cameron
2022-09-22  4:16                   ` Ira Weiny
2022-08-13  5:32 ` [RFC PATCH 2/9] cxl/mem: Implement Clear " ira.weiny
2022-08-24 15:55   ` Jonathan Cameron
2022-09-09 21:35     ` Ira Weiny
2022-08-13  5:32 ` [RFC PATCH 3/9] cxl/mem: Clear events on driver load ira.weiny
2022-08-24 15:57   ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 4/9] cxl/mem: Trace General Media Event Record ira.weiny
2022-08-24 16:11   ` Jonathan Cameron
2022-09-12 22:38     ` Ira Weiny
2022-09-20 15:52       ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 5/9] cxl/mem: Trace DRAM " ira.weiny
2022-08-25 10:46   ` Jonathan Cameron
2022-09-12 23:04     ` Ira Weiny
2022-09-20 16:02       ` Jonathan Cameron
2022-08-13  5:32 ` ira.weiny [this message]
2022-08-25 10:58   ` [RFC PATCH 6/9] cxl/mem: Trace Memory Module " Jonathan Cameron
2022-09-14 21:17     ` Ira Weiny
2022-09-20 16:11       ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 7/9] cxl/test: Add generic mock events ira.weiny
2022-08-25 11:31   ` Jonathan Cameron
2022-09-15 18:53     ` Ira Weiny
2022-09-20 16:17       ` Jonathan Cameron
2022-09-26 21:39         ` Ira Weiny
2022-09-27 13:56           ` Jonathan Cameron
2022-09-27 16:13             ` Ira Weiny
2022-09-28  9:49               ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 8/9] cxl/test: Add specific events ira.weiny
2022-08-25 11:37   ` Jonathan Cameron
2022-08-13  5:32 ` [RFC PATCH 9/9] cxl/test: Simulate event log overflow ira.weiny
2022-08-16 16:44   ` Steven Rostedt
2022-08-22 16:18 ` [RFC PATCH 0/9] CXL: Read and clear event logs Davidlohr Bueso
2022-08-22 22:53   ` Ira Weiny
2022-08-23 16:12     ` Davidlohr Bueso
2022-08-24 10:07     ` Jonathan Cameron
2022-09-01 18:10       ` Dave Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220813053243.757363-7-ira.weiny@intel.com \
    --to=ira.weiny@intel.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=alison.schofield@intel.com \
    --cc=bwidawsk@kernel.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave@stgolabs.net \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=vishal.l.verma@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.