linux-cxl.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events
@ 2023-04-12  8:33 shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 1/7] rasdaemon: Add common function to convert timestamp in the CXL event records to the broken-down time format shiju.jose
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Log and record the following generic CXL error records reported through
the kernel trace events.
- cxl_overflow
- cxl_generic_event
- cxl_general_media
- cxl_dram
- cxl_memory_module

Shiju Jose (7):
  rasdaemon: Add common function to convert timestamp in the CXL event
    records to the broken-down time format
  rasdaemon: Add common function to get timestamp for the event
  rasdaemon: Add support for the CXL overflow events
  rasdaemon: Add support for the CXL generic events
  rasdaemon: Add support for the CXL general media events
  rasdaemon: Add support for the CXL dram events
  rasdaemon: Add support for the CXL memory module events

 ras-cxl-handler.c | 826 +++++++++++++++++++++++++++++++++++++++++++---
 ras-cxl-handler.h |  15 +
 ras-events.c      |  45 +++
 ras-events.h      |   5 +
 ras-record.c      | 420 +++++++++++++++++++++++
 ras-record.h      |  99 ++++++
 ras-report.c      | 476 ++++++++++++++++++++++++++
 ras-report.h      |  10 +
 8 files changed, 1853 insertions(+), 43 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH 1/7] rasdaemon: Add common function to convert timestamp in the CXL event records to the broken-down time format
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 2/7] rasdaemon: Add common function to get timestamp for the event shiju.jose
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add a common function to convert the nanosecond timestamps in the CXL
event records to the broken-down time format.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index adc2fa3..ad93558 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -23,6 +23,25 @@
 #include "ras-report.h"
 #include <endian.h>
 
+/* Common Functions */
+static void convert_timestamp(unsigned long long ts, char *ts_ptr, uint16_t size)
+{
+	/* CXL Specification 3.0
+	 * @ts is a CXL timestamp - the number of unsigned nanoseconds
+	 * that have elapsed since midnight, 01-Jan-1970 UTC
+	 */
+	time_t ts_secs = ts / 1000000000ULL; /* nanoseconds -> whole seconds */
+	struct tm *tm;
+
+	tm = localtime(&ts_secs);
+	if (tm)
+		strftime(ts_ptr, size, "%Y-%m-%d %H:%M:%S %z", tm);
+
+	if (!ts || !tm) /* zero timestamp or failed conversion: write the fixed epoch string */
+		strncpy(ts_ptr, "1970-01-01 00:00:00 +0000",
+			size);
+}
+
 /* Poison List: Payload out flags */
 #define CXL_POISON_FLAG_MORE            BIT(0)
 #define CXL_POISON_FLAG_OVERFLOW        BIT(1)
@@ -160,22 +179,7 @@ int ras_cxl_poison_event_handler(struct trace_seq *s,
 	if (ev.flags & CXL_POISON_FLAG_OVERFLOW) {
 		if (tep_get_field_val(s,  event, "overflow_t", record, &val, 1) < 0)
 			return -1;
-		if (val) {
-			/* CXL Specification 3.0
-			 * Overflow timestamp - The number of unsigned nanoseconds
-			 * that have elapsed since midnight, 01-Jan-1970 UTC
-			 */
-			time_t ovf_ts_secs = val / 1000000000ULL;
-
-			tm = localtime(&ovf_ts_secs);
-			if (tm) {
-				strftime(ev.overflow_ts, sizeof(ev.overflow_ts),
-					 "%Y-%m-%d %H:%M:%S %z", tm);
-			}
-		}
-		if (!val || !tm)
-			strncpy(ev.overflow_ts, "1970-01-01 00:00:00 +0000",
-				sizeof(ev.overflow_ts));
+		convert_timestamp(val, ev.overflow_ts, sizeof(ev.overflow_ts));
 	} else
 		strncpy(ev.overflow_ts, "1970-01-01 00:00:00 +0000", sizeof(ev.overflow_ts));
 	if (trace_seq_printf(s, "overflow timestamp:%s\n", ev.overflow_ts) <= 0)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 2/7] rasdaemon: Add common function to get timestamp for the event
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 1/7] rasdaemon: Add common function to convert timestamp in the CXL event records to the broken-down time format shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 3/7] rasdaemon: Add support for the CXL overflow events shiju.jose
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add a common function to get the timestamp of the
reported event.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 44 +++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index ad93558..025e582 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -42,6 +42,20 @@ static void convert_timestamp(unsigned long long ts, char *ts_ptr, uint16_t size
 			size);
 }
 
+static void get_timestamp(struct trace_seq *s, struct tep_record *record,
+			  struct ras_events *ras, char *ts_ptr, uint16_t size)
+{ /* Format the time of @record into @ts_ptr (at most @size bytes); @s is not used here */
+	time_t now;
+	struct tm *tm;
+
+	now = record->ts / user_hz + ras->uptime_diff; /* presumably trace time -> wall-clock seconds; TODO confirm units of user_hz */
+	tm = localtime(&now);
+	if (tm)
+		strftime(ts_ptr, size, "%Y-%m-%d %H:%M:%S %z", tm);
+	else /* localtime() failed: fall back to the fixed epoch string */
+		strncpy(ts_ptr, "1970-01-01 00:00:00 +0000", size);
+}
+
 /* Poison List: Payload out flags */
 #define CXL_POISON_FLAG_MORE            BIT(0)
 #define CXL_POISON_FLAG_OVERFLOW        BIT(1)
@@ -68,17 +82,9 @@ int ras_cxl_poison_event_handler(struct trace_seq *s,
 	int len;
 	unsigned long long val;
 	struct ras_events *ras = context;
-	time_t now;
-	struct tm *tm;
 	struct ras_cxl_poison_event ev;
 
-	now = record->ts / user_hz + ras->uptime_diff;
-	tm = localtime(&now);
-	if (tm)
-		strftime(ev.timestamp, sizeof(ev.timestamp),
-			 "%Y-%m-%d %H:%M:%S %z", tm);
-	else
-		strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp));
+	get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp));
 	if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0)
 		return -1;
 
@@ -277,19 +283,11 @@ int ras_cxl_aer_ue_event_handler(struct trace_seq *s,
 {
 	int len, i;
 	unsigned long long val;
-	time_t now;
-	struct tm *tm;
 	struct ras_events *ras = context;
 	struct ras_cxl_aer_ue_event ev;
 
 	memset(&ev, 0, sizeof(ev));
-	now = record->ts / user_hz + ras->uptime_diff;
-	tm = localtime(&now);
-	if (tm)
-		strftime(ev.timestamp, sizeof(ev.timestamp),
-			 "%Y-%m-%d %H:%M:%S %z", tm);
-	else
-		strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp));
+	get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp));
 	if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0)
 		return -1;
 
@@ -372,18 +370,10 @@ int ras_cxl_aer_ce_event_handler(struct trace_seq *s,
 {
 	int len;
 	unsigned long long val;
-	time_t now;
-	struct tm *tm;
 	struct ras_events *ras = context;
 	struct ras_cxl_aer_ce_event ev;
 
-	now = record->ts / user_hz + ras->uptime_diff;
-	tm = localtime(&now);
-	if (tm)
-		strftime(ev.timestamp, sizeof(ev.timestamp),
-			 "%Y-%m-%d %H:%M:%S %z", tm);
-	else
-		strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp));
+	get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp));
 	if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0)
 		return -1;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 3/7] rasdaemon: Add support for the CXL overflow events
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 1/7] rasdaemon: Add common function to convert timestamp in the CXL event records to the broken-down time format shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 2/7] rasdaemon: Add common function to get timestamp for the event shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 4/7] rasdaemon: Add support for the CXL generic events shiju.jose
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add support to log and record the CXL overflow events.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 ras-cxl-handler.h |  3 ++
 ras-events.c      |  9 +++++
 ras-events.h      |  1 +
 ras-record.c      | 69 +++++++++++++++++++++++++++++++++
 ras-record.h      | 15 ++++++++
 ras-report.c      | 77 +++++++++++++++++++++++++++++++++++++
 ras-report.h      |  2 +
 8 files changed, 274 insertions(+)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index 025e582..b08c5e3 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -418,3 +418,101 @@ int ras_cxl_aer_ce_event_handler(struct trace_seq *s,
 
 	return 0;
 }
+
+/*
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
+ */
+enum cxl_event_log_type {
+	CXL_EVENT_TYPE_INFO = 0x00,
+	CXL_EVENT_TYPE_WARN,
+	CXL_EVENT_TYPE_FAIL,
+	CXL_EVENT_TYPE_FATAL,
+	CXL_EVENT_TYPE_UNKNOWN	/* has no case of its own in cxl_event_log_type_str(): reported as "Unknown" */
+};
+
+static char *cxl_event_log_type_str(uint32_t log_type)
+{ /* Return a string literal naming @log_type; the result must not be modified or freed */
+
+	switch (log_type) {
+	case CXL_EVENT_TYPE_INFO:
+		return "Informational";
+	case CXL_EVENT_TYPE_WARN:
+		return "Warning";
+	case CXL_EVENT_TYPE_FAIL:
+		return "Failure";
+	case CXL_EVENT_TYPE_FATAL:
+		return "Fatal";
+	default: /* CXL_EVENT_TYPE_UNKNOWN and any other value */
+		break;
+	}
+
+	return "Unknown";
+}
+
+int ras_cxl_overflow_event_handler(struct trace_seq *s,
+				   struct tep_record *record,
+				   struct tep_event *event, void *context)
+{ /* Decode one cxl_overflow trace record into @s, then store and report it; -1 on a missing field or print failure */
+	int len;
+	unsigned long long val;
+	struct ras_events *ras = context;
+	struct ras_cxl_overflow_event ev;
+
+	memset(&ev, 0, sizeof(ev));
+	get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp));
+	if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0)
+		return -1;
+
+	ev.memdev = tep_get_field_raw(s, event, "memdev", record, &len, 1);
+	if (!ev.memdev)
+		return -1;
+	if (trace_seq_printf(s, "memdev:%s ", ev.memdev) <= 0)
+		return -1;
+
+	ev.host = tep_get_field_raw(s, event, "host", record, &len, 1);
+	if (!ev.host)
+		return -1;
+	if (trace_seq_printf(s, "host:%s ", ev.host) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "serial", record, &val, 1) < 0)
+		return -1;
+	ev.serial = val;
+	if (trace_seq_printf(s, "serial:0x%llx ", (unsigned long long)ev.serial) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "log", record, &val, 1) < 0)
+		return -1;
+	ev.log_type = cxl_event_log_type_str(val); /* numeric log type -> string literal */
+	if (trace_seq_printf(s, "log type:%s ", ev.log_type) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "count", record, &val, 1) < 0)
+		return -1;
+	ev.count = val;
+
+	if (tep_get_field_val(s,  event, "first_ts", record, &val, 1) < 0)
+		return -1;
+	convert_timestamp(val, ev.first_ts, sizeof(ev.first_ts)); /* nanoseconds -> text */
+
+	if (tep_get_field_val(s,  event, "last_ts", record, &val, 1) < 0)
+		return -1;
+	convert_timestamp(val, ev.last_ts, sizeof(ev.last_ts));
+
+	if (ev.count) { /* the count and time range are printed only when count is non-zero */
+		if (trace_seq_printf(s, "%u errors from %s to %s\n",
+				     ev.count, ev.first_ts, ev.last_ts) <= 0)
+			return -1;
+	}
+	/* Insert data into the database */
+#ifdef HAVE_SQLITE3
+	ras_store_cxl_overflow_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_cxl_overflow_event(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h
index 711daf4..e7847ec 100644
--- a/ras-cxl-handler.h
+++ b/ras-cxl-handler.h
@@ -29,4 +29,7 @@ int ras_cxl_aer_ue_event_handler(struct trace_seq *s,
 int ras_cxl_aer_ce_event_handler(struct trace_seq *s,
 				 struct tep_record *record,
 				 struct tep_event *event, void *context);
+int ras_cxl_overflow_event_handler(struct trace_seq *s,
+				   struct tep_record *record,
+				   struct tep_event *event, void *context);
 #endif
diff --git a/ras-events.c b/ras-events.c
index 716317b..ded8648 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -248,6 +248,7 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_poison", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_aer_uncorrectable_error", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_aer_correctable_error", enable);
+	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_overflow", enable);
 #endif
 
 free_ras:
@@ -1004,6 +1005,14 @@ int handle_ras_events(int record_events)
 	else
 		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
 		    "cxl", "cxl_aer_correctable_error");
+
+	rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_overflow",
+			       ras_cxl_overflow_event_handler, NULL, CXL_OVERFLOW_EVENT);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "cxl", "cxl_overflow");
 #endif
 
 	if (!num_events) {
diff --git a/ras-events.h b/ras-events.h
index dc7bdfb..ea590c9 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -42,6 +42,7 @@ enum {
 	CXL_POISON_EVENT,
 	CXL_AER_UE_EVENT,
 	CXL_AER_CE_EVENT,
+	CXL_OVERFLOW_EVENT,
 	NR_EVENTS
 };
 
diff --git a/ras-record.c b/ras-record.c
index 82e310b..57fe117 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -720,6 +720,59 @@ int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_eve
 
 	return rc;
 }
+
+/*
+ * Table and functions to handle cxl:cxl_overflow
+ */
+static const struct db_fields cxl_overflow_event_fields[] = { /* columns after "id" are in the same order as bind indexes 1..8 in ras_store_cxl_overflow_event() */
+	{ .name = "id",			.type = "INTEGER PRIMARY KEY" },
+	{ .name = "timestamp",		.type = "TEXT" },
+	{ .name = "memdev",		.type = "TEXT" },
+	{ .name = "host",		.type = "TEXT" },
+	{ .name = "serial",		.type = "INTEGER" },
+	{ .name = "log_type",		.type = "TEXT" },
+	{ .name = "count",		.type = "INTEGER" },
+	{ .name = "first_ts",		.type = "TEXT" },
+	{ .name = "last_ts",		.type = "TEXT" },
+};
+
+static const struct db_table_descriptor cxl_overflow_event_tab = { /* table descriptor passed to ras_mc_create_table()/ras_mc_prepare_stmt() */
+	.name = "cxl_overflow_event",
+	.fields = cxl_overflow_event_fields,
+	.num_fields = ARRAY_SIZE(cxl_overflow_event_fields),
+};
+
+int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev)
+{ /* Insert @ev as one row; returns the sqlite3_reset() result, or 0 when no statement is available */
+	int rc;
+	struct sqlite3_priv *priv = ras->db_priv;
+
+	if (!priv || !priv->stmt_cxl_overflow_event) /* no database handle or statement: nothing to store */
+		return 0;
+	log(TERM, LOG_INFO, "cxl_overflow_event store: %p\n", priv->stmt_cxl_overflow_event);
+
+	sqlite3_bind_text(priv->stmt_cxl_overflow_event, 1, ev->timestamp, -1, NULL); /* NULL destructor: string is not copied, it must stay valid until the step below */
+	sqlite3_bind_text(priv->stmt_cxl_overflow_event, 2, ev->memdev, -1, NULL);
+	sqlite3_bind_text(priv->stmt_cxl_overflow_event, 3, ev->host, -1, NULL);
+	sqlite3_bind_int64(priv->stmt_cxl_overflow_event, 4, ev->serial);
+	sqlite3_bind_text(priv->stmt_cxl_overflow_event, 5, ev->log_type, -1, NULL);
+	sqlite3_bind_int(priv->stmt_cxl_overflow_event, 6, ev->count);
+	sqlite3_bind_text(priv->stmt_cxl_overflow_event, 7, ev->first_ts, -1, NULL);
+	sqlite3_bind_text(priv->stmt_cxl_overflow_event, 8, ev->last_ts, -1, NULL);
+
+	rc = sqlite3_step(priv->stmt_cxl_overflow_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE) /* a failed step is only logged; the reset below still runs */
+		log(TERM, LOG_ERR,
+		    "Failed to do cxl_overflow_event step on sqlite: error = %d\n", rc);
+	rc = sqlite3_reset(priv->stmt_cxl_overflow_event); /* overwrites the step result in rc */
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed reset cxl_overflow_event on sqlite: error = %d\n",
+		    rc);
+	log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
 #endif
 
 /*
@@ -1083,6 +1136,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 		if (rc != SQLITE_OK)
 			goto error;
 	}
+
+	rc = ras_mc_create_table(priv, &cxl_overflow_event_tab);
+	if (rc == SQLITE_OK) {
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_overflow_event,
+					 &cxl_overflow_event_tab);
+		if (rc != SQLITE_OK)
+			goto error;
+	}
 #endif
 
 	ras->db_priv = priv;
@@ -1221,6 +1282,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
 			    "cpu %u: Failed to finalize cxl_aer_ce_event sqlite: error = %d\n",
 			    cpu, rc);
 	}
+
+	if (priv->stmt_cxl_overflow_event) {
+		rc = sqlite3_finalize(priv->stmt_cxl_overflow_event);
+		if (rc != SQLITE_OK)
+			log(TERM, LOG_ERR,
+			    "cpu %u: Failed to finalize cxl_overflow_event sqlite: error = %d\n",
+			    cpu, rc);
+	}
 #endif
 
 	rc = sqlite3_close_v2(db);
diff --git a/ras-record.h b/ras-record.h
index ab7153d..90db6ad 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -152,6 +152,17 @@ struct ras_cxl_aer_ce_event {
 	uint32_t error_status;
 };
 
+struct ras_cxl_overflow_event {
+	char timestamp[64];	/* formatted time of the trace record */
+	const char *memdev;	/* "memdev" trace field */
+	const char *host;	/* "host" trace field */
+	uint64_t serial;	/* "serial" trace field */
+	const char *log_type;	/* log type name, e.g. "Warning" */
+	char first_ts[64];	/* "first_ts" trace field, formatted as text */
+	char last_ts[64];	/* "last_ts" trace field, formatted as text */
+	uint16_t count;		/* "count" trace field */
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
 struct ras_extlog_event;
@@ -164,6 +175,7 @@ struct ras_mf_event;
 struct ras_cxl_poison_event;
 struct ras_cxl_aer_ue_event;
 struct ras_cxl_aer_ce_event;
+struct ras_cxl_overflow_event;
 
 #ifdef HAVE_SQLITE3
 
@@ -200,6 +212,7 @@ struct sqlite3_priv {
 	sqlite3_stmt	*stmt_cxl_poison_event;
 	sqlite3_stmt	*stmt_cxl_aer_ue_event;
 	sqlite3_stmt	*stmt_cxl_aer_ce_event;
+	sqlite3_stmt	*stmt_cxl_overflow_event;
 #endif
 };
 
@@ -231,6 +244,7 @@ int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev);
 int ras_store_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev);
 int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev);
 int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev);
+int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
@@ -247,6 +261,7 @@ static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event
 static inline int ras_store_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev) { return 0; };
 static inline int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev) { return 0; };
 static inline int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; };
+static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
 
 #endif
 
diff --git a/ras-report.c b/ras-report.c
index 63b47f5..dbed454 100644
--- a/ras-report.c
+++ b/ras-report.c
@@ -421,6 +421,36 @@ static int set_cxl_aer_ce_event_backtrace(char *buf, struct ras_cxl_aer_ce_event
 	return 0;
 }
 
+static int set_cxl_overflow_event_backtrace(char *buf, struct ras_cxl_overflow_event *ev)
+{
+	/* Append @ev as a "BACKTRACE=" key=value list to @buf; returns -1 on NULL input, else 0. */
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if (!buf || !ev)
+		return -1;
+
+	snprintf(bt_buf, sizeof(bt_buf), "BACKTRACE="	\
+						"timestamp=%s\n"	\
+						"memdev=%s\n"		\
+						"host=%s\n"		\
+						"serial=0x%llx\n"	\
+						"log_type=%s\n"		\
+						"count=%u\n"		\
+						"first_ts=%s\n"		\
+						"last_ts=%s\n",		\
+						ev->timestamp,		\
+						ev->memdev,		\
+						ev->host,		\
+						(unsigned long long)ev->serial, /* uint64_t is not 'long' on every ABI */ \
+						ev->log_type,		\
+						ev->count,		\
+						ev->first_ts,		\
+						ev->last_ts);
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
 static int commit_report_backtrace(int sockfd, int type, void *ev){
 	char buf[MAX_BACKTRACE_SIZE];
 	char *pbuf = buf;
@@ -467,6 +497,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){
 	case CXL_AER_CE_EVENT:
 		rc = set_cxl_aer_ce_event_backtrace(buf, (struct ras_cxl_aer_ce_event *)ev);
 		break;
+	case CXL_OVERFLOW_EVENT:
+		rc = set_cxl_overflow_event_backtrace(buf, (struct ras_cxl_overflow_event *)ev);
+		break;
 	default:
 		return -1;
 	}
@@ -1007,3 +1040,47 @@ cxl_aer_ce_fail:
 	else
 		return -1;
 }
+
+int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev)
+{
+	/* Send @ev over the report socket (ABRT); returns 0 on success, -1 on any failure. */
+	char buf[MAX_MESSAGE_SIZE];
+	int sockfd = 0;
+	int done = 0;
+	int rc = -1;
+
+	memset(buf, 0, sizeof(buf));
+
+	sockfd = setup_report_socket();
+	if (sockfd < 0)
+		return -1;
+
+	rc = commit_report_basic(sockfd);
+	if (rc < 0)
+		goto cxl_overflow_fail;
+
+	rc = commit_report_backtrace(sockfd, CXL_OVERFLOW_EVENT, ev);
+	if (rc < 0)
+		goto cxl_overflow_fail;
+
+	sprintf(buf, "ANALYZER=%s", "rasdaemon-cxl-overflow");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < 0 || (size_t)rc < strlen(buf) + 1) /* test -1 first: compared as size_t it would look like success */
+		goto cxl_overflow_fail;
+
+	sprintf(buf, "REASON=%s", "CXL overflow");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < 0 || (size_t)rc < strlen(buf) + 1)
+		goto cxl_overflow_fail;
+
+	done = 1;
+
+cxl_overflow_fail:
+	if (sockfd >= 0)
+		close(sockfd);
+
+	if (done)
+		return 0;
+	else
+		return -1;
+}
diff --git a/ras-report.h b/ras-report.h
index 46155ee..204d485 100644
--- a/ras-report.h
+++ b/ras-report.h
@@ -42,6 +42,7 @@ int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev);
 int ras_report_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev);
 int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev);
 int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev);
+int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
 
 #else
 
@@ -56,6 +57,7 @@ static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_even
 static inline int ras_report_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev) { return 0; };
 static inline int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev) { return 0; };
 static inline int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; };
+static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
 
 #endif
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 4/7] rasdaemon: Add support for the CXL generic events
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
                   ` (2 preceding siblings ...)
  2023-04-12  8:33 ` [RFC PATCH 3/7] rasdaemon: Add support for the CXL overflow events shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 5/7] rasdaemon: Add support for the CXL general media events shiju.jose
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add support to log and record the CXL generic events.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++
 ras-cxl-handler.h |   3 +
 ras-events.c      |   9 +++
 ras-events.h      |   1 +
 ras-record.c      |  89 ++++++++++++++++++++++
 ras-record.h      |  25 +++++++
 ras-report.c      |  86 +++++++++++++++++++++
 ras-report.h      |   2 +
 8 files changed, 400 insertions(+)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index b08c5e3..59f87c0 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -56,6 +56,49 @@ static void get_timestamp(struct trace_seq *s, struct tep_record *record,
 		strncpy(ts_ptr, "1970-01-01 00:00:00 +0000", size);
 }
 
+struct cxl_event_flags {
+	uint32_t bit;		/* mask tested against the flags word with '&' */
+	const char *flag;	/* name printed when that mask is set */
+};
+
+static int decode_cxl_event_flags(struct trace_seq *s, uint32_t flags,
+				  const struct cxl_event_flags *cxl_ev_flags,
+				  uint8_t num_elems)
+{ /* Print to @s the quoted name of every table entry whose bit is set in @flags */
+	int i;
+
+	for (i = 0; i < num_elems; i++) {
+		if (flags & cxl_ev_flags[i].bit)
+			if (trace_seq_printf(s, "\'%s\' ", cxl_ev_flags[i].flag) <= 0)
+				return -1; /* print failed: stop at the first error */
+	}
+	return 0; /* also returned when no bit matched and nothing was printed */
+}
+
+static char *uuid_be(const char *uu)
+{ /* Format the 16 bytes at @uu as lowercase hex in the 8-4-4-4-12 UUID layout */
+	static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; /* shared static buffer: each call overwrites the previous result */
+	char *p = uuid;
+	int i;
+	static const unsigned char be[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; /* output order of the input bytes; identity, so bytes are printed in stored order */
+
+	for (i = 0; i < 16; i++) {
+		p += sprintf(p, "%.2x", (unsigned char) uu[be[i]]); /* two hex digits per byte */
+		switch (i) {
+		case 3:
+		case 5:
+		case 7:
+		case 9:
+			*p++ = '-'; /* group separator after bytes 3, 5, 7 and 9 */
+			break;
+		}
+	}
+
+	*p = 0;
+
+	return uuid; /* points at the static buffer above; callers must not free it */
+}
+
 /* Poison List: Payload out flags */
 #define CXL_POISON_FLAG_MORE            BIT(0)
 #define CXL_POISON_FLAG_OVERFLOW        BIT(1)
@@ -516,3 +559,145 @@ int ras_cxl_overflow_event_handler(struct trace_seq *s,
 
 	return 0;
 }
+
+/*
+ * Common Event Record Format
+ * CXL 3.0 section 8.2.9.2.1; Table 8-42
+ */
+#define CXL_EVENT_RECORD_FLAG_PERMANENT		BIT(2)
+#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED	BIT(3)
+#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED	BIT(4)
+#define CXL_EVENT_RECORD_FLAG_HW_REPLACE	BIT(5)
+
+static const struct  cxl_event_flags cxl_hdr_flags[] = { /* common-header flag bits and the names printed for them */
+	{ .bit = CXL_EVENT_RECORD_FLAG_PERMANENT, .flag = "PERMANENT_CONDITION" },
+	{ .bit = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, .flag = "MAINTENANCE_NEEDED" },
+	{ .bit = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, .flag = "PERFORMANCE_DEGRADED" },
+	{ .bit = CXL_EVENT_RECORD_FLAG_HW_REPLACE, .flag = "HARDWARE_REPLACEMENT_NEEDED" },
+};
+
+static int handle_ras_cxl_common_hdr(struct trace_seq *s,
+				     struct tep_record *record,
+				     struct tep_event *event, void *context,
+				     struct ras_cxl_event_common_hdr *hdr)
+{ /* Read the common header fields of a CXL event into @hdr and print them to @s; -1 on a missing field or print failure */
+	int len;
+	unsigned long long val;
+	struct ras_events *ras = context;
+
+	get_timestamp(s, record, ras, (char *)&hdr->timestamp, sizeof(hdr->timestamp));
+	if (trace_seq_printf(s, "%s ", hdr->timestamp) <= 0)
+		return -1;
+
+	hdr->memdev = tep_get_field_raw(s, event, "memdev", record, &len, 1);
+	if (!hdr->memdev)
+		return -1;
+	if (trace_seq_printf(s, "memdev:%s ", hdr->memdev) <= 0)
+		return -1;
+
+	hdr->host = tep_get_field_raw(s, event, "host", record, &len, 1);
+	if (!hdr->host)
+		return -1;
+	if (trace_seq_printf(s, "host:%s ", hdr->host) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "serial", record, &val, 1) < 0)
+		return -1;
+	hdr->serial = val;
+	if (trace_seq_printf(s, "serial:0x%llx ", (unsigned long long)hdr->serial) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "log", record, &val, 1) < 0)
+		return -1;
+	hdr->log_type = cxl_event_log_type_str(val); /* numeric log type -> string literal */
+	if (trace_seq_printf(s, "log type:%s ", hdr->log_type) <= 0)
+		return -1;
+
+	hdr->hdr_uuid = tep_get_field_raw(s, event, "hdr_uuid", record, &len, 1);
+	if (!hdr->hdr_uuid)
+		return -1;
+	hdr->hdr_uuid = uuid_be(hdr->hdr_uuid); /* now points at uuid_be()'s static text buffer, not at the raw field */
+	if (trace_seq_printf(s, "hdr_uuid:%s ", hdr->hdr_uuid) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "hdr_flags", record, &val, 1) < 0)
+		return -1;
+	hdr->hdr_flags = val;
+	if (decode_cxl_event_flags(s, hdr->hdr_flags, cxl_hdr_flags, /* prints the names of the set flag bits */
+				   ARRAY_SIZE(cxl_hdr_flags)) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "hdr_handle", record, &val, 1) < 0)
+		return -1;
+	hdr->hdr_handle = val;
+	if (trace_seq_printf(s, "hdr_handle:0x%x ", hdr->hdr_handle) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "hdr_related_handle", record, &val, 1) < 0)
+		return -1;
+	hdr->hdr_related_handle = val;
+	if (trace_seq_printf(s, "hdr_related_handle:0x%x ", hdr->hdr_related_handle) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "hdr_timestamp", record, &val, 1) < 0)
+		return -1;
+	convert_timestamp(val, hdr->hdr_timestamp, sizeof(hdr->hdr_timestamp)); /* nanoseconds -> text */
+	if (trace_seq_printf(s, "hdr_timestamp:%s ", hdr->hdr_timestamp) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "hdr_length", record, &val, 1) < 0)
+		return -1;
+	hdr->hdr_length = val;
+	if (trace_seq_printf(s, "hdr_length:%u ", hdr->hdr_length) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "hdr_maint_op_class", record, &val, 1) < 0)
+		return -1;
+	hdr->hdr_maint_op_class = val;
+	if (trace_seq_printf(s, "hdr_maint_op_class:%u ", hdr->hdr_maint_op_class) <= 0)
+		return -1;
+
+	return 0;
+}
+
+int ras_cxl_generic_event_handler(struct trace_seq *s,
+				  struct tep_record *record,
+				  struct tep_event *event, void *context)
+{
+	/* Decode the common header, hex-dump the "data" field to @s, then store and report the event. */
+	int len, i = 0;
+	struct ras_events *ras = context;
+	struct ras_cxl_generic_event ev;
+	const uint8_t *buf;
+
+	memset(&ev, 0, sizeof(ev));
+	if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0)
+		return -1;
+
+	ev.data = tep_get_field_raw(s, event, "data", record, &len, 1);
+	if (!ev.data || len < CXL_EVENT_RECORD_DATA_LENGTH) /* the dump below reads the full length: reject short fields */
+		return -1;
+	buf = ev.data;
+	if (trace_seq_printf(s, "\ndata:\n  %08x: ", i) <= 0)
+		return -1;
+	for (i = 0; i < CXL_EVENT_RECORD_DATA_LENGTH; i += 4) { /* four bytes per group, four groups per line */
+		if ((i > 0) && ((i % 16) == 0))
+			if (trace_seq_printf(s, "\n  %08x: ", i) <= 0)
+				break;
+		if (trace_seq_printf(s, "%02x%02x%02x%02x ",
+				     buf[i], buf[i+1], buf[i+2], buf[i+3]) <= 0)
+			break; /* a truncated dump is tolerated: the event is still stored and reported */
+	}
+
+	/* Insert data into the database */
+#ifdef HAVE_SQLITE3
+	ras_store_cxl_generic_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_cxl_generic_event(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h
index e7847ec..9f77cb7 100644
--- a/ras-cxl-handler.h
+++ b/ras-cxl-handler.h
@@ -32,4 +32,7 @@ int ras_cxl_aer_ce_event_handler(struct trace_seq *s,
 int ras_cxl_overflow_event_handler(struct trace_seq *s,
 				   struct tep_record *record,
 				   struct tep_event *event, void *context);
+int ras_cxl_generic_event_handler(struct trace_seq *s,
+				  struct tep_record *record,
+				  struct tep_event *event, void *context);
 #endif
diff --git a/ras-events.c b/ras-events.c
index ded8648..debdc87 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -249,6 +249,7 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_aer_uncorrectable_error", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_aer_correctable_error", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_overflow", enable);
+	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_generic_event", enable);
 #endif
 
 free_ras:
@@ -1013,6 +1014,14 @@ int handle_ras_events(int record_events)
 	else
 		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
 		    "cxl", "cxl_overflow");
+
+	rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_generic_event",
+			       ras_cxl_generic_event_handler, NULL, CXL_GENERIC_EVENT);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "cxl", "cxl_generic_event");
 #endif
 
 	if (!num_events) {
diff --git a/ras-events.h b/ras-events.h
index ea590c9..989ab29 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -43,6 +43,7 @@ enum {
 	CXL_AER_UE_EVENT,
 	CXL_AER_CE_EVENT,
 	CXL_OVERFLOW_EVENT,
+	CXL_GENERIC_EVENT,
 	NR_EVENTS
 };
 
diff --git a/ras-record.c b/ras-record.c
index 57fe117..36665aa 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -773,6 +773,79 @@ int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow
 
 	return rc;
 }
+
+static int ras_store_cxl_common_hdr(sqlite3_stmt *stmt, struct ras_cxl_event_common_hdr *hdr)
+{
+	if (!stmt || !hdr)
+		return 0;
+
+	sqlite3_bind_text(stmt, 1, hdr->timestamp, -1, NULL);
+	sqlite3_bind_text(stmt, 2, hdr->memdev, -1, NULL);
+	sqlite3_bind_text(stmt, 3, hdr->host, -1, NULL);
+	sqlite3_bind_int64(stmt, 4, hdr->serial);
+	sqlite3_bind_text(stmt, 5, hdr->log_type, -1, NULL);
+	sqlite3_bind_text(stmt, 6, hdr->hdr_uuid, -1, NULL);
+	sqlite3_bind_int(stmt, 7, hdr->hdr_flags);
+	sqlite3_bind_int(stmt, 8, hdr->hdr_handle);
+	sqlite3_bind_int(stmt, 9, hdr->hdr_related_handle);
+	sqlite3_bind_text(stmt, 10, hdr->hdr_timestamp, -1, NULL);
+	sqlite3_bind_int(stmt, 11, hdr->hdr_length);
+	sqlite3_bind_int(stmt, 12, hdr->hdr_maint_op_class);
+
+	return 0;
+}
+
+/*
+ * Table and functions to handle cxl:cxl_generic_event
+ */
+static const struct db_fields cxl_generic_event_fields[] = {
+	{ .name = "id",			.type = "INTEGER PRIMARY KEY" },
+	{ .name = "timestamp",		.type = "TEXT" },
+	{ .name = "memdev",		.type = "TEXT" },
+	{ .name = "host",		.type = "TEXT" },
+	{ .name = "serial",		.type = "INTEGER" },
+	{ .name = "log_type",		.type = "TEXT" },
+	{ .name = "hdr_uuid",		.type = "TEXT" },
+	{ .name = "hdr_flags",		.type = "INTEGER" },
+	{ .name = "hdr_handle",		.type = "INTEGER" },
+	{ .name = "hdr_related_handle",	.type = "INTEGER" },
+	{ .name = "hdr_ts",		.type = "TEXT" },
+	{ .name = "hdr_length",		.type = "INTEGER" },
+	{ .name = "hdr_maint_op_class",	.type = "INTEGER" },
+	{ .name = "data",		.type = "BLOB" },
+};
+
+static const struct db_table_descriptor cxl_generic_event_tab = {
+	.name = "cxl_generic_event",
+	.fields = cxl_generic_event_fields,
+	.num_fields = ARRAY_SIZE(cxl_generic_event_fields),
+};
+
+int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev)
+{
+	int rc;
+	struct sqlite3_priv *priv = ras->db_priv;
+
+	if (!priv || !priv->stmt_cxl_generic_event)
+		return 0;
+	log(TERM, LOG_INFO, "cxl_generic_event store: %p\n", priv->stmt_cxl_generic_event);
+
+	ras_store_cxl_common_hdr(priv->stmt_cxl_generic_event, &ev->hdr);
+	sqlite3_bind_blob(priv->stmt_cxl_generic_event, 13, ev->data,
+			  CXL_EVENT_RECORD_DATA_LENGTH, NULL);
+
+	rc = sqlite3_step(priv->stmt_cxl_generic_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed to do stmt_cxl_generic_event step on sqlite: error = %d\n", rc);
+	rc = sqlite3_reset(priv->stmt_cxl_generic_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed reset stmt_cxl_generic_event on sqlite: error = %d\n", rc);
+	log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
 #endif
 
 /*
@@ -1144,6 +1217,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 		if (rc != SQLITE_OK)
 			goto error;
 	}
+
+	rc = ras_mc_create_table(priv, &cxl_generic_event_tab);
+	if (rc == SQLITE_OK) {
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_generic_event,
+					 &cxl_generic_event_tab);
+		if (rc != SQLITE_OK)
+			goto error;
+	}
 #endif
 
 	ras->db_priv = priv;
@@ -1290,6 +1371,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
 			    "cpu %u: Failed to finalize cxl_overflow_event sqlite: error = %d\n",
 			    cpu, rc);
 	}
+
+	if (priv->stmt_cxl_generic_event) {
+		rc = sqlite3_finalize(priv->stmt_cxl_generic_event);
+		if (rc != SQLITE_OK)
+			log(TERM, LOG_ERR,
+			    "cpu %u: Failed to finalize cxl_generic_event sqlite: error = %d\n",
+			    cpu, rc);
+	}
 #endif
 
 	rc = sqlite3_close_v2(db);
diff --git a/ras-record.h b/ras-record.h
index 90db6ad..9ecfcda 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -133,6 +133,7 @@ struct ras_cxl_poison_event {
 #define SZ_512                          0x200
 #define CXL_HEADERLOG_SIZE              SZ_512
 #define CXL_HEADERLOG_SIZE_U32          (SZ_512 / sizeof(uint32_t))
+#define CXL_EVENT_RECORD_DATA_LENGTH	0x50
 
 struct ras_cxl_aer_ue_event {
 	char timestamp[64];
@@ -163,6 +164,26 @@ struct ras_cxl_overflow_event {
 	uint16_t count;
 };
 
+struct ras_cxl_event_common_hdr {
+	char timestamp[64];
+	const char *memdev;
+	const char *host;
+	uint64_t serial;
+	const char *log_type;
+	const char *hdr_uuid;
+	uint32_t hdr_flags;
+	uint16_t hdr_handle;
+	uint16_t hdr_related_handle;
+	char hdr_timestamp[64];
+	uint8_t hdr_length;
+	uint8_t hdr_maint_op_class;
+};
+
+struct ras_cxl_generic_event {
+	struct ras_cxl_event_common_hdr hdr;
+	uint8_t *data;
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
 struct ras_extlog_event;
@@ -176,6 +197,7 @@ struct ras_cxl_poison_event;
 struct ras_cxl_aer_ue_event;
 struct ras_cxl_aer_ce_event;
 struct ras_cxl_overflow_event;
+struct ras_cxl_generic_event;
 
 #ifdef HAVE_SQLITE3
 
@@ -213,6 +235,7 @@ struct sqlite3_priv {
 	sqlite3_stmt	*stmt_cxl_aer_ue_event;
 	sqlite3_stmt	*stmt_cxl_aer_ce_event;
 	sqlite3_stmt	*stmt_cxl_overflow_event;
+	sqlite3_stmt	*stmt_cxl_generic_event;
 #endif
 };
 
@@ -245,6 +268,7 @@ int ras_store_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_eve
 int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev);
 int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev);
 int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
+int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
@@ -262,6 +286,7 @@ static inline int ras_store_cxl_poison_event(struct ras_events *ras, struct ras_
 static inline int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev) { return 0; };
 static inline int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; };
 static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
+static inline int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
 
 #endif
 
diff --git a/ras-report.c b/ras-report.c
index dbed454..8d7b76a 100644
--- a/ras-report.c
+++ b/ras-report.c
@@ -451,6 +451,44 @@ static int set_cxl_overflow_event_backtrace(char *buf, struct ras_cxl_overflow_e
 	return 0;
 }
 
+static int set_cxl_generic_event_backtrace(char *buf, struct ras_cxl_generic_event *ev)
+{
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if (!buf || !ev)
+		return -1;
+
+	sprintf(bt_buf, "BACKTRACE="	\
+						"timestamp=%s\n"	\
+						"memdev=%s\n"		\
+						"host=%s\n"		\
+						"serial=0x%lx\n"	\
+						"log_type=%s\n"		\
+						"hdr_uuid=%s\n"		\
+						"hdr_flags=0x%x\n"	\
+						"hdr_handle=0x%x\n"	\
+						"hdr_related_handle=0x%x\n"	\
+						"hdr_timestamp=%s\n"	\
+						"hdr_length=%u\n"	\
+						"hdr_maint_op_class=%u\n",	\
+						ev->hdr.timestamp,	\
+						ev->hdr.memdev,		\
+						ev->hdr.host,		\
+						ev->hdr.serial,		\
+						ev->hdr.log_type,	\
+						ev->hdr.hdr_uuid,	\
+						ev->hdr.hdr_flags,	\
+						ev->hdr.hdr_handle,	\
+						ev->hdr.hdr_related_handle,	\
+						ev->hdr.hdr_timestamp,	\
+						ev->hdr.hdr_length,	\
+						ev->hdr.hdr_maint_op_class);
+
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
 static int commit_report_backtrace(int sockfd, int type, void *ev){
 	char buf[MAX_BACKTRACE_SIZE];
 	char *pbuf = buf;
@@ -500,6 +538,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){
 	case CXL_OVERFLOW_EVENT:
 		rc = set_cxl_overflow_event_backtrace(buf, (struct ras_cxl_overflow_event *)ev);
 		break;
+	case CXL_GENERIC_EVENT:
+		rc = set_cxl_generic_event_backtrace(buf, (struct ras_cxl_generic_event *)ev);
+		break;
 	default:
 		return -1;
 	}
@@ -1084,3 +1125,48 @@ cxl_overflow_fail:
 	else
 		return -1;
 }
+
+int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev)
+{
+	char buf[MAX_MESSAGE_SIZE];
+	int sockfd = 0;
+	int done = 0;
+	int rc = -1;
+
+	memset(buf, 0, sizeof(buf));
+
+	sockfd = setup_report_socket();
+	if (sockfd < 0)
+		return -1;
+
+	rc = commit_report_basic(sockfd);
+	if (rc < 0)
+		goto cxl_generic_fail;
+
+	rc = commit_report_backtrace(sockfd, CXL_GENERIC_EVENT, ev);
+	if (rc < 0)
+		goto cxl_generic_fail;
+
+	sprintf(buf, "ANALYZER=%s", "rasdaemon-cxl_generic_event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < strlen(buf) + 1)
+		goto cxl_generic_fail;
+
+	sprintf(buf, "REASON=%s", "CXL Generic Event ");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < strlen(buf) + 1)
+		goto cxl_generic_fail;
+
+	done = 1;
+
+cxl_generic_fail:
+
+	if (sockfd >= 0)
+		close(sockfd);
+
+	if (done)
+		return 0;
+	else
+		return -1;
+
+}
diff --git a/ras-report.h b/ras-report.h
index 204d485..bf591a6 100644
--- a/ras-report.h
+++ b/ras-report.h
@@ -43,6 +43,7 @@ int ras_report_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_ev
 int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev);
 int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev);
 int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
+int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
 
 #else
 
@@ -58,6 +59,7 @@ static inline int ras_report_cxl_poison_event(struct ras_events *ras, struct ras
 static inline int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_event *ev) { return 0; };
 static inline int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; };
 static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
+static inline int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
 
 #endif
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 5/7] rasdaemon: Add support for the CXL general media events
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
                   ` (3 preceding siblings ...)
  2023-04-12  8:33 ` [RFC PATCH 4/7] rasdaemon: Add support for the CXL generic events shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 6/7] rasdaemon: Add support for the CXL dram events shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 7/7] rasdaemon: Add support for the CXL memory module events shiju.jose
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add support to log and record the CXL general media events.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++
 ras-cxl-handler.h |   3 +
 ras-events.c      |   9 +++
 ras-events.h      |   1 +
 ras-record.c      |  85 +++++++++++++++++++++++++
 ras-record.h      |  19 ++++++
 ras-report.c      | 101 ++++++++++++++++++++++++++++++
 ras-report.h      |   2 +
 8 files changed, 376 insertions(+)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index 59f87c0..e2e80ff 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -99,6 +99,14 @@ static char *uuid_be(const char *uu)
 	return uuid;
 }
 
+static const char* get_cxl_type_str(const char** type_array, uint8_t num_elems, uint8_t type)
+{
+	if (type >= num_elems)
+		return "Unknown";
+
+	return type_array[type];
+}
+
 /* Poison List: Payload out flags */
 #define CXL_POISON_FLAG_MORE            BIT(0)
 #define CXL_POISON_FLAG_OVERFLOW        BIT(1)
@@ -701,3 +709,151 @@ int ras_cxl_generic_event_handler(struct trace_seq *s,
 
 	return 0;
 }
+
+#define CXL_DPA_VOLATILE		BIT(0)
+#define CXL_DPA_NOT_REPAIRABLE		BIT(1)
+
+static const struct cxl_event_flags cxl_dpa_flags[] = {
+	{ .bit = CXL_DPA_VOLATILE, .flag = "VOLATILE" },
+	{ .bit = CXL_DPA_NOT_REPAIRABLE, .flag = "NOT_REPAIRABLE" },
+};
+
+/*
+ * General Media Event Record - GMER
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT		BIT(0)
+#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT		BIT(1)
+#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW		BIT(2)
+
+static const struct cxl_event_flags cxl_gmer_event_desc_flags[] = {
+	{ .bit = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, .flag = "UNCORRECTABLE EVENT" },
+	{ .bit = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, .flag = "THRESHOLD EVENT" },
+	{ .bit = CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, .flag = "POISON LIST OVERFLOW" },
+};
+
+#define CXL_GMER_VALID_CHANNEL			BIT(0)
+#define CXL_GMER_VALID_RANK			BIT(1)
+#define CXL_GMER_VALID_DEVICE			BIT(2)
+#define CXL_GMER_VALID_COMPONENT		BIT(3)
+
+static const char* cxl_gmer_mem_event_type[] = {
+	"ECC Error",
+	"Invalid Address",
+	"Data Path Error",
+};
+
+static const char* cxl_gmer_trans_type[] = {
+	"Unknown",
+	"Host Read",
+	"Host Write",
+	"Host Scan Media",
+	"Host Inject Poison",
+	"Internal Media Scrub",
+	"Internal Media Management",
+};
+
+int ras_cxl_general_media_event_handler(struct trace_seq *s,
+					struct tep_record *record,
+					struct tep_event *event, void *context)
+{
+	int len, i;
+	unsigned long long val;
+	struct ras_events *ras = context;
+	struct ras_cxl_general_media_event ev;
+
+	memset(&ev, 0, sizeof(ev));
+	if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "dpa", record, &val, 1) < 0)
+		return -1;
+	ev.dpa = val;
+	if (trace_seq_printf(s, "dpa:0x%llx ", (unsigned long long)ev.dpa) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "dpa_flags", record, &val, 1) < 0)
+		return -1;
+	ev.dpa_flags = val;
+	if (trace_seq_printf(s, "dpa_flags:") <= 0)
+		return -1;
+	if (decode_cxl_event_flags(s, ev.dpa_flags, cxl_dpa_flags, ARRAY_SIZE(cxl_dpa_flags)) < 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "descriptor", record, &val, 1) < 0)
+		return -1;
+	ev.descriptor = val;
+	if (trace_seq_printf(s, "descriptor:") <= 0)
+		return -1;
+	if (decode_cxl_event_flags(s, ev.descriptor, cxl_gmer_event_desc_flags,
+				   ARRAY_SIZE(cxl_gmer_event_desc_flags)) < 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "type", record, &val, 1) < 0)
+		return -1;
+	ev.type = val;
+	if (trace_seq_printf(s, "type:%s ", get_cxl_type_str(cxl_gmer_mem_event_type,
+			     ARRAY_SIZE(cxl_gmer_mem_event_type), ev.type)) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "transaction_type", record, &val, 1) < 0)
+		return -1;
+	ev.transaction_type = val;
+	if (trace_seq_printf(s, "transaction_type:%s ",
+			     get_cxl_type_str(cxl_gmer_trans_type,
+					      ARRAY_SIZE(cxl_gmer_trans_type),
+					      ev.transaction_type)) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s,  event, "validity_flags", record, &val, 1) < 0)
+		return -1;
+	ev.validity_flags = val;
+
+	if (ev.validity_flags & CXL_GMER_VALID_CHANNEL) {
+		if (tep_get_field_val(s,  event, "channel", record, &val, 1) < 0)
+			return -1;
+		ev.channel = val;
+		if (trace_seq_printf(s, "channel:%u ", ev.channel) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_GMER_VALID_RANK) {
+		if (tep_get_field_val(s,  event, "rank", record, &val, 1) < 0)
+			return -1;
+		ev.rank = val;
+		if (trace_seq_printf(s, "rank:%u ", ev.rank) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_GMER_VALID_DEVICE) {
+		if (tep_get_field_val(s,  event, "device", record, &val, 1) < 0)
+			return -1;
+		ev.device = val;
+		if (trace_seq_printf(s, "device:%x ", ev.device) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_GMER_VALID_COMPONENT) {
+		ev.comp_id = tep_get_field_raw(s, event, "comp_id", record, &len, 1);
+		if (!ev.comp_id)
+			return -1;
+		if (trace_seq_printf(s, "comp_id:") <= 0)
+			return -1;
+		for (i = 0; i < CXL_EVENT_GEN_MED_COMP_ID_SIZE; i++) {
+			if (trace_seq_printf(s, "%02x ", ev.comp_id[i]) <= 0)
+				break;
+		}
+	}
+
+	/* Insert data into the DBMS */
+#ifdef HAVE_SQLITE3
+	ras_store_cxl_general_media_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_cxl_general_media_event(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h
index 9f77cb7..3adca4a 100644
--- a/ras-cxl-handler.h
+++ b/ras-cxl-handler.h
@@ -35,4 +35,7 @@ int ras_cxl_overflow_event_handler(struct trace_seq *s,
 int ras_cxl_generic_event_handler(struct trace_seq *s,
 				  struct tep_record *record,
 				  struct tep_event *event, void *context);
+int ras_cxl_general_media_event_handler(struct trace_seq *s,
+					struct tep_record *record,
+					struct tep_event *event, void *context);
 #endif
diff --git a/ras-events.c b/ras-events.c
index debdc87..0858b51 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -250,6 +250,7 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_aer_correctable_error", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_overflow", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_generic_event", enable);
+	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_general_media", enable);
 #endif
 
 free_ras:
@@ -1022,6 +1023,14 @@ int handle_ras_events(int record_events)
 	else
 		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
 		    "cxl", "cxl_generic_event");
+
+	rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_general_media",
+			       ras_cxl_general_media_event_handler, NULL, CXL_GENERAL_MEDIA_EVENT);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "cxl", "cxl_general_media");
 #endif
 
 	if (!num_events) {
diff --git a/ras-events.h b/ras-events.h
index 989ab29..0a3edf5 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -44,6 +44,7 @@ enum {
 	CXL_AER_CE_EVENT,
 	CXL_OVERFLOW_EVENT,
 	CXL_GENERIC_EVENT,
+	CXL_GENERAL_MEDIA_EVENT,
 	NR_EVENTS
 };
 
diff --git a/ras-record.c b/ras-record.c
index 36665aa..0546b29 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -846,6 +846,75 @@ int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_e
 
 	return rc;
 }
+
+/*
+ * Table and functions to handle the cxl:cxl_general_media trace event
+ */
+static const struct db_fields cxl_general_media_event_fields[] = {
+	{ .name = "id",			.type = "INTEGER PRIMARY KEY" },
+	{ .name = "timestamp",		.type = "TEXT" },
+	{ .name = "memdev",		.type = "TEXT" },
+	{ .name = "host",		.type = "TEXT" },
+	{ .name = "serial",		.type = "INTEGER" },
+	{ .name = "log_type",		.type = "TEXT" },
+	{ .name = "hdr_uuid",		.type = "TEXT" },
+	{ .name = "hdr_flags",		.type = "INTEGER" },
+	{ .name = "hdr_handle",		.type = "INTEGER" },
+	{ .name = "hdr_related_handle",	.type = "INTEGER" },
+	{ .name = "hdr_ts",		.type = "TEXT" },
+	{ .name = "hdr_length",		.type = "INTEGER" },
+	{ .name = "hdr_maint_op_class",	.type = "INTEGER" },
+	{ .name = "dpa",		.type = "INTEGER" },
+	{ .name = "dpa_flags",		.type = "INTEGER" },
+	{ .name = "descriptor",		.type = "INTEGER" },
+	{ .name = "type",		.type = "INTEGER" },
+	{ .name = "transaction_type",	.type = "INTEGER" },
+	{ .name = "channel",		.type = "INTEGER" },
+	{ .name = "rank",		.type = "INTEGER" },
+	{ .name = "device",		.type = "INTEGER" },
+	{ .name = "comp_id",		.type = "BLOB" },
+};
+
+static const struct db_table_descriptor cxl_general_media_event_tab = {
+	.name = "cxl_general_media_event",
+	.fields = cxl_general_media_event_fields,
+	.num_fields = ARRAY_SIZE(cxl_general_media_event_fields),
+};
+
+int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev)
+{
+	int rc;
+	struct sqlite3_priv *priv = ras->db_priv;
+
+	if (!priv || !priv->stmt_cxl_general_media_event)
+		return 0;
+	log(TERM, LOG_INFO, "cxl_general_media_event store: %p\n",
+	    priv->stmt_cxl_general_media_event);
+
+	ras_store_cxl_common_hdr(priv->stmt_cxl_general_media_event, &ev->hdr);
+	sqlite3_bind_int64(priv->stmt_cxl_general_media_event, 13, ev->dpa);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 14, ev->dpa_flags);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 15, ev->descriptor);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 16, ev->type);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 17, ev->transaction_type);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 18, ev->channel);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 19, ev->rank);
+	sqlite3_bind_int(priv->stmt_cxl_general_media_event, 20, ev->device);
+	sqlite3_bind_blob(priv->stmt_cxl_general_media_event, 21, ev->comp_id,
+			  CXL_EVENT_GEN_MED_COMP_ID_SIZE, NULL);
+
+	rc = sqlite3_step(priv->stmt_cxl_general_media_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed to do stmt_cxl_general_media_event step on sqlite: error = %d\n", rc);
+	rc = sqlite3_reset(priv->stmt_cxl_general_media_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed reset stmt_cxl_general_media_event on sqlite: error = %d\n", rc);
+	log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
 #endif
 
 /*
@@ -1225,6 +1294,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 		if (rc != SQLITE_OK)
 			goto error;
 	}
+
+	rc = ras_mc_create_table(priv, &cxl_general_media_event_tab);
+	if (rc == SQLITE_OK) {
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_general_media_event,
+					 &cxl_general_media_event_tab);
+		if (rc != SQLITE_OK)
+			goto error;
+	}
 #endif
 
 	ras->db_priv = priv;
@@ -1379,6 +1456,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
 			    "cpu %u: Failed to finalize cxl_generic_event sqlite: error = %d\n",
 			    cpu, rc);
 	}
+
+	if (priv->stmt_cxl_general_media_event) {
+		rc = sqlite3_finalize(priv->stmt_cxl_general_media_event);
+		if (rc != SQLITE_OK)
+			log(TERM, LOG_ERR,
+			    "cpu %u: Failed to finalize cxl_general_media_event sqlite: error = %d\n",
+			    cpu, rc);
+	}
 #endif
 
 	rc = sqlite3_close_v2(db);
diff --git a/ras-record.h b/ras-record.h
index 9ecfcda..37c32de 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -134,6 +134,7 @@ struct ras_cxl_poison_event {
 #define CXL_HEADERLOG_SIZE              SZ_512
 #define CXL_HEADERLOG_SIZE_U32          (SZ_512 / sizeof(uint32_t))
 #define CXL_EVENT_RECORD_DATA_LENGTH	0x50
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE	0x10
 
 struct ras_cxl_aer_ue_event {
 	char timestamp[64];
@@ -184,6 +185,20 @@ struct ras_cxl_generic_event {
 	uint8_t *data;
 };
 
+struct ras_cxl_general_media_event {
+	struct ras_cxl_event_common_hdr hdr;
+	uint64_t dpa;
+	uint8_t dpa_flags;
+	uint8_t descriptor;
+	uint8_t type;
+	uint8_t transaction_type;
+	uint8_t channel;
+	uint8_t rank;
+	uint32_t device;
+	uint8_t *comp_id;
+	uint16_t validity_flags;
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
 struct ras_extlog_event;
@@ -198,6 +213,7 @@ struct ras_cxl_aer_ue_event;
 struct ras_cxl_aer_ce_event;
 struct ras_cxl_overflow_event;
 struct ras_cxl_generic_event;
+struct ras_cxl_general_media_event;
 
 #ifdef HAVE_SQLITE3
 
@@ -236,6 +252,7 @@ struct sqlite3_priv {
 	sqlite3_stmt	*stmt_cxl_aer_ce_event;
 	sqlite3_stmt	*stmt_cxl_overflow_event;
 	sqlite3_stmt	*stmt_cxl_generic_event;
+	sqlite3_stmt	*stmt_cxl_general_media_event;
 #endif
 };
 
@@ -269,6 +286,7 @@ int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_eve
 int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev);
 int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
 int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
+int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
@@ -287,6 +305,7 @@ static inline int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_
 static inline int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; };
 static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
 static inline int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
+static inline int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; };
 
 #endif
 
diff --git a/ras-report.c b/ras-report.c
index 8d7b76a..725dc9b 100644
--- a/ras-report.c
+++ b/ras-report.c
@@ -489,6 +489,60 @@ static int set_cxl_generic_event_backtrace(char *buf, struct ras_cxl_generic_eve
 	return 0;
 }
 
+static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_general_media_event *ev)
+{
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if (!buf || !ev)
+		return -1;
+
+	sprintf(bt_buf, "BACKTRACE="	\
+						"timestamp=%s\n"	\
+						"memdev=%s\n"		\
+						"host=%s\n"		\
+						"serial=0x%lx\n"	\
+						"log_type=%s\n"		\
+						"hdr_uuid=%s\n"		\
+						"hdr_flags=0x%x\n"	\
+						"hdr_handle=0x%x\n"	\
+						"hdr_related_handle=0x%x\n"	\
+						"hdr_timestamp=%s\n"	\
+						"hdr_length=%u\n"	\
+						"hdr_maint_op_class=%u\n"	\
+						"dpa=0x%lx\n"		\
+						"dpa_flags=%u\n"	\
+						"descriptor=%u\n"	\
+						"type=%u\n"		\
+						"transaction_type=%u\n"	\
+						"channel=%u\n"		\
+						"rank=%u\n"		\
+						"device=0x%x\n",	\
+						ev->hdr.timestamp,	\
+						ev->hdr.memdev,		\
+						ev->hdr.host,		\
+						ev->hdr.serial,		\
+						ev->hdr.log_type,	\
+						ev->hdr.hdr_uuid,	\
+						ev->hdr.hdr_flags,	\
+						ev->hdr.hdr_handle,	\
+						ev->hdr.hdr_related_handle,	\
+						ev->hdr.hdr_timestamp,	\
+						ev->hdr.hdr_length,	\
+						ev->hdr.hdr_maint_op_class,	\
+						ev->dpa,		\
+						ev->dpa_flags,		\
+						ev->descriptor,		\
+						ev->type,		\
+						ev->transaction_type,	\
+						ev->channel,		\
+						ev->rank,		\
+						ev->device);
+
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
 static int commit_report_backtrace(int sockfd, int type, void *ev){
 	char buf[MAX_BACKTRACE_SIZE];
 	char *pbuf = buf;
@@ -541,6 +595,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){
 	case CXL_GENERIC_EVENT:
 		rc = set_cxl_generic_event_backtrace(buf, (struct ras_cxl_generic_event *)ev);
 		break;
+	case CXL_GENERAL_MEDIA_EVENT:
+		rc = set_cxl_general_media_event_backtrace(buf, (struct ras_cxl_general_media_event *)ev);
+		break;
 	default:
 		return -1;
 	}
@@ -1170,3 +1227,47 @@ cxl_generic_fail:
 		return -1;
 
 }
+
+int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev)
+{
+	char buf[MAX_MESSAGE_SIZE];
+	int sockfd = 0;
+	int done = 0;
+	int rc = -1;
+
+	memset(buf, 0, sizeof(buf));
+
+	sockfd = setup_report_socket();
+	if (sockfd < 0)
+		return -1;
+
+	rc = commit_report_basic(sockfd);
+	if (rc < 0)
+		goto cxl_general_media_fail;
+
+	rc = commit_report_backtrace(sockfd, CXL_GENERAL_MEDIA_EVENT, ev);
+	if (rc < 0)
+		goto cxl_general_media_fail;
+
+	sprintf(buf, "ANALYZER=%s", "rasdaemon-cxl_general_media_event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < strlen(buf) + 1)
+		goto cxl_general_media_fail;
+
+	sprintf(buf, "REASON=%s", "CXL General Media Event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < strlen(buf) + 1)
+		goto cxl_general_media_fail;
+
+	done = 1;
+
+cxl_general_media_fail:
+
+	if (sockfd >= 0)
+		close(sockfd);
+
+	if (done)
+		return 0;
+	else
+		return -1;
+}
diff --git a/ras-report.h b/ras-report.h
index bf591a6..d9ec7df 100644
--- a/ras-report.h
+++ b/ras-report.h
@@ -44,6 +44,7 @@ int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_ev
 int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev);
 int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
 int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
+int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev);
 
 #else
 
@@ -60,6 +61,7 @@ static inline int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras
 static inline int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; };
 static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
 static inline int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
+static inline int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; };
 
 #endif
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 6/7] rasdaemon: Add support for the CXL dram events
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
                   ` (4 preceding siblings ...)
  2023-04-12  8:33 ` [RFC PATCH 5/7] rasdaemon: Add support for the CXL general media events shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  2023-04-12  8:33 ` [RFC PATCH 7/7] rasdaemon: Add support for the CXL memory module events shiju.jose
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add support to log and record the CXL dram events.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++
 ras-cxl-handler.h |   3 +
 ras-events.c      |   9 +++
 ras-events.h      |   1 +
 ras-record.c      |  93 ++++++++++++++++++++++++++++
 ras-record.h      |  23 +++++++
 ras-report.c      | 109 +++++++++++++++++++++++++++++++++
 ras-report.h      |   2 +
 8 files changed, 391 insertions(+)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index e2e80ff..fadf5db 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -857,3 +857,154 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
 
 	return 0;
 }
+
+/*
+ * DRAM Event Record - DER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CXL_DER_VALID_CHANNEL			BIT(0)
+#define CXL_DER_VALID_RANK			BIT(1)
+#define CXL_DER_VALID_NIBBLE			BIT(2)
+#define CXL_DER_VALID_BANK_GROUP		BIT(3)
+#define CXL_DER_VALID_BANK			BIT(4)
+#define CXL_DER_VALID_ROW			BIT(5)
+#define CXL_DER_VALID_COLUMN			BIT(6)
+#define CXL_DER_VALID_CORRECTION_MASK		BIT(7)
+
+/* Decode a cxl:cxl_dram record into s, then store/report it; 0 on success, else -1. */
+int ras_cxl_dram_event_handler(struct trace_seq *s, struct tep_record *record,
+			       struct tep_event *event, void *context)
+{
+	int len, i;
+	unsigned long long val;
+	struct ras_events *ras = context;
+	struct ras_cxl_dram_event ev;
+
+	memset(&ev, 0, sizeof(ev));
+	if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "dpa", record, &val, 1) < 0)
+		return -1;
+	ev.dpa = val;
+	if (trace_seq_printf(s, "dpa:0x%llx ", (unsigned long long)ev.dpa) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "dpa_flags", record, &val, 1) < 0)
+		return -1;
+	ev.dpa_flags = val;
+	if (trace_seq_printf(s, "dpa_flags:") <= 0)
+		return -1;
+	if (decode_cxl_event_flags(s, ev.dpa_flags, cxl_dpa_flags, ARRAY_SIZE(cxl_dpa_flags)) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "descriptor", record, &val, 1) < 0)
+		return -1;
+	ev.descriptor = val;
+	if (trace_seq_printf(s, "descriptor:") <= 0)
+		return -1;
+	if (decode_cxl_event_flags(s, ev.descriptor, cxl_gmer_event_desc_flags,
+				   ARRAY_SIZE(cxl_gmer_event_desc_flags)) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "type", record, &val, 1) < 0)
+		return -1;
+	ev.type = val;
+	if (trace_seq_printf(s, "type:%s ", get_cxl_type_str(cxl_gmer_mem_event_type,
+			     ARRAY_SIZE(cxl_gmer_mem_event_type), ev.type)) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "transaction_type", record, &val, 1) < 0)
+		return -1;
+	ev.transaction_type = val;
+	if (trace_seq_printf(s, "transaction_type:%s ",
+			     get_cxl_type_str(cxl_gmer_trans_type, ARRAY_SIZE(cxl_gmer_trans_type),
+					      ev.transaction_type)) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "validity_flags", record, &val, 1) < 0)
+		return -1;
+	ev.validity_flags = val;
+
+	if (ev.validity_flags & CXL_DER_VALID_CHANNEL) {
+		if (tep_get_field_val(s, event, "channel", record, &val, 1) < 0)
+			return -1;
+		ev.channel = val;
+		if (trace_seq_printf(s, "channel:%u ", ev.channel) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_RANK) {
+		if (tep_get_field_val(s, event, "rank", record, &val, 1) < 0)
+			return -1;
+		ev.rank = val;
+		if (trace_seq_printf(s, "rank:%u ", ev.rank) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_NIBBLE) {
+		if (tep_get_field_val(s, event, "nibble_mask", record, &val, 1) < 0)
+			return -1;
+		ev.nibble_mask = val;
+		if (trace_seq_printf(s, "nibble_mask:%u ", ev.nibble_mask) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_BANK_GROUP) {
+		if (tep_get_field_val(s, event, "bank_group", record, &val, 1) < 0)
+			return -1;
+		ev.bank_group = val;
+		if (trace_seq_printf(s, "bank_group:%u ", ev.bank_group) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_BANK) {
+		if (tep_get_field_val(s, event, "bank", record, &val, 1) < 0)
+			return -1;
+		ev.bank = val;
+		if (trace_seq_printf(s, "bank:%u ", ev.bank) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_ROW) {
+		if (tep_get_field_val(s, event, "row", record, &val, 1) < 0)
+			return -1;
+		ev.row = val;
+		if (trace_seq_printf(s, "row:%u ", ev.row) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_COLUMN) {
+		if (tep_get_field_val(s, event, "column", record, &val, 1) < 0)
+			return -1;
+		ev.column = val;
+		if (trace_seq_printf(s, "column:%u ", ev.column) <= 0)
+			return -1;
+	}
+
+	if (ev.validity_flags & CXL_DER_VALID_CORRECTION_MASK) {
+		ev.cor_mask = tep_get_field_raw(s, event, "cor_mask", record, &len, 1);
+		/* A full-size mask is read below and when the event is stored */
+		if (!ev.cor_mask || len < CXL_EVENT_DER_CORRECTION_MASK_SIZE)
+			return -1;
+		if (trace_seq_printf(s, "correction_mask:") <= 0)
+			return -1;
+		for (i = 0; i < CXL_EVENT_DER_CORRECTION_MASK_SIZE; i++) {
+			if (trace_seq_printf(s, "%02x ", ev.cor_mask[i]) <= 0)
+				return -1;
+		}
+	}
+
+	/* Insert data into the database */
+#ifdef HAVE_SQLITE3
+	ras_store_cxl_dram_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_cxl_dram_event(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h
index 3adca4a..35455af 100644
--- a/ras-cxl-handler.h
+++ b/ras-cxl-handler.h
@@ -38,4 +38,7 @@ int ras_cxl_generic_event_handler(struct trace_seq *s,
 int ras_cxl_general_media_event_handler(struct trace_seq *s,
 					struct tep_record *record,
 					struct tep_event *event, void *context);
+int ras_cxl_dram_event_handler(struct trace_seq *s,
+			       struct tep_record *record,
+			       struct tep_event *event, void *context);
 #endif
diff --git a/ras-events.c b/ras-events.c
index 0858b51..00159e6 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -251,6 +251,7 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_overflow", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_generic_event", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_general_media", enable);
+	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_dram", enable);
 #endif
 
 free_ras:
@@ -1031,6 +1032,14 @@ int handle_ras_events(int record_events)
 	else
 		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
 		    "cxl", "cxl_general_media");
+
+	rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_dram",
+			       ras_cxl_dram_event_handler, NULL, CXL_DRAM_EVENT);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "cxl", "cxl_dram");
 #endif
 
 	if (!num_events) {
diff --git a/ras-events.h b/ras-events.h
index 0a3edf5..3fe28da 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -45,6 +45,7 @@ enum {
 	CXL_OVERFLOW_EVENT,
 	CXL_GENERIC_EVENT,
 	CXL_GENERAL_MEDIA_EVENT,
+	CXL_DRAM_EVENT,
 	NR_EVENTS
 };
 
diff --git a/ras-record.c b/ras-record.c
index 0546b29..36f43cf 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -915,6 +915,83 @@ int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_gen
 
 	return rc;
 }
+
+/*
+ * Table and functions to handle cxl:cxl_dram_event
+ */
+static const struct db_fields cxl_dram_event_fields[] = {
+	{ .name = "id",			.type = "INTEGER PRIMARY KEY" },
+	{ .name = "timestamp",		.type = "TEXT" },
+	{ .name = "memdev",		.type = "TEXT" },
+	{ .name = "host",		.type = "TEXT" },
+	{ .name = "serial",		.type = "INTEGER" },
+	{ .name = "log_type",		.type = "TEXT" },
+	{ .name = "hdr_uuid",		.type = "TEXT" },
+	{ .name = "hdr_flags",		.type = "INTEGER" },
+	{ .name = "hdr_handle",		.type = "INTEGER" },
+	{ .name = "hdr_related_handle",	.type = "INTEGER" },
+	{ .name = "hdr_ts",		.type = "TEXT" },
+	{ .name = "hdr_length",		.type = "INTEGER" },
+	{ .name = "hdr_maint_op_class",	.type = "INTEGER" },
+	{ .name = "dpa",		.type = "INTEGER" },
+	{ .name = "dpa_flags",		.type = "INTEGER" },
+	{ .name = "descriptor",		.type = "INTEGER" },
+	{ .name = "type",		.type = "INTEGER" },
+	{ .name = "transaction_type",	.type = "INTEGER" },
+	{ .name = "channel",		.type = "INTEGER" },
+	{ .name = "rank",		.type = "INTEGER" },
+	{ .name = "nibble_mask",	.type = "INTEGER" },
+	{ .name = "bank_group",		.type = "INTEGER" },
+	{ .name = "bank",		.type = "INTEGER" },
+	{ .name = "row",		.type = "INTEGER" },
+	{ .name = "column",		.type = "INTEGER" },
+	{ .name = "cor_mask",		.type = "BLOB" },
+};
+
+static const struct db_table_descriptor cxl_dram_event_tab = {
+	.name = "cxl_dram_event",
+	.fields = cxl_dram_event_fields,
+	.num_fields = ARRAY_SIZE(cxl_dram_event_fields),
+};
+
+int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev)
+{
+	int rc;
+	struct sqlite3_priv *priv = ras->db_priv;
+
+	if (!priv || !priv->stmt_cxl_dram_event)
+		return 0;
+	log(TERM, LOG_INFO, "cxl_dram_event store: %p\n",
+	    priv->stmt_cxl_dram_event);
+
+	ras_store_cxl_common_hdr(priv->stmt_cxl_dram_event, &ev->hdr);
+	sqlite3_bind_int64(priv->stmt_cxl_dram_event, 13, ev->dpa);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 14, ev->dpa_flags);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 15, ev->descriptor);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 16, ev->type);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 17, ev->transaction_type);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 18, ev->channel);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 19, ev->rank);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 20, ev->nibble_mask);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 21, ev->bank_group);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 22, ev->bank);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 23, ev->row);
+	sqlite3_bind_int(priv->stmt_cxl_dram_event, 24, ev->column);
+	sqlite3_bind_blob(priv->stmt_cxl_dram_event, 25, ev->cor_mask,
+			  CXL_EVENT_DER_CORRECTION_MASK_SIZE, NULL);
+
+	rc = sqlite3_step(priv->stmt_cxl_dram_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed to do stmt_cxl_dram_event step on sqlite: error = %d\n", rc);
+	rc = sqlite3_reset(priv->stmt_cxl_dram_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed reset stmt_cxl_dram_event on sqlite: error = %d\n", rc);
+	log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
 #endif
 
 /*
@@ -1302,6 +1379,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 		if (rc != SQLITE_OK)
 			goto error;
 	}
+
+	rc = ras_mc_create_table(priv, &cxl_dram_event_tab);
+	if (rc == SQLITE_OK) {
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_dram_event,
+					 &cxl_dram_event_tab);
+		if (rc != SQLITE_OK)
+			goto error;
+	}
 #endif
 
 	ras->db_priv = priv;
@@ -1464,6 +1549,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
 			    "cpu %u: Failed to finalize cxl_general_media_event sqlite: error = %d\n",
 			    cpu, rc);
 	}
+
+	if (priv->stmt_cxl_dram_event) {
+		rc = sqlite3_finalize(priv->stmt_cxl_dram_event);
+		if (rc != SQLITE_OK)
+			log(TERM, LOG_ERR,
+			    "cpu %u: Failed to finalize cxl_dram_event sqlite: error = %d\n",
+			    cpu, rc);
+	}
 #endif
 
 	rc = sqlite3_close_v2(db);
diff --git a/ras-record.h b/ras-record.h
index 37c32de..480ff92 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -135,6 +135,7 @@ struct ras_cxl_poison_event {
 #define CXL_HEADERLOG_SIZE_U32          (SZ_512 / sizeof(uint32_t))
 #define CXL_EVENT_RECORD_DATA_LENGTH	0x50
 #define CXL_EVENT_GEN_MED_COMP_ID_SIZE	0x10
+#define CXL_EVENT_DER_CORRECTION_MASK_SIZE	0x20
 
 struct ras_cxl_aer_ue_event {
 	char timestamp[64];
@@ -199,6 +200,24 @@ struct ras_cxl_general_media_event {
 	uint16_t validity_flags;
 };
 
+struct ras_cxl_dram_event {
+	struct ras_cxl_event_common_hdr hdr;
+	uint64_t dpa;
+	uint8_t dpa_flags;
+	uint8_t descriptor;
+	uint8_t type;
+	uint8_t transaction_type;
+	uint8_t channel;
+	uint8_t rank;
+	uint32_t nibble_mask;
+	uint8_t bank_group;
+	uint8_t bank;
+	uint32_t row;
+	uint16_t column;
+	uint8_t *cor_mask;
+	uint16_t validity_flags;
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
 struct ras_extlog_event;
@@ -214,6 +233,7 @@ struct ras_cxl_aer_ce_event;
 struct ras_cxl_overflow_event;
 struct ras_cxl_generic_event;
 struct ras_cxl_general_media_event;
+struct ras_cxl_dram_event;
 
 #ifdef HAVE_SQLITE3
 
@@ -253,6 +273,7 @@ struct sqlite3_priv {
 	sqlite3_stmt	*stmt_cxl_overflow_event;
 	sqlite3_stmt	*stmt_cxl_generic_event;
 	sqlite3_stmt	*stmt_cxl_general_media_event;
+	sqlite3_stmt	*stmt_cxl_dram_event;
 #endif
 };
 
@@ -287,6 +308,7 @@ int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_eve
 int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
 int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
 int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev);
+int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
@@ -306,6 +328,7 @@ static inline int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_
 static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
 static inline int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
 static inline int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; };
+static inline int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) { return 0; };
 
 #endif
 
diff --git a/ras-report.c b/ras-report.c
index 725dc9b..21180b1 100644
--- a/ras-report.c
+++ b/ras-report.c
@@ -543,6 +543,68 @@ static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_gener
 	return 0;
 }
 
+/*
+ * Format the CXL DRAM event fields as a "BACKTRACE=" item of name=value lines
+ * and append it to buf. Returns 0 on success, -1 if buf or ev is NULL.
+ */
+static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev)
+{
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if (!buf || !ev)
+		return -1;
+
+	/* Bounded write; dpa is uint64_t, so cast it to match %llx on every ABI. */
+	snprintf(bt_buf, sizeof(bt_buf), "BACKTRACE="
+						"timestamp=%s\n"
+						"memdev=%s\n"
+						"host=%s\n"
+						"serial=0x%lx\n"
+						"log_type=%s\n"
+						"hdr_uuid=%s\n"
+						"hdr_flags=0x%x\n"
+						"hdr_handle=0x%x\n"
+						"hdr_related_handle=0x%x\n"
+						"hdr_timestamp=%s\n"
+						"hdr_length=%u\n"
+						"hdr_maint_op_class=%u\n"
+						"dpa=0x%llx\n"
+						"dpa_flags=%u\n"
+						"descriptor=%u\n"
+						"type=%u\n"
+						"transaction_type=%u\n"
+						"channel=%u\n"
+						"rank=%u\n"
+						"nibble_mask=%u\n"
+						"bank_group=%u\n"
+						"bank=%u\n"
+						"row=%u\n"
+						"column=%u\n",
+						ev->hdr.timestamp,
+						ev->hdr.memdev,
+						ev->hdr.host,
+						ev->hdr.serial,
+						ev->hdr.log_type,
+						ev->hdr.hdr_uuid,
+						ev->hdr.hdr_flags,
+						ev->hdr.hdr_handle,
+						ev->hdr.hdr_related_handle,
+						ev->hdr.hdr_timestamp,
+						ev->hdr.hdr_length,
+						ev->hdr.hdr_maint_op_class,
+						(unsigned long long)ev->dpa,
+						ev->dpa_flags, ev->descriptor,
+						ev->type, ev->transaction_type,
+						ev->channel, ev->rank,
+						ev->nibble_mask,
+						ev->bank_group, ev->bank,
+						ev->row, ev->column);
+
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
 static int commit_report_backtrace(int sockfd, int type, void *ev){
 	char buf[MAX_BACKTRACE_SIZE];
 	char *pbuf = buf;
@@ -598,6 +660,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){
 	case CXL_GENERAL_MEDIA_EVENT:
 		rc = set_cxl_general_media_event_backtrace(buf, (struct ras_cxl_general_media_event *)ev);
 		break;
+	case CXL_DRAM_EVENT:
+		rc = set_cxl_dram_event_backtrace(buf, (struct ras_cxl_dram_event *)ev);
+		break;
 	default:
 		return -1;
 	}
@@ -1271,3 +1336,47 @@ cxl_general_media_fail:
 	else
 		return -1;
 }
+
+/*
+ * Send a CXL DRAM event to the report socket: basic report, backtrace, then
+ * the ANALYZER and REASON items. Returns 0 if all were sent, -1 otherwise.
+ */
+int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev)
+{
+	char buf[MAX_MESSAGE_SIZE];
+	int sockfd = 0;
+	int done = 0;
+	int rc = -1;
+
+	memset(buf, 0, sizeof(buf));
+
+	sockfd = setup_report_socket();
+	if (sockfd < 0)
+		return -1;
+
+	rc = commit_report_basic(sockfd);
+	if (rc < 0)
+		goto cxl_dram_fail;
+
+	rc = commit_report_backtrace(sockfd, CXL_DRAM_EVENT, ev);
+	if (rc < 0)
+		goto cxl_dram_fail;
+
+	snprintf(buf, sizeof(buf), "ANALYZER=%s", "rasdaemon-cxl_dram_event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	/* Test rc < 0 first: compared against a size_t, -1 would wrap to a huge value. */
+	if (rc < 0 || (size_t)rc < strlen(buf) + 1)
+		goto cxl_dram_fail;
+
+	snprintf(buf, sizeof(buf), "REASON=%s", "CXL DRAM Event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < 0 || (size_t)rc < strlen(buf) + 1)
+		goto cxl_dram_fail;
+
+	done = 1;
+
+cxl_dram_fail:
+	close(sockfd);
+
+	return done ? 0 : -1;
+}
diff --git a/ras-report.h b/ras-report.h
index d9ec7df..1ad00e0 100644
--- a/ras-report.h
+++ b/ras-report.h
@@ -45,6 +45,7 @@ int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_ev
 int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev);
 int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
 int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev);
+int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev);
 
 #else
 
@@ -62,6 +63,7 @@ static inline int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras
 static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; };
 static inline int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
 static inline int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; };
+static inline int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) { return 0; };
 
 #endif
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 7/7] rasdaemon: Add support for the CXL memory module events
  2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
                   ` (5 preceding siblings ...)
  2023-04-12  8:33 ` [RFC PATCH 6/7] rasdaemon: Add support for the CXL dram events shiju.jose
@ 2023-04-12  8:33 ` shiju.jose
  6 siblings, 0 replies; 8+ messages in thread
From: shiju.jose @ 2023-04-12  8:33 UTC (permalink / raw)
  To: mchehab, linux-cxl, linux-edac; +Cc: jonathan.cameron, linuxarm, shiju.jose

From: Shiju Jose <shiju.jose@huawei.com>

Add support to log and record the CXL memory module events.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 ras-cxl-handler.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++
 ras-cxl-handler.h |   3 +
 ras-events.c      |   9 +++
 ras-events.h      |   1 +
 ras-record.c      |  84 +++++++++++++++++++++++++
 ras-record.h      |  17 +++++
 ras-report.c      | 103 ++++++++++++++++++++++++++++++
 ras-report.h      |   2 +
 8 files changed, 375 insertions(+)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index fadf5db..ca23b97 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -1008,3 +1008,159 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
 
 	return 0;
 }
+
+/*
+ * Memory Module Event Record - MMER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+static const char* cxl_dev_evt_type[] = {
+	"Health Status Change",
+	"Media Status Change",
+	"Life Used Change",
+	"Temperature Change",
+	"Data Path Error",
+	"LSA Error",
+};
+
+/*
+ * Device Health Information - DHI
+ *
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+#define CXL_DHI_HS_MAINTENANCE_NEEDED				BIT(0)
+#define CXL_DHI_HS_PERFORMANCE_DEGRADED				BIT(1)
+#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED			BIT(2)
+
+static const struct cxl_event_flags cxl_health_status[] = {
+	{ .bit = CXL_DHI_HS_MAINTENANCE_NEEDED, .flag = "MAINTENANCE_NEEDED" },
+	{ .bit = CXL_DHI_HS_PERFORMANCE_DEGRADED, .flag = "PERFORMANCE_DEGRADED" },
+	{ .bit = CXL_DHI_HS_HW_REPLACEMENT_NEEDED, .flag = "REPLACEMENT_NEEDED" },
+};
+
+static const char* cxl_media_status[] = {
+	"Normal",
+	"Not Ready",
+	"Write Persistency Lost",
+	"All Data Lost",
+	"Write Persistency Loss in the Event of Power Loss",
+	"Write Persistency Loss in Event of Shutdown",
+	"Write Persistency Loss Imminent",
+	"All Data Loss in Event of Power Loss",
+	"All Data loss in the Event of Shutdown",
+	"All Data Loss Imminent",
+};
+
+static const char* cxl_two_bit_status[] = {
+	"Normal",
+	"Warning",
+	"Critical",
+};
+
+static const char* cxl_one_bit_status[] = {
+	"Normal",
+	"Warning",
+};
+
+#define CXL_DHI_AS_LIFE_USED(as)	(as & 0x3)
+#define CXL_DHI_AS_DEV_TEMP(as)		((as & 0xC) >> 2)
+#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)	((as & 0x10) >> 4)
+#define CXL_DHI_AS_COR_PER_ERR_CNT(as)	((as & 0x20) >> 5)
+
+/* Decode a cxl:cxl_memory_module record into s, then store/report it; 0 on success, else -1. */
+int ras_cxl_memory_module_event_handler(struct trace_seq *s, struct tep_record *record,
+					struct tep_event *event, void *context)
+{
+	unsigned long long val;
+	struct ras_events *ras = context;
+	struct ras_cxl_memory_module_event ev;
+
+	memset(&ev, 0, sizeof(ev));
+	if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "event_type", record, &val, 1) < 0)
+		return -1;
+	ev.event_type = val;
+	if (trace_seq_printf(s, "event_type:%s ", get_cxl_type_str(cxl_dev_evt_type,
+			     ARRAY_SIZE(cxl_dev_evt_type), ev.event_type)) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "health_status", record, &val, 1) < 0)
+		return -1;
+	ev.health_status = val;
+	if (trace_seq_printf(s, "health_status:") <= 0)
+		return -1;
+	if (decode_cxl_event_flags(s, ev.health_status, cxl_health_status,
+				   ARRAY_SIZE(cxl_health_status)) < 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "media_status", record, &val, 1) < 0)
+		return -1;
+	ev.media_status = val;
+	if (trace_seq_printf(s, "media_status:%s ", get_cxl_type_str(cxl_media_status,
+			     ARRAY_SIZE(cxl_media_status), ev.media_status)) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "add_status", record, &val, 1) < 0)
+		return -1;
+	ev.add_status = val;
+	if (trace_seq_printf(s, "as_life_used:%s ", get_cxl_type_str(cxl_two_bit_status,
+			     ARRAY_SIZE(cxl_two_bit_status),
+			     CXL_DHI_AS_LIFE_USED(ev.add_status))) <= 0)
+		return -1;
+	if (trace_seq_printf(s, "as_dev_temp:%s ", get_cxl_type_str(cxl_two_bit_status,
+			     ARRAY_SIZE(cxl_two_bit_status),
+			     CXL_DHI_AS_DEV_TEMP(ev.add_status))) <= 0)
+		return -1;
+	if (trace_seq_printf(s, "as_cor_vol_err_cnt:%s ", get_cxl_type_str(cxl_one_bit_status,
+			     ARRAY_SIZE(cxl_one_bit_status),
+			     CXL_DHI_AS_COR_VOL_ERR_CNT(ev.add_status))) <= 0)
+		return -1;
+	if (trace_seq_printf(s, "as_cor_per_err_cnt:%s ", get_cxl_type_str(cxl_one_bit_status,
+			     ARRAY_SIZE(cxl_one_bit_status),
+			     CXL_DHI_AS_COR_PER_ERR_CNT(ev.add_status))) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "life_used", record, &val, 1) < 0)
+		return -1;
+	ev.life_used = val;
+	if (trace_seq_printf(s, "life_used:%u ", ev.life_used) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "device_temp", record, &val, 1) < 0)
+		return -1;
+	ev.device_temp = val;	/* signed (int16_t), so it is printed with %d */
+	if (trace_seq_printf(s, "device_temp:%d ", ev.device_temp) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "dirty_shutdown_cnt", record, &val, 1) < 0)
+		return -1;
+	ev.dirty_shutdown_cnt = val;
+	if (trace_seq_printf(s, "dirty_shutdown_cnt:%u ", ev.dirty_shutdown_cnt) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "cor_vol_err_cnt", record, &val, 1) < 0)
+		return -1;
+	ev.cor_vol_err_cnt = val;
+	if (trace_seq_printf(s, "cor_vol_err_cnt:%u ", ev.cor_vol_err_cnt) <= 0)
+		return -1;
+
+	if (tep_get_field_val(s, event, "cor_per_err_cnt", record, &val, 1) < 0)
+		return -1;
+	ev.cor_per_err_cnt = val;
+	if (trace_seq_printf(s, "cor_per_err_cnt:%u ", ev.cor_per_err_cnt) <= 0)
+		return -1;
+
+	/* Insert data into the database */
+#ifdef HAVE_SQLITE3
+	ras_store_cxl_memory_module_event(ras, &ev);
+#endif
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_cxl_memory_module_event(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h
index 35455af..1ea0f93 100644
--- a/ras-cxl-handler.h
+++ b/ras-cxl-handler.h
@@ -41,4 +41,7 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
 int ras_cxl_dram_event_handler(struct trace_seq *s,
 			       struct tep_record *record,
 			       struct tep_event *event, void *context);
+int ras_cxl_memory_module_event_handler(struct trace_seq *s,
+					struct tep_record *record,
+					struct tep_event *event, void *context);
 #endif
diff --git a/ras-events.c b/ras-events.c
index 00159e6..182f28f 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -252,6 +252,7 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_generic_event", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_general_media", enable);
 	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_dram", enable);
+	rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_memory_module", enable);
 #endif
 
 free_ras:
@@ -1040,6 +1041,14 @@ int handle_ras_events(int record_events)
 	else
 		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
 		    "cxl", "cxl_dram");
+
+	rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_memory_module",
+			       ras_cxl_memory_module_event_handler, NULL, CXL_MEMORY_MODULE_EVENT);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "cxl", "cxl_memory_module");
 #endif
 
 	if (!num_events) {
diff --git a/ras-events.h b/ras-events.h
index 3fe28da..ccc1336 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -46,6 +46,7 @@ enum {
 	CXL_GENERIC_EVENT,
 	CXL_GENERAL_MEDIA_EVENT,
 	CXL_DRAM_EVENT,
+	CXL_MEMORY_MODULE_EVENT,
 	NR_EVENTS
 };
 
diff --git a/ras-record.c b/ras-record.c
index 36f43cf..89fca74 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -992,6 +992,74 @@ int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *
 
 	return rc;
 }
+
+/*
+ * Table and functions to handle cxl:cxl_memory_module_event
+ */
+static const struct db_fields cxl_memory_module_event_fields[] = {
+	{ .name = "id",			.type = "INTEGER PRIMARY KEY" },
+	{ .name = "timestamp",		.type = "TEXT" },
+	{ .name = "memdev",		.type = "TEXT" },
+	{ .name = "host",		.type = "TEXT" },
+	{ .name = "serial",		.type = "INTEGER" },
+	{ .name = "log_type",		.type = "TEXT" },
+	{ .name = "hdr_uuid",		.type = "TEXT" },
+	{ .name = "hdr_flags",		.type = "INTEGER" },
+	{ .name = "hdr_handle",		.type = "INTEGER" },
+	{ .name = "hdr_related_handle",	.type = "INTEGER" },
+	{ .name = "hdr_ts",		.type = "TEXT" },
+	{ .name = "hdr_length",		.type = "INTEGER" },
+	{ .name = "hdr_maint_op_class",	.type = "INTEGER" },
+	{ .name = "event_type",		.type = "INTEGER" },
+	{ .name = "health_status",	.type = "INTEGER" },
+	{ .name = "media_status",	.type = "INTEGER" },
+	{ .name = "life_used",		.type = "INTEGER" },
+	{ .name = "dirty_shutdown_cnt",	.type = "INTEGER" },
+	{ .name = "cor_vol_err_cnt",	.type = "INTEGER" },
+	{ .name = "cor_per_err_cnt",	.type = "INTEGER" },
+	{ .name = "device_temp",	.type = "INTEGER" },
+	{ .name = "add_status",		.type = "INTEGER" },
+};
+
+static const struct db_table_descriptor cxl_memory_module_event_tab = {
+	.name = "cxl_memory_module_event",
+	.fields = cxl_memory_module_event_fields,
+	.num_fields = ARRAY_SIZE(cxl_memory_module_event_fields),
+};
+
+int ras_store_cxl_memory_module_event(struct ras_events *ras, struct ras_cxl_memory_module_event *ev)
+{
+	int rc;
+	struct sqlite3_priv *priv = ras->db_priv;
+
+	if (!priv || !priv->stmt_cxl_memory_module_event)
+		return 0;
+	log(TERM, LOG_INFO, "cxl_memory_module_event store: %p\n",
+	    priv->stmt_cxl_memory_module_event);
+
+	ras_store_cxl_common_hdr(priv->stmt_cxl_memory_module_event, &ev->hdr);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 13, ev->event_type);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 14, ev->health_status);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 15, ev->media_status);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 16, ev->life_used);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 17, ev->dirty_shutdown_cnt);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 18, ev->cor_vol_err_cnt);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 19, ev->cor_per_err_cnt);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 20, ev->device_temp);
+	sqlite3_bind_int(priv->stmt_cxl_memory_module_event, 21, ev->add_status);
+
+	rc = sqlite3_step(priv->stmt_cxl_memory_module_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed to do stmt_cxl_memory_module_event step on sqlite: error = %d\n", rc);
+	rc = sqlite3_reset(priv->stmt_cxl_memory_module_event);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
+		log(TERM, LOG_ERR,
+		    "Failed reset stmt_cxl_memory_module_event on sqlite: error = %d\n", rc);
+	log(TERM, LOG_INFO, "register inserted at db\n");
+
+	return rc;
+}
 #endif
 
 /*
@@ -1387,6 +1455,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 		if (rc != SQLITE_OK)
 			goto error;
 	}
+
+	rc = ras_mc_create_table(priv, &cxl_memory_module_event_tab);
+	if (rc == SQLITE_OK) {
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_memory_module_event,
+					 &cxl_memory_module_event_tab);
+		if (rc != SQLITE_OK)
+			goto error;
+	}
 #endif
 
 	ras->db_priv = priv;
@@ -1557,6 +1633,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
 			    "cpu %u: Failed to finalize cxl_dram_event sqlite: error = %d\n",
 			    cpu, rc);
 	}
+
+	if (priv->stmt_cxl_memory_module_event) {
+		rc = sqlite3_finalize(priv->stmt_cxl_memory_module_event);
+		if (rc != SQLITE_OK)
+			log(TERM, LOG_ERR,
+			    "cpu %u: Failed to finalize stmt_cxl_memory_module_event sqlite: error = %d\n",
+			    cpu, rc);
+	}
 #endif
 
 	rc = sqlite3_close_v2(db);
diff --git a/ras-record.h b/ras-record.h
index 480ff92..a7b9ab9 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -218,6 +218,19 @@ struct ras_cxl_dram_event {
 	uint16_t validity_flags;
 };
 
+struct ras_cxl_memory_module_event {
+	struct ras_cxl_event_common_hdr hdr;
+	uint8_t event_type;
+	uint8_t health_status;
+	uint8_t media_status;
+	uint8_t life_used;
+	uint32_t dirty_shutdown_cnt;
+	uint32_t cor_vol_err_cnt;
+	uint32_t cor_per_err_cnt;
+	int16_t device_temp;
+	uint8_t add_status;
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
 struct ras_extlog_event;
@@ -234,6 +247,7 @@ struct ras_cxl_overflow_event;
 struct ras_cxl_generic_event;
 struct ras_cxl_general_media_event;
 struct ras_cxl_dram_event;
+struct ras_cxl_memory_module_event;
 
 #ifdef HAVE_SQLITE3
 
@@ -274,6 +288,7 @@ struct sqlite3_priv {
 	sqlite3_stmt	*stmt_cxl_generic_event;
 	sqlite3_stmt	*stmt_cxl_general_media_event;
 	sqlite3_stmt	*stmt_cxl_dram_event;
+	sqlite3_stmt	*stmt_cxl_memory_module_event;
 #endif
 };
 
@@ -309,6 +324,7 @@ int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow
 int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
 int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev);
 int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev);
+int ras_store_cxl_memory_module_event(struct ras_events *ras, struct ras_cxl_memory_module_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
@@ -329,6 +345,7 @@ static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ra
 static inline int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
 static inline int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; };
 static inline int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) { return 0; };
+static inline int ras_store_cxl_memory_module_event(struct ras_events *ras, struct ras_cxl_memory_module_event *ev) { return 0; };
 
 #endif
 
diff --git a/ras-report.c b/ras-report.c
index 21180b1..a30b66d 100644
--- a/ras-report.c
+++ b/ras-report.c
@@ -605,6 +605,62 @@ static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev
 	return 0;
 }
 
+/*
+ * Build the "BACKTRACE=" record for a CXL memory module event.
+ *
+ * The event header and module fields are formatted, one key=value pair per
+ * line, into a local buffer that is then appended to buf with strcat(), so
+ * buf must already hold a NUL-terminated string.
+ *
+ * Returns 0 on success, or -1 if buf or ev is NULL.
+ */
+static int set_cxl_memory_module_event_backtrace(char *buf, struct ras_cxl_memory_module_event *ev)
+{
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if (!buf || !ev)
+		return -1;
+
+	/* Bounded write: a record longer than bt_buf is truncated rather than
+	 * overrunning the buffer as the unchecked sprintf() could. */
+	snprintf(bt_buf, sizeof(bt_buf), "BACKTRACE="
+		 "timestamp=%s\n"
+		 "memdev=%s\n"
+		 "host=%s\n"
+		 "serial=0x%lx\n"
+		 "log_type=%s\n"
+		 "hdr_uuid=%s\n"
+		 "hdr_flags=0x%x\n"
+		 "hdr_handle=0x%x\n"
+		 "hdr_related_handle=0x%x\n"
+		 "hdr_timestamp=%s\n"
+		 "hdr_length=%u\n"
+		 "hdr_maint_op_class=%u\n"
+		 "event_type=%u\n"
+		 "health_status=%u\n"
+		 "media_status=%u\n"
+		 "life_used=%u\n"
+		 "dirty_shutdown_cnt=%u\n"
+		 "cor_vol_err_cnt=%u\n"
+		 "cor_per_err_cnt=%u\n"
+		 "device_temp=%d\n"
+		 "add_status=%u\n",
+		 ev->hdr.timestamp, ev->hdr.memdev, ev->hdr.host,
+		 ev->hdr.serial, ev->hdr.log_type, ev->hdr.hdr_uuid,
+		 ev->hdr.hdr_flags, ev->hdr.hdr_handle,
+		 ev->hdr.hdr_related_handle, ev->hdr.hdr_timestamp,
+		 ev->hdr.hdr_length, ev->hdr.hdr_maint_op_class,
+		 ev->event_type, ev->health_status, ev->media_status,
+		 ev->life_used, ev->dirty_shutdown_cnt,
+		 ev->cor_vol_err_cnt, ev->cor_per_err_cnt,
+		 ev->device_temp, ev->add_status);
+
+	/* NOTE(review): buf's capacity is not visible here; confirm with callers. */
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
 static int commit_report_backtrace(int sockfd, int type, void *ev){
 	char buf[MAX_BACKTRACE_SIZE];
 	char *pbuf = buf;
@@ -663,6 +719,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){
 	case CXL_DRAM_EVENT:
 		rc = set_cxl_dram_event_backtrace(buf, (struct ras_cxl_dram_event *)ev);
 		break;
+	case CXL_MEMORY_MODULE_EVENT:
+		rc = set_cxl_memory_module_event_backtrace(buf, (struct ras_cxl_memory_module_event *)ev);
+		break;
 	default:
 		return -1;
 	}
@@ -1380,3 +1439,47 @@ cxl_dram_fail:
 	else
 		return -1;
 }
+
+/*
+ * Send a CXL memory module event report over the report socket by calling
+ * commit_report_basic() and commit_report_backtrace(), then writing the
+ * ANALYZER and REASON items. Returns 0 if all succeed, else -1. ras is unused.
+ */
+int ras_report_cxl_memory_module_event(struct ras_events *ras, struct ras_cxl_memory_module_event *ev)
+{
+	char buf[MAX_MESSAGE_SIZE];
+	int ret = -1;
+	ssize_t rc;
+	int sockfd;
+
+	sockfd = setup_report_socket();
+	if (sockfd < 0)
+		return -1;
+
+	if (commit_report_basic(sockfd) < 0)
+		goto cxl_memory_module_fail;
+
+	if (commit_report_backtrace(sockfd, CXL_MEMORY_MODULE_EVENT, ev) < 0)
+		goto cxl_memory_module_fail;
+
+	/*
+	 * write() returns -1 on error; test for that before comparing with the
+	 * unsigned length, otherwise -1 converts to a huge value and passes.
+	 */
+	snprintf(buf, sizeof(buf), "ANALYZER=%s", "rasdaemon-cxl_memory_module_event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < 0 || (size_t)rc < strlen(buf) + 1)
+		goto cxl_memory_module_fail;
+
+	snprintf(buf, sizeof(buf), "REASON=%s", "CXL Memory Module Event");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if (rc < 0 || (size_t)rc < strlen(buf) + 1)
+		goto cxl_memory_module_fail;
+
+	ret = 0;
+
+cxl_memory_module_fail:
+	close(sockfd);
+
+	return ret;
+}
diff --git a/ras-report.h b/ras-report.h
index 1ad00e0..e401850 100644
--- a/ras-report.h
+++ b/ras-report.h
@@ -46,6 +46,7 @@ int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflo
 int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev);
 int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev);
 int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev);
+int ras_report_cxl_memory_module_event(struct ras_events *ras, struct ras_cxl_memory_module_event *ev);
 
 #else
 
@@ -64,6 +65,7 @@ static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct r
 static inline int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; };
 static inline int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; };
 static inline int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) { return 0; };
+static inline int ras_report_cxl_memory_module_event(struct ras_events *ras, struct ras_cxl_memory_module_event *ev) { return 0; };
 
 #endif
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-04-12  9:23 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-12  8:33 [RFC PATCH 0/7] rasdaemon: Process the generic CXL trace events shiju.jose
2023-04-12  8:33 ` [RFC PATCH 1/7] rasdaemon: Add common function to convert timestamp in the CXL event records to the broken-down time format shiju.jose
2023-04-12  8:33 ` [RFC PATCH 2/7] rasdaemon: Add common function to get timestamp for the event shiju.jose
2023-04-12  8:33 ` [RFC PATCH 3/7] rasdaemon: Add support for the CXL overflow events shiju.jose
2023-04-12  8:33 ` [RFC PATCH 4/7] rasdaemon: Add support for the CXL generic events shiju.jose
2023-04-12  8:33 ` [RFC PATCH 5/7] rasdaemon: Add support for the CXL general media events shiju.jose
2023-04-12  8:33 ` [RFC PATCH 6/7] rasdaemon: Add support for the CXL dram events shiju.jose
2023-04-12  8:33 ` [RFC PATCH 7/7] rasdaemon: Add support for the CXL memory module events shiju.jose

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).