linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yazen Ghannam <yazen.ghannam@amd.com>
To: <bp@alien8.de>, <tony.luck@intel.com>, <linux-edac@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>, <avadhut.naik@amd.com>,
	<john.allen@amd.com>, <muralidhara.mk@amd.com>,
	<sathyapriya.k@amd.com>, <naveenkrishna.chatradhi@amd.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>
Subject: [PATCH v2 2/3] RAS/AMD/FMPM: Save SPA values
Date: Fri, 1 Mar 2024 08:37:47 -0600	[thread overview]
Message-ID: <20240301143748.854090-3-yazen.ghannam@amd.com> (raw)
In-Reply-To: <20240301143748.854090-1-yazen.ghannam@amd.com>

The system physical address (SPA) of an error is not a stable value. It
will change depending on the location of the memory: parts can be
swapped. And it will change depending on memory topology: NUMA nodes
and/or interleaving can be adjusted.

Therefore, the SPA value is not part of the "FRU Memory Poison" record
format. And it will not be saved to persistent storage.

However, the SPA values can be helpful during debug and for system
admins during run time.

Save the SPA values in a separate structure. This is updated when
records are restored and when new errors are saved.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
Link:
https://lore.kernel.org/r/20240226152941.2615007-3-yazen.ghannam@amd.com

v1->v2:
* Changed variable names to remove "sys_" prefix. (Boris)
* Used "spa_" prefix to highlight that these are for SPA values. (Yazen)
* Added warning to "index out-of-bound" condition. (Boris)
* Reworked save_spa() flow to get a valid array position before saving
  SPA value (Yazen).

 drivers/ras/amd/fmpm.c | 68 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c
index 80dd112b720a..a7bb36eb60cb 100644
--- a/drivers/ras/amd/fmpm.c
+++ b/drivers/ras/amd/fmpm.c
@@ -111,6 +111,11 @@ struct fru_rec {
  */
 static struct fru_rec **fru_records;
 
+/* System physical addresses (SPAs), max_nr_entries slots per FRU. */
+static u64 *spa_entries;
+
+#define INVALID_SPA	~0ULL
+
 #define CPER_CREATOR_FMP						\
 	GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3,	\
 		  0xa0, 0x33, 0x08, 0x75)
@@ -140,6 +145,9 @@ static unsigned int max_nr_fru;
 /* Total length of record including headers and list of descriptor entries. */
 static size_t max_rec_len;
 
+/* Total SPA entries across all FRUs: max_nr_fru * max_nr_entries. */
+static unsigned int spa_nr_entries;
+
 /*
  * Protect the local records cache in fru_records and prevent concurrent
  * writes to storage. This is only needed after init once notifier block
@@ -269,6 +277,69 @@ static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd)
 	return false;
 }
 
+/*
+ * Translate one descriptor entry's address to a system physical address
+ * (SPA) and cache it in spa_entries[].
+ *
+ * @rec:   record owning the entry; looked up by pointer in fru_records[]
+ * @entry: index of the descriptor entry within @rec
+ * @addr:  address to translate (stored in atl_err.addr)
+ * @id:    stored in atl_err.ipid for the translation
+ * @cpu:   stored in atl_err.cpu for the translation
+ *
+ * spa_entries[] holds max_nr_entries consecutive slots per FRU, in
+ * fru_records[] order. If the lookup or the translation fails, the slot is
+ * left untouched.
+ */
+static void save_spa(struct fru_rec *rec, unsigned int entry,
+		     u64 addr, u64 id, unsigned int cpu)
+{
+	unsigned int i, fru_idx, spa_entry;
+	struct atl_err a_err;
+	unsigned long spa;
+
+	if (entry >= max_nr_entries) {
+		pr_warn_once("entry out-of-bounds\n");
+		return;
+	}
+
+	/* spa_nr_entries is always a multiple of max_nr_entries. */
+	for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
+		fru_idx = i / max_nr_entries;
+		if (fru_records[fru_idx] == rec)
+			break;
+	}
+
+	/* Loop ran to completion: @rec is not in fru_records[]. */
+	if (i >= spa_nr_entries) {
+		pr_warn_once("record not found\n");
+		return;
+	}
+
+	/* i is the first slot of this FRU; add the per-FRU entry offset. */
+	spa_entry = i + entry;
+	if (spa_entry >= spa_nr_entries) {
+		pr_warn_once("spa_entries[] index out-of-bounds\n");
+		return;
+	}
+
+	memset(&a_err, 0, sizeof(struct atl_err));
+
+	a_err.addr = addr;
+	a_err.ipid = id;
+	a_err.cpu  = cpu;
+
+	spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
+	if (IS_ERR_VALUE(spa)) {
+		pr_debug("Failed to get system address\n");
+		return;
+	}
+
+	spa_entries[spa_entry] = spa;
+	pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
+		 fru_idx, entry, spa_entry, spa_entries[spa_entry]);
+}
+
 static void update_fru_record(struct fru_rec *rec, struct mce *m)
 {
 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
@@ -301,6 +355,7 @@ static void update_fru_record(struct fru_rec *rec, struct mce *m)
 	entry  = fmp->nr_entries;
 
 save_fpd:
+	save_spa(rec, entry, m->addr, m->ipid, m->extcpu);
 	fpd_dest  = &rec->entries[entry];
 	memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc));
 
@@ -385,6 +440,7 @@ static void retire_mem_fmp(struct fru_rec *rec)
 			continue;
 
 		retire_dram_row(fpd->addr, fpd->hw_id, err_cpu);
+		save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu);
 	}
 }
 
@@ -696,6 +752,8 @@ static int get_system_info(void)
 	if (!max_nr_entries)
 		max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES;
 
+	spa_nr_entries = max_nr_fru * max_nr_entries;
+
 	max_rec_len  = sizeof(struct fru_rec);
 	max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries;
 
@@ -714,6 +772,7 @@ static void free_records(void)
 		kfree(rec);
 
 	kfree(fru_records);
+	kfree(spa_entries);
 }
 
 static int allocate_records(void)
@@ -734,6 +793,15 @@ static int allocate_records(void)
 		}
 	}
 
+	spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL);
+	if (!spa_entries) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	for (i = 0; i < spa_nr_entries; i++)
+		spa_entries[i] = INVALID_SPA;
+
 	return ret;
 
 out_free:
-- 
2.34.1


  parent reply	other threads:[~2024-03-01 14:38 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-01 14:37 [PATCH v2 0/3] FMPM Debug Updates Yazen Ghannam
2024-03-01 14:37 ` [PATCH v2 1/3] RAS: Export helper to get ras_debugfs_dir Yazen Ghannam
2024-03-01 14:37 ` Yazen Ghannam [this message]
2024-03-01 15:50   ` [PATCH v2 2/3] RAS/AMD/FMPM: Save SPA values Borislav Petkov
2024-03-01 14:37 ` [PATCH v2 3/3] RAS/AMD/FMPM: Add debugfs interface to print record entries Yazen Ghannam
2024-03-02  9:49 ` [PATCH v2 0/3] FMPM Debug Updates Borislav Petkov
2024-03-04  5:13 ` M K, Muralidhara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240301143748.854090-3-yazen.ghannam@amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=avadhut.naik@amd.com \
    --cc=bp@alien8.de \
    --cc=john.allen@amd.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=muralidhara.mk@amd.com \
    --cc=naveenkrishna.chatradhi@amd.com \
    --cc=sathyapriya.k@amd.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).