linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] ACPI: APEI: fix missing erst record id
@ 2022-04-08  8:27 Liu Xinpeng
  2022-04-08 19:24 ` Luck, Tony
  0 siblings, 1 reply; 2+ messages in thread
From: Liu Xinpeng @ 2022-04-08  8:27 UTC (permalink / raw)
  To: rafael, dave.hansen, x86, hpa, keescook, anton, ccross,
	robert.moore, tony.luck, lenb, james.morse, bp, tglx, mingo,
	ying.huang, gong.chen
  Cc: linux-kernel, linux-acpi, Liu Xinpeng

When a record has been cleared by others, erst_get_record_id_next still
creates a cache entry for the deleted record. On the next enumeration of
the records, the cached ID of the deleted record is returned, erst_read
returns -ENOENT and the caller tries to get the next record. When the
lookup loops back to the first ID, __erst_record_id_cache_add_one returns
0, so record_id is set to APEI_ERST_INVALID_RECORD_ID and the current read
operation finishes. As a result, only the records already in the cache are
read from then on.

This patch clears the cache entry of the deleted record, fixing the issue
that "./erst-inject -p" shows a record count not equal to the one reported
by "./erst-inject -n".

A reproducer of the problem (retry many times):

[root@localhost erst-inject]# ./erst-inject -c 0xaaaaa00011
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000006
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000007
[root@localhost erst-inject]# ./erst-inject -i 0xaaaaa000008
[root@localhost erst-inject]# ./erst-inject -p
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00012
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00013
rc: 273
rcd sig: CPER
rcd id: 0xaaaaa00014
[root@localhost erst-inject]# ./erst-inject -n
total error record count: 6

v1->v2  fix style problems
v2->v3  fix apei_read_mce, which calls erst_get_record_id_next, and modify
the commit message.
v3->v4  add erst_clear_cache to another retry path.

Signed-off-by: Liu Xinpeng <liuxp11@chinatelecom.cn>
---
 arch/x86/kernel/cpu/mce/apei.c |  9 ++++++---
 drivers/acpi/apei/erst-dbg.c   |  4 +++-
 drivers/acpi/apei/erst.c       | 34 +++++++++++++++++++++++++++++++---
 include/acpi/apei.h            |  1 +
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 0e3ae64d3b76..d77de72a91d2 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -179,14 +179,17 @@ ssize_t apei_read_mce(struct mce *m, u64 *record_id)
 		goto out;
 	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
 	/* someone else has cleared the record, try next one */
-	if (rc == -ENOENT)
+	if (rc == -ENOENT) {
+		erst_clear_cache(*record_id);
 		goto retry;
-	else if (rc < 0)
+	} else if (rc < 0)
 		goto out;
 	/* try to skip other type records in storage */
 	else if (rc != sizeof(rcd) ||
-		 !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
+		 !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE)) {
+		erst_clear_cache(*record_id);
 		goto retry;
+	}
 	memcpy(m, &rcd.mce, sizeof(*m));
 	rc = sizeof(*m);
 out:
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index c740f0faad39..5b8164280a17 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -113,8 +113,10 @@ static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
 retry:
 	rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
 	/* The record may be cleared by others, try read next record */
-	if (rc == -ENOENT)
+	if (rc == -ENOENT) {
+		erst_clear_cache(id);
 		goto retry_next;
+	}
 	if (rc < 0)
 		goto out;
 	if (len > ERST_DBG_RECORD_LEN_MAX) {
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 698d67cee052..07d69dc7fd62 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -856,6 +856,31 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 }
 EXPORT_SYMBOL_GPL(erst_read);
 
+int erst_clear_cache(u64 record_id)
+{
+	int rc, i;
+	u64 *entries;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+	if (rc)
+		return rc;
+
+	entries = erst_record_id_cache.entries;
+	for (i = 0; i < erst_record_id_cache.len; i++) {
+		if (entries[i] == record_id)
+			entries[i] = APEI_ERST_INVALID_RECORD_ID;
+	}
+	__erst_record_id_cache_compact();
+
+	mutex_unlock(&erst_record_id_cache.lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear_cache);
+
 int erst_clear(u64 record_id)
 {
 	int rc, i;
@@ -998,14 +1023,17 @@ static ssize_t erst_reader(struct pstore_record *record)
 
 	len = erst_read(record_id, &rcd->hdr, rcd_len);
 	/* The record may be cleared by others, try read next record */
-	if (len == -ENOENT)
+	if (len == -ENOENT) {
+		erst_clear_cache(record_id);
 		goto skip;
-	else if (len < 0 || len < sizeof(*rcd)) {
+	} else if (len < 0 || len < sizeof(*rcd)) {
 		rc = -EIO;
 		goto out;
 	}
-	if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE))
+	if (!guid_equal(&rcd->hdr.creator_id, &CPER_CREATOR_PSTORE)) {
+		erst_clear_cache(record_id);
 		goto skip;
+	}
 
 	record->buf = kmalloc(len, GFP_KERNEL);
 	if (record->buf == NULL) {
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index afaca3a075e8..f8c11ff4115a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -47,6 +47,7 @@ void erst_get_record_id_end(void);
 ssize_t erst_read(u64 record_id, struct cper_record_header *record,
 		  size_t buflen);
 int erst_clear(u64 record_id);
+int erst_clear_cache(u64 record_id);
 
 int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
 void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* RE: [PATCH v4] ACPI: APEI: fix missing erst record id
  2022-04-08  8:27 [PATCH v4] ACPI: APEI: fix missing erst record id Liu Xinpeng
@ 2022-04-08 19:24 ` Luck, Tony
  0 siblings, 0 replies; 2+ messages in thread
From: Luck, Tony @ 2022-04-08 19:24 UTC (permalink / raw)
  To: liuxp11, rafael, dave.hansen, x86, hpa, keescook, anton, ccross,
	Moore, Robert, lenb, james.morse, bp, tglx, mingo, Huang, Ying,
	gong.chen
  Cc: linux-kernel, linux-acpi

I think it might be a cleaner solution if you first implement a new function:

/*
 * Like erst_read(), but look for a specific record type (by size and guid)
 * also retry -ENOENT returns by skipping to next record_id
 */
ssize_t erst_read_record(u64 *record_id, struct cper_record_header *record,
			size_t buflen, size_t recordlen, const guid_t *guid)
{
}

Step 2: Update apei_read_mce() and erst_reader() to use this function.

Step 3: Apply your erst_clear_cache() fix to the new function

-Tony

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-04-08 19:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-08  8:27 [PATCH v4] ACPI: APEI: fix missing erst record id Liu Xinpeng
2022-04-08 19:24 ` Luck, Tony

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).