From: Borislav Petkov <bp@alien8.de>
To: Tony Luck <tony.luck@intel.com>
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>, X86 ML <x86@kernel.org>,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 5/9] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
Date: Tue, 7 Apr 2020 18:34:10 +0200 [thread overview]
Message-ID: <20200407163414.18058-6-bp@alien8.de> (raw)
In-Reply-To: <20200407163414.18058-1-bp@alien8.de>
From: Tony Luck <tony.luck@intel.com>
If the handler took any action to log or deal with the error, set a bit
in mce->kflags so that the default handler at the end of the machine
check chain can see what has been done.
Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers
skip over errors already processed by CEC.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com
---
arch/x86/kernel/cpu/mce/core.c | 4 +++-
arch/x86/kernel/cpu/mce/dev-mcelog.c | 5 +++++
drivers/acpi/acpi_extlog.c | 5 +++--
drivers/acpi/nfit/mce.c | 1 +
drivers/edac/i7core_edac.c | 5 +++--
drivers/edac/mce_amd.c | 6 +++++-
drivers/edac/pnd2_edac.c | 5 +++--
drivers/edac/sb_edac.c | 5 ++++-
drivers/edac/skx_common.c | 4 ++++
drivers/ras/cec.c | 9 ++++++---
10 files changed, 37 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b033b3589630..5666a48a4bc9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -581,8 +581,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
return NOTIFY_DONE;
pfn = mce->addr >> PAGE_SHIFT;
- if (!memory_failure(pfn, 0))
+ if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn);
+ mce->kflags |= MCE_HANDLED_UC;
+ }
return NOTIFY_OK;
}
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index d089567a9ce8..c033e7ea9e3c 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -39,6 +39,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data;
unsigned int entry;
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
mutex_lock(&mce_chrdev_read_mutex);
entry = mcelog->next;
@@ -56,6 +59,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
mcelog->entry[entry].finished = 1;
+ mcelog->entry[entry].kflags = 0;
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -63,6 +67,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
unlock:
mutex_unlock(&mce_chrdev_read_mutex);
+ mce->kflags |= MCE_HANDLED_MCELOG;
return NOTIFY_OK;
}
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 8596a106a933..9cc3c1f92db5 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -146,7 +146,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank);
- if (estatus == NULL)
+ if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE;
memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
@@ -176,7 +176,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
}
out:
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EXTLOG;
+ return NOTIFY_OK;
}
static bool __init extlog_get_l1addr(void)
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index f0ae48515b48..ee8d9973f60b 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -76,6 +76,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
*/
acpi_nfit_ars_rescan(acpi_desc, 0);
}
+ mce->kflags |= MCE_HANDLED_NFIT;
break;
}
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index b3135b208f9a..5860ca41185c 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
struct mem_ctl_info *mci;
i7_dev = get_i7core_dev(mce->socketid);
- if (!i7_dev)
+ if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE;
mci = i7_dev->mci;
@@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
i7core_check_error(mci, mce);
/* Advise mcelog that the errors were handled */
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block i7_mce_dec = {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index e58644d9c92b..2b5401db56ad 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1046,6 +1046,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
unsigned int fam = x86_family(m->cpuid);
int ecc;
+ if (m->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
pr_emerg(HW_ERR "%s\n", decode_error_status(m));
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
@@ -1146,7 +1149,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
err_code:
amd_decode_err_code(m->status & 0xffff);
- return NOTIFY_STOP;
+ m->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block amd_mce_dec_nb = {
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index bc47328eb485..1929a5dc8f94 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
return NOTIFY_DONE;
mci = pnd2_mci;
- if (!mci)
+ if (!mci || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE;
/*
@@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
pnd2_mce_output_error(mci, mce, &daddr);
/* Advice mcelog that the error were handled */
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block pnd2_mce_dec = {
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 7d51c82be62b..f790f7d08688 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
/*
* Just let mcelog handle it if the error is
@@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
sbridge_mce_output_error(mci, mce);
/* Advice mcelog that the error were handled */
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block sbridge_mce_dec = {
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 99bbaf629b8d..6f08a12f6b11 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
/* ignore unless this is memory related with an address */
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
@@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
skx_mce_output_error(mci, mce, &res);
+ mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_DONE;
}
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 6b42040bf956..569d9ad2c594 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -538,9 +538,12 @@ static int cec_notifier(struct notifier_block *nb, unsigned long val,
/* We eat only correctable DRAM errors with usable addresses. */
if (mce_is_memory_error(m) &&
mce_is_correctable(m) &&
- mce_usable_address(m))
- if (!cec_add_elem(m->addr >> PAGE_SHIFT))
- return NOTIFY_STOP;
+ mce_usable_address(m)) {
+ if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
+ m->kflags |= MCE_HANDLED_CEC;
+ return NOTIFY_OK;
+ }
+ }
return NOTIFY_DONE;
}
--
2.21.0
next prev parent reply other threads:[~2020-04-07 16:34 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-12 20:46 [RFC PATCH 0/5] New way to track mce notifier chain actions Tony Luck
2020-02-12 20:46 ` [PATCH 1/5] x86/mce: Rename "first" function as "early" Tony Luck
2020-02-12 20:46 ` [PATCH 2/5] x86/mce: Convert corrected error collector to use mce notifier Tony Luck
2020-02-12 20:46 ` [PATCH 3/5] x86/mce: Add new "handled" field to "struct mce" Tony Luck
2020-02-13 16:56 ` Borislav Petkov
2020-02-13 22:09 ` Luck, Tony
2020-02-14 8:50 ` Borislav Petkov
2020-02-12 20:46 ` [PATCH 4/5] x86/mce: Fix all mce notifiers to update the mce->handled bitmask Tony Luck
2020-02-13 17:03 ` Borislav Petkov
2020-02-13 22:19 ` Luck, Tony
2020-02-13 22:27 ` Andy Lutomirski
2020-02-13 23:08 ` Luck, Tony
2020-02-14 9:02 ` Borislav Petkov
2020-02-14 0:18 ` Thomas Gleixner
2020-02-14 8:59 ` Borislav Petkov
2020-02-12 20:46 ` [PATCH 5/5] x86/mce: Change default mce logger to check mce->handled Tony Luck
2020-02-13 17:08 ` Borislav Petkov
2020-02-13 22:27 ` Luck, Tony
2020-02-14 9:05 ` Borislav Petkov
2020-02-12 23:08 ` [RFC PATCH 0/5] New way to track mce notifier chain actions Luck, Tony
2020-02-13 5:52 ` Andy Lutomirski
2020-02-13 6:09 ` Borislav Petkov
2020-02-13 16:05 ` Andy Lutomirski
2020-02-14 22:27 ` [PATCH v2 0/7] " Tony Luck
2020-02-14 22:27 ` [PATCH v2 1/7] x86/mce: Rename "first" function as "early" Tony Luck
2020-04-15 9:49 ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-02-14 22:27 ` [PATCH v2 2/7] x86/mce: Convert corrected error collector to use mce notifier Tony Luck
2020-04-15 9:49 ` [tip: ras/core] x86/mce: Convert the CEC to use the MCE notifier tip-bot2 for Tony Luck
2020-02-14 22:27 ` [PATCH v2 3/7] x86/mce: Add new "kflags" field to "struct mce" Tony Luck
2020-04-15 9:49 ` [tip: ras/core] x86/mce: Add a struct mce.kflags field tip-bot2 for Tony Luck
2020-04-15 18:19 ` Luck, Tony
2020-04-15 18:36 ` Borislav Petkov
2020-04-15 19:58 ` [PATCH] x86/mce: Drop bogus comment about mce.kflags Luck, Tony
2020-04-17 9:21 ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-04-20 8:06 ` [tip: ras/core] x86/mce: Add a struct mce.kflags field Christoph Hellwig
2020-04-20 8:42 ` Borislav Petkov
2020-02-14 22:27 ` [PATCH v2 4/7] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask Tony Luck
2020-04-07 8:21 ` Borislav Petkov
2020-04-15 9:49 ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-02-14 22:27 ` [PATCH v2 5/7] x86/mce: Change default mce logger to check mce->kflags Tony Luck
2020-04-07 11:10 ` Borislav Petkov
2020-04-07 16:43 ` Luck, Tony
2020-04-07 19:37 ` Borislav Petkov
2020-04-07 19:44 ` Luck, Tony
2020-04-15 9:49 ` [tip: ras/core] x86/mce: Change default MCE " tip-bot2 for Tony Luck
2020-02-14 22:27 ` [PATCH v2 6/7] x86/mce: Add mce=print_all option Tony Luck
2020-04-15 9:49 ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-02-14 22:27 ` [PATCH v2 7/7] x86/mce: Drop the EDAC report status checks Tony Luck
2020-04-15 9:49 ` [tip: ras/core] EDAC: " tip-bot2 for Tony Luck
2020-04-07 16:34 ` [PATCH 0/9 v3] New way to track mce notifier chain actions Borislav Petkov
2020-04-07 16:34 ` [PATCH 1/9] x86/mce/amd, edac: Remove report_gart_errors Borislav Petkov
2020-04-15 9:49 ` [tip: ras/core] " tip-bot2 for Borislav Petkov
2020-04-07 16:34 ` [PATCH 2/9] x86/mce: Rename "first" function as "early" Borislav Petkov
2020-04-07 16:34 ` [PATCH 3/9] x86/mce: Convert the CEC to use the MCE notifier Borislav Petkov
2020-04-07 16:34 ` [PATCH 4/9] x86/mce: Add a struct mce.kflags field Borislav Petkov
2020-04-07 16:34 ` Borislav Petkov [this message]
2020-04-07 16:34 ` [PATCH 6/9] x86/mce: Change default MCE logger to check mce->kflags Borislav Petkov
2020-04-07 16:34 ` [PATCH 7/9] x86/mce: Add mce=print_all option Borislav Petkov
2020-04-07 16:34 ` [PATCH 8/9] EDAC: Drop the EDAC report status checks Borislav Petkov
2020-04-07 16:34 ` [PATCH 9/9] x86/mce: Fixup exception only for the correct MCEs Borislav Petkov
2020-04-15 9:49 ` [tip: ras/core] " tip-bot2 for Borislav Petkov
2020-04-07 19:53 ` [PATCH 0/9 v3] New way to track mce notifier chain actions Luck, Tony
2020-04-07 19:56 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200407163414.18058-6-bp@alien8.de \
--to=bp@alien8.de \
--cc=Yazen.Ghannam@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=tony.luck@intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).