All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tony Luck <tony.luck@intel.com>
To: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>,
	x86@kernel.org, Andy Lutomirski <luto@kernel.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2 4/7] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
Date: Fri, 14 Feb 2020 14:27:17 -0800	[thread overview]
Message-ID: <20200214222720.13168-5-tony.luck@intel.com> (raw)
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

If the handler took any action to log or deal with the error, set
a bit in mce->kflags so that the default handler at the end of
the machine check chain can see what has been done.

Get rid of NOTIFY_STOP (well almost ... mce_amd.c is currently using
it to filter out some GART TLB errors ... need to deal with that
later).

Make the EDAC and dev-mcelog handlers skip over errors already
processed by CEC.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mce/core.c       | 4 +++-
 arch/x86/kernel/cpu/mce/dev-mcelog.c | 5 +++++
 drivers/acpi/acpi_extlog.c           | 5 +++--
 drivers/acpi/nfit/mce.c              | 1 +
 drivers/edac/i7core_edac.c           | 5 +++--
 drivers/edac/mce_amd.c               | 9 +++++++--
 drivers/edac/pnd2_edac.c             | 5 +++--
 drivers/edac/sb_edac.c               | 5 ++++-
 drivers/edac/skx_common.c            | 4 ++++
 drivers/ras/cec.c                    | 9 ++++++---
 10 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 06240cbe6f3e..d3d11d1e52b3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -579,8 +579,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
 		return NOTIFY_DONE;
 
 	pfn = mce->addr >> PAGE_SHIFT;
-	if (!memory_failure(pfn, 0))
+	if (!memory_failure(pfn, 0)) {
 		set_mce_nospec(pfn);
+		mce->kflags |= MCE_HANDLED_UC;
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 7c8958dee103..f1bf7535ead7 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -43,6 +43,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 	struct mce *mce = (struct mce *)data;
 	unsigned int entry;
 
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
+
 	mutex_lock(&mce_chrdev_read_mutex);
 
 	entry = mcelog.next;
@@ -60,6 +63,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
 	mcelog.entry[entry].finished = 1;
+	mcelog.entry[entry].kflags = 0;
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
@@ -67,6 +71,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 unlock:
 	mutex_unlock(&mce_chrdev_read_mutex);
 
+	mce->kflags |= MCE_HANDLED_MCELOG;
 	return NOTIFY_OK;
 }
 
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 8596a106a933..9cc3c1f92db5 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -146,7 +146,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
-	if (estatus == NULL)
+	if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
 
 	memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
@@ -176,7 +176,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	}
 
 out:
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EXTLOG;
+	return NOTIFY_OK;
 }
 
 static bool __init extlog_get_l1addr(void)
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index f0ae48515b48..ee8d9973f60b 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -76,6 +76,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 			 */
 			acpi_nfit_ars_rescan(acpi_desc, 0);
 		}
+		mce->kflags |= MCE_HANDLED_NFIT;
 		break;
 	}
 
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index b3135b208f9a..5860ca41185c 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 
 	i7_dev = get_i7core_dev(mce->socketid);
-	if (!i7_dev)
+	if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
 
 	mci = i7_dev->mci;
@@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 	i7core_check_error(mci, mce);
 
 	/* Advise mcelog that the errors were handled */
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block i7_mce_dec = {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea980c556f2e..e31e4db64e1b 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1067,8 +1067,12 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 	unsigned int fam = x86_family(m->cpuid);
 	int ecc;
 
-	if (ignore_mce(m))
+	if (ignore_mce(m)) {
+		m->kflags |= MCE_HANDLED_EDAC;
 		return NOTIFY_STOP;
+	}
+	if (m->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
 
 	pr_emerg(HW_ERR "%s\n", decode_error_status(m));
 
@@ -1170,7 +1174,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
  err_code:
 	amd_decode_err_code(m->status & 0xffff);
 
-	return NOTIFY_STOP;
+	m->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block amd_mce_dec_nb = {
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 933f7722b893..77ad315c7e8d 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 		return NOTIFY_DONE;
 
 	mci = pnd2_mci;
-	if (!mci)
+	if (!mci || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
 
 	/*
@@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 	pnd2_mce_output_error(mci, mce, &daddr);
 
 	/* Advice mcelog that the error were handled */
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block pnd2_mce_dec = {
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 4957e8ee1879..6e17f601ea63 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
 
 	/*
 	 * Just let mcelog handle it if the error is
@@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	sbridge_mce_output_error(mci, mce);
 
 	/* Advice mcelog that the error were handled */
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block sbridge_mce_dec = {
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 99bbaf629b8d..6f08a12f6b11 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
+
 	/* ignore unless this is memory related with an address */
 	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
 		return NOTIFY_DONE;
@@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	skx_mce_output_error(mci, mce, &res);
 
+	mce->kflags |= MCE_HANDLED_EDAC;
 	return NOTIFY_DONE;
 }
 
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index d7f6718cbf8d..e061962d3c58 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -538,9 +538,12 @@ static int cec_notifier(struct notifier_block *nb, unsigned long val,
 	/* We eat only correctable DRAM errors with usable addresses. */
 	if (mce_is_memory_error(m) &&
 	    mce_is_correctable(m)  &&
-	    mce_usable_address(m))
-		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
-			return NOTIFY_STOP;
+	    mce_usable_address(m)) {
+		if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
+			m->kflags |= MCE_HANDLED_CEC;
+			return NOTIFY_OK;
+		}
+	}
 
 	return NOTIFY_DONE;
 }
-- 
2.21.1


  parent reply	other threads:[~2020-02-14 22:28 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-12 20:46 [RFC PATCH 0/5] New way to track mce notifier chain actions Tony Luck
2020-02-12 20:46 ` [PATCH 1/5] x86/mce: Rename "first" function as "early" Tony Luck
2020-02-12 20:46 ` [PATCH 2/5] x86/mce: Convert corrected error collector to use mce notifier Tony Luck
2020-02-12 20:46 ` [PATCH 3/5] x86/mce: Add new "handled" field to "struct mce" Tony Luck
2020-02-13 16:56   ` Borislav Petkov
2020-02-13 22:09     ` Luck, Tony
2020-02-14  8:50       ` Borislav Petkov
2020-02-12 20:46 ` [PATCH 4/5] x86/mce: Fix all mce notifiers to update the mce->handled bitmask Tony Luck
2020-02-13 17:03   ` Borislav Petkov
2020-02-13 22:19     ` Luck, Tony
2020-02-13 22:27       ` Andy Lutomirski
2020-02-13 23:08         ` Luck, Tony
2020-02-14  9:02           ` Borislav Petkov
2020-02-14  0:18         ` Thomas Gleixner
2020-02-14  8:59       ` Borislav Petkov
2020-02-12 20:46 ` [PATCH 5/5] x86/mce: Change default mce logger to check mce->handled Tony Luck
2020-02-13 17:08   ` Borislav Petkov
2020-02-13 22:27     ` Luck, Tony
2020-02-14  9:05       ` Borislav Petkov
2020-02-12 23:08 ` [RFC PATCH 0/5] New way to track mce notifier chain actions Luck, Tony
2020-02-13  5:52   ` Andy Lutomirski
2020-02-13  6:09     ` Borislav Petkov
2020-02-13 16:05       ` Andy Lutomirski
2020-02-14 22:27 ` [PATCH v2 0/7] " Tony Luck
2020-02-14 22:27   ` [PATCH v2 1/7] x86/mce: Rename "first" function as "early" Tony Luck
2020-04-15  9:49     ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-02-14 22:27   ` [PATCH v2 2/7] x86/mce: Convert corrected error collector to use mce notifier Tony Luck
2020-04-15  9:49     ` [tip: ras/core] x86/mce: Convert the CEC to use the MCE notifier tip-bot2 for Tony Luck
2020-02-14 22:27   ` [PATCH v2 3/7] x86/mce: Add new "kflags" field to "struct mce" Tony Luck
2020-04-15  9:49     ` [tip: ras/core] x86/mce: Add a struct mce.kflags field tip-bot2 for Tony Luck
2020-04-15 18:19       ` Luck, Tony
2020-04-15 18:36         ` Borislav Petkov
2020-04-15 19:58           ` [PATCH] x86/mce: Drop bogus comment about mce.kflags Luck, Tony
2020-04-17  9:21             ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-04-20  8:06       ` [tip: ras/core] x86/mce: Add a struct mce.kflags field Christoph Hellwig
2020-04-20  8:42         ` Borislav Petkov
2020-02-14 22:27   ` Tony Luck [this message]
2020-04-07  8:21     ` [PATCH v2 4/7] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask Borislav Petkov
2020-04-15  9:49     ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-02-14 22:27   ` [PATCH v2 5/7] x86/mce: Change default mce logger to check mce->kflags Tony Luck
2020-04-07 11:10     ` Borislav Petkov
2020-04-07 16:43       ` Luck, Tony
2020-04-07 19:37         ` Borislav Petkov
2020-04-07 19:44           ` Luck, Tony
2020-04-15  9:49     ` [tip: ras/core] x86/mce: Change default MCE " tip-bot2 for Tony Luck
2020-02-14 22:27   ` [PATCH v2 6/7] x86/mce: Add mce=print_all option Tony Luck
2020-04-15  9:49     ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-02-14 22:27   ` [PATCH v2 7/7] x86/mce: Drop the EDAC report status checks Tony Luck
2020-04-15  9:49     ` [tip: ras/core] EDAC: " tip-bot2 for Tony Luck
2020-04-07 16:34 ` [PATCH 0/9 v3] New way to track mce notifier chain actions Borislav Petkov
2020-04-07 16:34   ` [PATCH 1/9] x86/mce/amd, edac: Remove report_gart_errors Borislav Petkov
2020-04-15  9:49     ` [tip: ras/core] " tip-bot2 for Borislav Petkov
2020-04-07 16:34   ` [PATCH 2/9] x86/mce: Rename "first" function as "early" Borislav Petkov
2020-04-07 16:34   ` [PATCH 3/9] x86/mce: Convert the CEC to use the MCE notifier Borislav Petkov
2020-04-07 16:34   ` [PATCH 4/9] x86/mce: Add a struct mce.kflags field Borislav Petkov
2020-04-07 16:34   ` [PATCH 5/9] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask Borislav Petkov
2020-04-07 16:34   ` [PATCH 6/9] x86/mce: Change default MCE logger to check mce->kflags Borislav Petkov
2020-04-07 16:34   ` [PATCH 7/9] x86/mce: Add mce=print_all option Borislav Petkov
2020-04-07 16:34   ` [PATCH 8/9] EDAC: Drop the EDAC report status checks Borislav Petkov
2020-04-07 16:34   ` [PATCH 9/9] x86/mce: Fixup exception only for the correct MCEs Borislav Petkov
2020-04-15  9:49     ` [tip: ras/core] " tip-bot2 for Borislav Petkov
2020-04-07 19:53   ` [PATCH 0/9 v3] New way to track mce notifier chain actions Luck, Tony
2020-04-07 19:56     ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200214222720.13168-5-tony.luck@intel.com \
    --to=tony.luck@intel.com \
    --cc=bp@alien8.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.