linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yazen Ghannam <yazen.ghannam@amd.com>
To: <linux-edac@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>, <tony.luck@intel.com>,
	<x86@kernel.org>, <Avadhut.Naik@amd.com>, <John.Allen@amd.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>
Subject: [PATCH v2 09/16] x86/mce: Unify AMD THR handler with MCA Polling
Date: Thu, 4 Apr 2024 10:13:52 -0500	[thread overview]
Message-ID: <20240404151359.47970-10-yazen.ghannam@amd.com> (raw)
In-Reply-To: <20240404151359.47970-1-yazen.ghannam@amd.com>

AMD systems optionally support an MCA Thresholding interrupt. The
interrupt should be used as another signal to trigger MCA polling. This
is similar to how the Intel Corrected Machine Check interrupt (CMCI) is
handled.

AMD MCA Thresholding is managed using the MCA_MISC registers within an
MCA bank. The OS will need to modify the hardware error count field in
order to reset the threshold limit and rearm the interrupt. Management
of the MCA_MISC register should be done as a follow up to the basic MCA
polling flow. It should not be the main focus of the interrupt handler.

Furthermore, future systems will have the ability to send an MCA
Thresholding interrupt to the OS even when the OS does not manage the
feature, i.e. MCA_MISC registers are Read-as-Zero/Locked.

Call the common MCA polling function when handling the MCA Thresholding
interrupt. This will allow the OS to find any valid errors whether or
not the MCA Thresholding feature is OS-managed. Also, this allows the
common MCA polling options and kernel parameters to apply to AMD
systems.

Add a callback to the MCA polling function to handle vendor-specific
operations. Start by handling the AMD MCA Thresholding "block reset"
flow.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lkml.kernel.org/r/20231118193248.1296798-14-yazen.ghannam@amd.com
    
    v1->v2:
    * No change.

 arch/x86/kernel/cpu/mce/amd.c      | 57 ++++++++++++++----------------
 arch/x86/kernel/cpu/mce/core.c     |  8 +++++
 arch/x86/kernel/cpu/mce/internal.h |  2 ++
 3 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f59f4a1c9b21..75195d6fe971 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -979,12 +979,7 @@ static void amd_deferred_error_interrupt(void)
 		log_error_deferred(bank);
 }
 
-static void log_error_thresholding(unsigned int bank, u64 misc)
-{
-	_log_error_deferred(bank, misc);
-}
-
-static void log_and_reset_block(struct threshold_block *block)
+static void reset_block(struct threshold_block *block)
 {
 	struct thresh_restart tr;
 	u32 low = 0, high = 0;
@@ -998,49 +993,51 @@ static void log_and_reset_block(struct threshold_block *block)
 	if (!(high & MASK_OVERFLOW_HI))
 		return;
 
-	/* Log the MCE which caused the threshold event. */
-	log_error_thresholding(block->bank, ((u64)high << 32) | low);
-
 	/* Reset threshold block after logging error. */
 	memset(&tr, 0, sizeof(tr));
 	tr.b = block;
 	threshold_restart_bank(&tr);
 }
 
-/*
- * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
- * goes off when error_count reaches threshold_limit.
- */
-static void amd_threshold_interrupt(void)
+static void reset_thr_blocks(unsigned int bank)
 {
 	struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
 	struct threshold_bank **bp = this_cpu_read(threshold_banks);
-	unsigned int bank, cpu = smp_processor_id();
 
 	/*
 	 * Validate that the threshold bank has been initialized already. The
 	 * handler is installed at boot time, but on a hotplug event the
 	 * interrupt might fire before the data has been initialized.
 	 */
-	if (!bp)
+	if (!bp || !bp[bank])
 		return;
 
-	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
-		if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
-			continue;
+	first_block = bp[bank]->blocks;
+	if (!first_block)
+		return;
 
-		first_block = bp[bank]->blocks;
-		if (!first_block)
-			continue;
+	/*
+	 * The first block is also the head of the list. Check it first
+	 * before iterating over the rest.
+	 */
+	reset_block(first_block);
+	list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+		reset_block(block);
+}
 
-		/*
-		 * The first block is also the head of the list. Check it first
-		 * before iterating over the rest.
-		 */
-		log_and_reset_block(first_block);
-		list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
-			log_and_reset_block(block);
-	}
+/*
+ * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
+ * goes off when error_count reaches threshold_limit.
+ */
+static void amd_threshold_interrupt(void)
+{
+	/* Check all banks for now. This could be optimized in the future. */
+	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+}
+
+void amd_handle_error(struct mce *m)
+{
+	reset_thr_blocks(m->bank);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7a857b33f515..75297e7eb980 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -672,6 +672,12 @@ static noinstr void mce_read_aux(struct mce *m, int i)
 	}
 }
 
+static void vendor_handle_error(struct mce *m)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return amd_handle_error(m);
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -787,6 +793,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			mce_log(&m);
 
 clear_it:
+		vendor_handle_error(&m);
+
 		/*
 		 * Clear state for this bank.
 		 */
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index e86e53695828..96b108175ca2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -267,6 +267,7 @@ void mce_setup_for_cpu(unsigned int cpu, struct mce *m);
 #ifdef CONFIG_X86_MCE_AMD
 extern bool amd_filter_mce(struct mce *m);
 bool amd_mce_usable_address(struct mce *m);
+void amd_handle_error(struct mce *m);
 
 /*
  * If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -295,6 +296,7 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
 #else
 static inline bool amd_filter_mce(struct mce *m) { return false; }
 static inline bool amd_mce_usable_address(struct mce *m) { return false; }
+static inline void amd_handle_error(struct mce *m) { }
 static inline void smca_extract_err_addr(struct mce *m) { }
 #endif
 
-- 
2.34.1


  parent reply	other threads:[~2024-04-04 15:14 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-04 15:13 [PATCH v2 00/16] MCA Updates Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 01/16] x86/mce: Define mce_setup() helpers for common and per-CPU fields Yazen Ghannam
2024-04-16 10:02   ` Borislav Petkov
2024-04-17 13:50     ` Yazen Ghannam
2024-04-22  8:13       ` Borislav Petkov
2024-04-04 15:13 ` [PATCH v2 02/16] x86/mce: Use mce_setup() helpers for apei_smca_report_x86_error() Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 03/16] x86/mce/amd: Use fixed bank number for quirks Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 04/16] x86/mce/amd: Look up bank type by IPID Yazen Ghannam
2024-04-23 17:06   ` Borislav Petkov
2024-04-23 19:16     ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 05/16] x86/mce/amd: Clean up SMCA configuration Yazen Ghannam
2024-04-23 19:06   ` Borislav Petkov
2024-04-23 19:32     ` Yazen Ghannam
2024-04-24  2:29       ` Borislav Petkov
2024-04-24 13:44         ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 06/16] x86/mce/amd: Prep DFR handler before enabling banks Yazen Ghannam
2024-04-24 18:34   ` Borislav Petkov
2024-04-25 13:31     ` Yazen Ghannam
2024-04-29 12:38       ` Borislav Petkov
2024-04-29 13:22         ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 07/16] x86/mce/amd: Simplify DFR handler setup Yazen Ghannam
2024-04-24 19:06   ` Borislav Petkov
2024-04-25 14:12     ` Yazen Ghannam
2024-04-29 12:59       ` Borislav Petkov
2024-04-29 13:56         ` Yazen Ghannam
2024-04-29 14:12           ` Borislav Petkov
2024-04-29 14:25             ` Yazen Ghannam
2024-04-30 13:47               ` Borislav Petkov
2024-04-29 18:34       ` Robert Richter
2024-04-30 18:06         ` Borislav Petkov
2024-05-02 16:02           ` Yazen Ghannam
2024-05-02 18:48             ` Robert Richter
2024-05-04 14:37               ` Borislav Petkov
2024-04-04 15:13 ` [PATCH v2 08/16] x86/mce/amd: Clean up enable_deferred_error_interrupt() Yazen Ghannam
2024-04-29 13:12   ` Borislav Petkov
2024-04-29 14:18     ` Yazen Ghannam
2024-05-04 14:41       ` Borislav Petkov
2024-04-04 15:13 ` Yazen Ghannam [this message]
2024-04-29 13:40   ` [PATCH v2 09/16] x86/mce: Unify AMD THR handler with MCA Polling Borislav Petkov
2024-04-29 14:36     ` Yazen Ghannam
2024-05-04 14:52       ` Borislav Petkov
2024-05-07 16:25         ` Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 10/16] x86/mce: Unify AMD DFR " Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 11/16] x86/mce: Skip AMD threshold init if no threshold banks found Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 12/16] x86/mce/amd: Support SMCA Corrected Error Interrupt Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 13/16] x86/mce: Add wrapper for struct mce to export vendor specific info Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 14/16] x86/mce, EDAC/mce_amd: Add support for new MCA_SYND{1,2} registers Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 15/16] x86/mce/apei: Handle variable register array size Yazen Ghannam
2024-04-04 15:13 ` [PATCH v2 16/16] EDAC/mce_amd: Add support for FRU Text in MCA Yazen Ghannam
2024-04-05 16:06   ` Luck, Tony
2024-04-07 13:19     ` Yazen Ghannam
2024-04-08 19:47     ` Naik, Avadhut
2024-04-08 19:57       ` Luck, Tony

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240404151359.47970-10-yazen.ghannam@amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=Avadhut.Naik@amd.com \
    --cc=John.Allen@amd.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).