All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Steven Rostedt <rostedt@goodmis.org>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Borislav Petkov <bp@alien8.de>
Subject: Re: [patch 2/6] x86/idle: Sanitize X86_BUG_AMD_E400 handling
Date: Fri, 01 Mar 2024 13:33:31 +0100	[thread overview]
Message-ID: <877cim6sis.ffs@tglx> (raw)
In-Reply-To: <20240229142248.329708185@linutronix.de>

On Thu, Feb 29 2024 at 15:23, Thomas Gleixner wrote:
> @@ -985,7 +958,9 @@ void __init arch_post_acpi_subsys_init(v
>  
>  	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
>  		mark_tsc_unstable("TSC halt in AMD C1E");
> -	pr_info("System has AMD C1E enabled\n");
> +
> +	static_branch_enable(&arch_needs_tick_broadcast);
> +	pr_info("System has AMD C1E erratum E400. Workaround enabled.\n");
>  }

Breaks the 32-bit build with APIC=n :(

---
Subject: x86/idle: Sanitize X86_BUG_AMD_E400 handling
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 28 Feb 2024 23:13:00 +0100

amd_e400_idle(), the idle routine for AMD CPUs which are affected by
erratum 400 violates the RCU constraints by invoking tick_broadcast_enter()
and tick_broadcast_exit() after the core code has marked RCU non-idle.  The
functions can end up in lockdep or tracing, which rightfully triggers a
RCU warning.

The core code provides now a static branch conditional invocation of the
broadcast functions.

Remove amd_e400_idle(), enforce default_idle() and enable the static branch
on affected CPUs to cure this.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V1b: Fix 32bit built with APIC=n (0-day)
---
 arch/x86/Kconfig          |    1 +
 arch/x86/kernel/process.c |   42 +++++++++---------------------------------
 2 files changed, 10 insertions(+), 33 deletions(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -147,6 +147,7 @@ config X86
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
 	select GENERIC_CLOCKEVENTS_BROADCAST	if X86_64 || (X86_32 && X86_LOCAL_APIC)
+	select GENERIC_CLOCKEVENTS_BROADCAST_IDLE	if GENERIC_CLOCKEVENTS_BROADCAST
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CPU_AUTOPROBE
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -846,31 +846,6 @@ void __noreturn stop_this_cpu(void *dumm
 }
 
 /*
- * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
- * states (local apic timer and TSC stop).
- *
- * XXX this function is completely buggered vs RCU and tracing.
- */
-static void amd_e400_idle(void)
-{
-	/*
-	 * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
-	 * gets set after static_cpu_has() places have been converted via
-	 * alternatives.
-	 */
-	if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
-		default_idle();
-		return;
-	}
-
-	tick_broadcast_enter();
-
-	default_idle();
-
-	tick_broadcast_exit();
-}
-
-/*
  * Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf
  * exists and whenever MONITOR/MWAIT extensions are present there is at
  * least one C1 substate.
@@ -890,8 +865,8 @@ static int prefer_mwait_c1_over_halt(con
 	if (!cpu_has(c, X86_FEATURE_MWAIT))
 		return 0;
 
-	/* Monitor has a bug. Fallback to HALT */
-	if (boot_cpu_has_bug(X86_BUG_MONITOR))
+	/* Monitor has a bug or APIC stops in C1E. Fallback to HALT */
+	if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E))
 		return 0;
 
 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
@@ -942,17 +917,15 @@ void select_idle_routine(const struct cp
 	if (x86_idle_set() || boot_option_idle_override == IDLE_POLL)
 		return;
 
-	if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
-		pr_info("using AMD E400 aware idle routine\n");
-		static_call_update(x86_idle, amd_e400_idle);
-	} else if (prefer_mwait_c1_over_halt(c)) {
+	if (prefer_mwait_c1_over_halt(c)) {
 		pr_info("using mwait in idle threads\n");
 		static_call_update(x86_idle, mwait_idle);
 	} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
 		pr_info("using TDX aware idle routine\n");
 		static_call_update(x86_idle, tdx_safe_halt);
-	} else
+	} else {
 		static_call_update(x86_idle, default_idle);
+	}
 }
 
 void amd_e400_c1e_apic_setup(void)
@@ -985,7 +958,10 @@ void __init arch_post_acpi_subsys_init(v
 
 	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 		mark_tsc_unstable("TSC halt in AMD C1E");
-	pr_info("System has AMD C1E enabled\n");
+
+	if (IS_ENABLED(GENERIC_CLOCKEVENTS_BROADCAST_IDLE))
+		static_branch_enable(&arch_needs_tick_broadcast);
+	pr_info("System has AMD C1E erratum E400. Workaround enabled.\n");
 }
 
 static int __init idle_setup(char *str)

  reply	other threads:[~2024-03-01 12:33 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-29 14:23 [patch 0/6] x86/idle: Cure RCU violations and cleanups Thomas Gleixner
2024-02-29 14:23 ` [patch 1/6] sched/idle: Conditionally handle tick broadcast in default_idle_call() Thomas Gleixner
2024-03-01 20:48   ` [tip: x86/core] " tip-bot2 for Thomas Gleixner
2024-02-29 14:23 ` [patch 2/6] x86/idle: Sanitize X86_BUG_AMD_E400 handling Thomas Gleixner
2024-03-01 12:33   ` Thomas Gleixner [this message]
2024-03-01 20:48     ` [tip: x86/core] " tip-bot2 for Thomas Gleixner
2024-03-04 16:51     ` tip-bot2 for Thomas Gleixner
2024-02-29 14:23 ` [patch 3/6] x86/idle: Clean up idle selection Thomas Gleixner
2024-03-01 20:48   ` [tip: x86/core] " tip-bot2 for Thomas Gleixner
2024-03-04 16:51   ` tip-bot2 for Thomas Gleixner
2024-02-29 14:23 ` [patch 4/6] x86/idle: Cleanup idle_setup() Thomas Gleixner
2024-03-01 20:48   ` [tip: x86/core] " tip-bot2 for Thomas Gleixner
2024-03-04 16:51   ` tip-bot2 for Thomas Gleixner
2024-02-29 14:23 ` [patch 5/6] x86/idle: Let prefer_mwait_c1_over_halt() return bool Thomas Gleixner
2024-03-01 20:48   ` [tip: x86/core] " tip-bot2 for Thomas Gleixner
2024-03-04 16:51   ` tip-bot2 for Thomas Gleixner
2024-02-29 14:23 ` [patch 6/6] x86/idle: Select idle routine only once Thomas Gleixner
2024-03-01  8:14   ` Thomas Gleixner
2024-03-01 11:33     ` Thomas Gleixner
2024-03-01 20:48       ` [tip: x86/core] " tip-bot2 for Thomas Gleixner
2024-03-04 16:51       ` tip-bot2 for Thomas Gleixner
2024-03-01 18:41 ` [patch 0/6] x86/idle: Cure RCU violations and cleanups Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=877cim6sis.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=bp@alien8.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.