linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] More RCU vs idle fixes
@ 2020-11-20 11:41 Peter Zijlstra
  2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-20 11:41 UTC (permalink / raw)
  To: rafael, viresh.kumar, mingo
  Cc: x86, mark.rutland, will, svens, linux-kernel, peterz


Both arm64 and s390 are tripping over arch_cpu_idle() RCU, tracing, lockdep
interaction. While looking at that I also found fail in intel_idle.

Please consider for this cycle.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-11-20 11:41 [PATCH 0/2] More RCU vs idle fixes Peter Zijlstra
@ 2020-11-20 11:41 ` Peter Zijlstra
  2020-11-20 12:39   ` Mark Rutland
                     ` (2 more replies)
  2020-11-20 11:41 ` [PATCH 2/2] intel_idle: Fix intel_idle() " Peter Zijlstra
  2020-11-24 13:47 ` [PATCH 0/2] More RCU vs idle fixes Sven Schnelle
  2 siblings, 3 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-20 11:41 UTC (permalink / raw)
  To: rafael, viresh.kumar, mingo
  Cc: x86, mark.rutland, will, svens, linux-kernel, peterz

We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.

Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the
lockdep,rcu,tracing state like we do in entry.

(XXX: we really should change arch_cpu_idle() to not return with
interrupts enabled)

Reported-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/alpha/kernel/process.c      |    2 +-
 arch/arm/kernel/process.c        |    2 +-
 arch/arm64/kernel/process.c      |    2 +-
 arch/csky/kernel/process.c       |    2 +-
 arch/h8300/kernel/process.c      |    2 +-
 arch/hexagon/kernel/process.c    |    2 +-
 arch/ia64/kernel/process.c       |    2 +-
 arch/microblaze/kernel/process.c |    2 +-
 arch/mips/kernel/idle.c          |   12 ++++++------
 arch/nios2/kernel/process.c      |    2 +-
 arch/openrisc/kernel/process.c   |    2 +-
 arch/parisc/kernel/process.c     |    2 +-
 arch/powerpc/kernel/idle.c       |    4 ++--
 arch/riscv/kernel/process.c      |    2 +-
 arch/s390/kernel/idle.c          |    2 +-
 arch/sh/kernel/idle.c            |    2 +-
 arch/sparc/kernel/leon_pmc.c     |    4 ++--
 arch/sparc/kernel/process_32.c   |    2 +-
 arch/sparc/kernel/process_64.c   |    4 ++--
 arch/um/kernel/process.c         |    2 +-
 arch/x86/include/asm/mwait.h     |    2 --
 arch/x86/kernel/process.c        |   12 +++++++-----
 kernel/sched/idle.c              |   28 +++++++++++++++++++++++++++-
 23 files changed, 62 insertions(+), 36 deletions(-)

--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
 void arch_cpu_idle(void)
 {
 	wtint(0);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_dead(void)
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
 		arm_pm_idle();
 	else
 		cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_prepare(void)
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -126,7 +126,7 @@ void arch_cpu_idle(void)
 	 * tricks
 	 */
 	cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
 #ifdef CONFIG_CPU_PM_STOP
 	asm volatile("stop\n");
 #endif
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 #endif
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(v
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__asm__("sleep");
 }
 
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
 {
 	__vmwait();
 	/*  interrupts wake us up, but irqs are still disabled */
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
 	if (mark_idle)
 		(*mark_idle)(1);
 
-	safe_halt();
+	raw_safe_halt();
 
 	if (mark_idle)
 		(*mark_idle)(0);
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_f
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
 {
 	unsigned long cfg = read_c0_conf();
 	write_c0_conf(cfg | R30XX_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 static void __cpuidle r39xx_wait(void)
 {
 	if (!need_resched())
 		write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void __cpuidle r4k_wait(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__r4k_wait();
 }
 
@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
 		"	.set	arch=r4000	\n"
 		"	wait			\n"
 		"	.set	pop		\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(v
 		"	wait						\n"
 		"	mtc0	$1, $12		# stalls until W stage	\n"
 		"	.set	pop					\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
 	if (cpu_wait)
 		cpu_wait();
 	else
-		local_irq_enable();
+		raw_local_irq_enable();
 }
 
 #ifdef CONFIG_CPU_IDLE
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -79,7 +79,7 @@ void machine_power_off(void)
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
 		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
 }
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
 
 void __cpuidle arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* nop on real hardware, qemu will idle sleep. */
 	asm volatile("or %%r10,%%r10,%%r10\n":::);
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
 		 * interrupts enabled, some don't.
 		 */
 		if (irqs_disabled())
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 		/*
 		 * Go into low thread priority and possibly
 		 * low power mode.
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_t
 void arch_cpu_idle(void)
 {
 	wait_for_interrupt();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void show_regs(struct pt_regs *regs)
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle(void)
 {
 	enabled_wait();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
 void default_idle(void)
 {
 	set_bl_bit();
-	local_irq_enable();
+	raw_local_irq_enable();
 	/* Isn't this racy ? */
 	cpu_sleep();
 	clear_bl_bit();
--- a/arch/sparc/kernel/leon_pmc.c
+++ b/arch/sparc/kernel/leon_pmc.c
@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
 	register unsigned int address = (unsigned int)leon3_irqctrl_regs;
 
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	__asm__ __volatile__ (
 		"wr	%%g0, %%asr19\n"
@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
 static void pmc_leon_idle(void)
 {
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* For systems without power-down, this will be no-op */
 	__asm__ __volatile__ ("wr	%g0, %asr19\n\t");
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
 {
 	if (sparc_idle)
 		(*sparc_idle)();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
 {
 	if (tlb_type != hypervisor) {
 		touch_nmi_watchdog();
-		local_irq_enable();
+		raw_local_irq_enable();
 	} else {
 		unsigned long pstate;
 
-		local_irq_enable();
+		raw_local_irq_enable();
 
                 /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
                  * the cpu sleep hypervisor call.
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
 {
 	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
 	um_idle_sleep();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 int __cant_sleep(void) {
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned lon
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-	trace_hardirqs_on();
-
 	mds_idle_clear_cpu_buffers();
 	/* "mwait %eax, %ecx;" */
 	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
  */
 void __cpuidle default_idle(void)
 {
-	safe_halt();
+	raw_safe_halt();
 }
 #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
 /*
  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
  * states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
  */
 static void amd_e400_idle(void)
 {
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
 	 * The switch back from broadcast mode needs to be called with
 	 * interrupts disabled.
 	 */
-	local_irq_disable();
+	raw_local_irq_disable();
 	tick_broadcast_exit();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
 		if (!need_resched())
 			__sti_mwait(0, 0);
 		else
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 	}
 	__current_clr_polling();
 }
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
 
 		trace_cpu_idle(1, smp_processor_id());
 		stop_critical_timings();
+
+		/*
+		 * arch_cpu_idle() is supposed to enable IRQs, however
+		 * we can't do that because of RCU and tracing.
+		 *
+		 * Trace IRQs enable here, then switch off RCU, and have
+		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
+		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+		 * last -- this is very similar to the entry code.
+		 */
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(_THIS_IP_);
 		rcu_idle_enter();
+		lockdep_hardirqs_on(_THIS_IP_);
+
 		arch_cpu_idle();
+
+		/*
+		 * OK, so IRQs are enabled here, but RCU needs them disabled to
+		 * turn itself back on.. funny thing is that disabling IRQs
+		 * will cause tracing, which needs RCU. Jump through hoops to
+		 * make it 'work'.
+		 */
+		raw_local_irq_disable();
+		lockdep_hardirqs_off(_THIS_IP_);
 		rcu_idle_exit();
+		lockdep_hardirqs_on(_THIS_IP_);
+		raw_local_irq_enable();
+
 		start_critical_timings();
 		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	}



^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] intel_idle: Fix intel_idle() vs tracing
  2020-11-20 11:41 [PATCH 0/2] More RCU vs idle fixes Peter Zijlstra
  2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
@ 2020-11-20 11:41 ` Peter Zijlstra
  2020-11-23 10:26   ` Rafael J. Wysocki
  2020-11-24 13:47 ` [PATCH 0/2] More RCU vs idle fixes Sven Schnelle
  2 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-20 11:41 UTC (permalink / raw)
  To: rafael, viresh.kumar, mingo
  Cc: x86, mark.rutland, will, svens, linux-kernel, peterz

cpuidle->enter() callbacks should not call into tracing because RCU
has already been disabled. Instead of doing the broadcast thing
itself, simply advertise to the cpuidle core that those states stop
the timer.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/idle/intel_idle.c |   37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct c
 	struct cpuidle_state *state = &drv->states[index];
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned long ecx = 1; /* break on interrupt flag */
-	bool tick;
-
-	if (!static_cpu_has(X86_FEATURE_ARAT)) {
-		/*
-		 * Switch over to one-shot tick broadcast if the target C-state
-		 * is deeper than C1.
-		 */
-		if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
-			tick = true;
-			tick_broadcast_enter();
-		} else {
-			tick = false;
-		}
-	}
 
 	mwait_idle_with_hints(eax, ecx);
 
-	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
-		tick_broadcast_exit();
-
 	return index;
 }
 
@@ -1460,6 +1443,23 @@ static bool __init intel_idle_verify_cst
 	return true;
 }
 
+static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
+{
+	unsigned long eax = flg2MWAIT(state->flags);
+
+	if (boot_cpu_has(X86_FEATURE_ARAT))
+		return false;
+
+	/*
+	 * Switch over to one-shot tick broadcast if the target C-state
+	 * is deeper than C1.
+	 */
+	if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+		return true;
+
+	return false;
+}
+
 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 {
 	int cstate;
@@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstat
 		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
 			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
 
+		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
+			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
+
 		drv->state_count++;
 	}
 



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
@ 2020-11-20 12:39   ` Mark Rutland
  2020-11-25 13:57   ` [tip: locking/urgent] " tip-bot2 for Peter Zijlstra
  2020-11-30 21:00   ` [PATCH 1/2] " Guenter Roeck
  2 siblings, 0 replies; 22+ messages in thread
From: Mark Rutland @ 2020-11-20 12:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: rafael, viresh.kumar, mingo, x86, will, svens, linux-kernel

On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> We call arch_cpu_idle() with RCU disabled, but then use
> local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> 
> Switch all arch_cpu_idle() implementations to use
> raw_local_irq_{en,dis}able() and carefully manage the
> lockdep,rcu,tracing state like we do in entry.
> 
> (XXX: we really should change arch_cpu_idle() to not return with
> interrupts enabled)
> 
> Reported-by: Mark Rutland <mark.rutland@arm.com>
> Reported-by: Sven Schnelle <svens@linux.ibm.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

[...]

>  void arch_cpu_idle_prepare(void)
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -126,7 +126,7 @@ void arch_cpu_idle(void)
>  	 * tricks
>  	 */
>  	cpu_do_idle();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }

Prior to this patch, I could see the above causing calls to
trace_hardirqs_on() while RCU wasn't watching, with a local patch
hacking RCU_LOCKDEP_WARN(!rcu_is_watching(), ...) into
trace_hardirqs_{on,off}() to make that obvious.

With this patch applied, that splat is gone. We still have a latent
issue on arm64 because our IRQ entry path calls trace_hardirqs_on(), and
we can take an interrupt before the idle code calls rcu_idle_exit().
IIUC the right thing to do is something like the generic entry code's
irqentry_{enter,exit}().

I suspect other architectures are in the same bucket w.r.t. that latent
issue, and it'd be nicer for those in that bucket if we could avoid
taking the interrupt while RCU isn't watching, but this is certainly
better than before.

> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
>  void __weak arch_cpu_idle(void)
>  {
>  	cpu_idle_force_poll = 1;
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /**
> @@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
>  
>  		trace_cpu_idle(1, smp_processor_id());
>  		stop_critical_timings();
> +
> +		/*
> +		 * arch_cpu_idle() is supposed to enable IRQs, however
> +		 * we can't do that because of RCU and tracing.
> +		 *
> +		 * Trace IRQs enable here, then switch off RCU, and have
> +		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
> +		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
> +		 * last -- this is very similar to the entry code.
> +		 */
> +		trace_hardirqs_on_prepare();
> +		lockdep_hardirqs_on_prepare(_THIS_IP_);
>  		rcu_idle_enter();
> +		lockdep_hardirqs_on(_THIS_IP_);
> +
>  		arch_cpu_idle();
> +
> +		/*
> +		 * OK, so IRQs are enabled here, but RCU needs them disabled to
> +		 * turn itself back on.. funny thing is that disabling IRQs
> +		 * will cause tracing, which needs RCU. Jump through hoops to
> +		 * make it 'work'.
> +		 */
> +		raw_local_irq_disable();
> +		lockdep_hardirqs_off(_THIS_IP_);
>  		rcu_idle_exit();
> +		lockdep_hardirqs_on(_THIS_IP_);
> +		raw_local_irq_enable();
> +
>  		start_critical_timings();
>  		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());

FWIW looks good to me, so:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] intel_idle: Fix intel_idle() vs tracing
  2020-11-20 11:41 ` [PATCH 2/2] intel_idle: Fix intel_idle() " Peter Zijlstra
@ 2020-11-23 10:26   ` Rafael J. Wysocki
  2020-11-23 13:46     ` Peter Zijlstra
  0 siblings, 1 reply; 22+ messages in thread
From: Rafael J. Wysocki @ 2020-11-23 10:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Rafael J. Wysocki, Viresh Kumar, Ingo Molnar,
	the arch/x86 maintainers, Mark Rutland, Will Deacon, svens,
	Linux Kernel Mailing List

On Fri, Nov 20, 2020 at 12:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> cpuidle->enter() callbacks should not call into tracing because RCU
> has already been disabled. Instead of doing the broadcast thing
> itself, simply advertise to the cpuidle core that those states stop
> the timer.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  drivers/idle/intel_idle.c |   37 ++++++++++++++++++++-----------------
>  1 file changed, 20 insertions(+), 17 deletions(-)
>
> --- a/drivers/idle/intel_idle.c
> +++ b/drivers/idle/intel_idle.c
> @@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct c
>         struct cpuidle_state *state = &drv->states[index];
>         unsigned long eax = flg2MWAIT(state->flags);
>         unsigned long ecx = 1; /* break on interrupt flag */
> -       bool tick;
> -
> -       if (!static_cpu_has(X86_FEATURE_ARAT)) {
> -               /*
> -                * Switch over to one-shot tick broadcast if the target C-state
> -                * is deeper than C1.
> -                */
> -               if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
> -                       tick = true;
> -                       tick_broadcast_enter();
> -               } else {
> -                       tick = false;
> -               }
> -       }
>
>         mwait_idle_with_hints(eax, ecx);
>
> -       if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
> -               tick_broadcast_exit();
> -
>         return index;
>  }
>
> @@ -1460,6 +1443,23 @@ static bool __init intel_idle_verify_cst
>         return true;
>  }
>
> +static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
> +{
> +       unsigned long eax = flg2MWAIT(state->flags);
> +
> +       if (boot_cpu_has(X86_FEATURE_ARAT))
> +               return false;
> +
> +       /*
> +        * Switch over to one-shot tick broadcast if the target C-state
> +        * is deeper than C1.
> +        */
> +       if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
> +               return true;
> +
> +       return false;
> +}
> +
>  static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
>  {
>         int cstate;
> @@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstat
>                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
>                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
>
> +               if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
> +                       drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
> +
>                 drv->state_count++;
>         }

This doesn't cover the ACPI case AFAICS.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] intel_idle: Fix intel_idle() vs tracing
  2020-11-23 10:26   ` Rafael J. Wysocki
@ 2020-11-23 13:46     ` Peter Zijlstra
  2020-11-23 13:54       ` Rafael J. Wysocki
  0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-23 13:46 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Ingo Molnar, the arch/x86 maintainers,
	Mark Rutland, Will Deacon, svens, Linux Kernel Mailing List

On Mon, Nov 23, 2020 at 11:26:39AM +0100, Rafael J. Wysocki wrote:
> On Fri, Nov 20, 2020 at 12:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > cpuidle->enter() callbacks should not call into tracing because RCU
> > has already been disabled. Instead of doing the broadcast thing
> > itself, simply advertise to the cpuidle core that those states stop
> > the timer.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  drivers/idle/intel_idle.c |   37 ++++++++++++++++++++-----------------
> >  1 file changed, 20 insertions(+), 17 deletions(-)
> >
> > --- a/drivers/idle/intel_idle.c
> > +++ b/drivers/idle/intel_idle.c
> > @@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct c
> >         struct cpuidle_state *state = &drv->states[index];
> >         unsigned long eax = flg2MWAIT(state->flags);
> >         unsigned long ecx = 1; /* break on interrupt flag */
> > -       bool tick;
> > -
> > -       if (!static_cpu_has(X86_FEATURE_ARAT)) {
> > -               /*
> > -                * Switch over to one-shot tick broadcast if the target C-state
> > -                * is deeper than C1.
> > -                */
> > -               if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
> > -                       tick = true;
> > -                       tick_broadcast_enter();
> > -               } else {
> > -                       tick = false;
> > -               }
> > -       }
> >
> >         mwait_idle_with_hints(eax, ecx);
> >
> > -       if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
> > -               tick_broadcast_exit();
> > -
> >         return index;
> >  }
> >
> > @@ -1460,6 +1443,23 @@ static bool __init intel_idle_verify_cst
> >         return true;
> >  }
> >
> > +static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
> > +{
> > +       unsigned long eax = flg2MWAIT(state->flags);
> > +
> > +       if (boot_cpu_has(X86_FEATURE_ARAT))
> > +               return false;
> > +
> > +       /*
> > +        * Switch over to one-shot tick broadcast if the target C-state
> > +        * is deeper than C1.
> > +        */
> > +       if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
> > +               return true;
> > +
> > +       return false;
> > +}
> > +
> >  static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
> >  {
> >         int cstate;
> > @@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstat
> >                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
> >                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
> >
> > +               if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
> > +                       drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
> > +
> >                 drv->state_count++;
> >         }
> 
> This doesn't cover the ACPI case AFAICS.

aa6b43d57f99 ("ACPI: processor: Use CPUIDLE_FLAG_TIMER_STOP")

did that, no?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] intel_idle: Fix intel_idle() vs tracing
  2020-11-23 13:46     ` Peter Zijlstra
@ 2020-11-23 13:54       ` Rafael J. Wysocki
  2020-11-23 14:35         ` Peter Zijlstra
  0 siblings, 1 reply; 22+ messages in thread
From: Rafael J. Wysocki @ 2020-11-23 13:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Rafael J. Wysocki, Viresh Kumar, Ingo Molnar,
	the arch/x86 maintainers, Mark Rutland, Will Deacon, svens,
	Linux Kernel Mailing List

On Mon, Nov 23, 2020 at 2:46 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Nov 23, 2020 at 11:26:39AM +0100, Rafael J. Wysocki wrote:
> > On Fri, Nov 20, 2020 at 12:50 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > cpuidle->enter() callbacks should not call into tracing because RCU
> > > has already been disabled. Instead of doing the broadcast thing
> > > itself, simply advertise to the cpuidle core that those states stop
> > > the timer.
> > >
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > ---
> > >  drivers/idle/intel_idle.c |   37 ++++++++++++++++++++-----------------
> > >  1 file changed, 20 insertions(+), 17 deletions(-)
> > >
> > > --- a/drivers/idle/intel_idle.c
> > > +++ b/drivers/idle/intel_idle.c
> > > @@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct c
> > >         struct cpuidle_state *state = &drv->states[index];
> > >         unsigned long eax = flg2MWAIT(state->flags);
> > >         unsigned long ecx = 1; /* break on interrupt flag */
> > > -       bool tick;
> > > -
> > > -       if (!static_cpu_has(X86_FEATURE_ARAT)) {
> > > -               /*
> > > -                * Switch over to one-shot tick broadcast if the target C-state
> > > -                * is deeper than C1.
> > > -                */
> > > -               if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
> > > -                       tick = true;
> > > -                       tick_broadcast_enter();
> > > -               } else {
> > > -                       tick = false;
> > > -               }
> > > -       }
> > >
> > >         mwait_idle_with_hints(eax, ecx);
> > >
> > > -       if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
> > > -               tick_broadcast_exit();
> > > -
> > >         return index;
> > >  }
> > >
> > > @@ -1460,6 +1443,23 @@ static bool __init intel_idle_verify_cst
> > >         return true;
> > >  }
> > >
> > > +static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
> > > +{
> > > +       unsigned long eax = flg2MWAIT(state->flags);
> > > +
> > > +       if (boot_cpu_has(X86_FEATURE_ARAT))
> > > +               return false;
> > > +
> > > +       /*
> > > +        * Switch over to one-shot tick broadcast if the target C-state
> > > +        * is deeper than C1.
> > > +        */
> > > +       if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
> > > +               return true;
> > > +
> > > +       return false;
> > > +}
> > > +
> > >  static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
> > >  {
> > >         int cstate;
> > > @@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstat
> > >                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
> > >                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
> > >
> > > +               if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
> > > +                       drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
> > > +
> > >                 drv->state_count++;
> > >         }
> >
> > This doesn't cover the ACPI case AFAICS.
>
> aa6b43d57f99 ("ACPI: processor: Use CPUIDLE_FLAG_TIMER_STOP")
>
> did that, no?

Nope.

intel_idle_init_cstates_acpi() needs to be updated too as it doesn't
pick up the flags automatically.  It looks like CPUIDLE_FLAG_RCU_IDLE
needs to be copied too.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] intel_idle: Fix intel_idle() vs tracing
  2020-11-23 13:54       ` Rafael J. Wysocki
@ 2020-11-23 14:35         ` Peter Zijlstra
  2020-11-23 14:54           ` Rafael J. Wysocki
  2020-11-25 13:57           ` [tip: locking/urgent] " tip-bot2 for Peter Zijlstra
  0 siblings, 2 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-23 14:35 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Ingo Molnar, the arch/x86 maintainers,
	Mark Rutland, Will Deacon, svens, Linux Kernel Mailing List

On Mon, Nov 23, 2020 at 02:54:47PM +0100, Rafael J. Wysocki wrote:

> intel_idle_init_cstates_acpi() needs to be updated too as it doesn't
> pick up the flags automatically.

Ooh, it has two different state init routines :-/, sorry I missed that.
See below.

> It looks like CPUIDLE_FLAG_RCU_IDLE needs to be copied too.

I might need more clue again; processor_idle() needs this because it
calls into ACPI (acpi_idle_enter_bm()) for that BM crud. I didn't find
anything like that here.


---
Subject: intel_idle: Fix intel_idle() vs tracing
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Nov 20 11:28:35 CET 2020

cpuidle->enter() callbacks should not call into tracing because RCU
has already been disabled. Instead of doing the broadcast thing
itself, simply advertise to the cpuidle core that those states stop
the timer.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/idle/intel_idle.c |   40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct c
 	struct cpuidle_state *state = &drv->states[index];
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned long ecx = 1; /* break on interrupt flag */
-	bool tick;
-
-	if (!static_cpu_has(X86_FEATURE_ARAT)) {
-		/*
-		 * Switch over to one-shot tick broadcast if the target C-state
-		 * is deeper than C1.
-		 */
-		if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
-			tick = true;
-			tick_broadcast_enter();
-		} else {
-			tick = false;
-		}
-	}
 
 	mwait_idle_with_hints(eax, ecx);
 
-	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
-		tick_broadcast_exit();
-
 	return index;
 }
 
@@ -1227,6 +1210,23 @@ static bool __init intel_idle_acpi_cst_e
 	return false;
 }
 
+static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
+{
+	unsigned long eax = flg2MWAIT(state->flags);
+
+	if (boot_cpu_has(X86_FEATURE_ARAT))
+		return false;
+
+	/*
+	 * Switch over to one-shot tick broadcast if the target C-state
+	 * is deeper than C1.
+	 */
+	if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+		return true;
+
+	return false;
+}
+
 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
 {
 	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
@@ -1269,6 +1269,9 @@ static void __init intel_idle_init_cstat
 		if (disabled_states_mask & BIT(cstate))
 			state->flags |= CPUIDLE_FLAG_OFF;
 
+		if (intel_idle_state_needs_timer_stop(state))
+			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+
 		state->enter = intel_idle;
 		state->enter_s2idle = intel_idle_s2idle;
 	}
@@ -1507,6 +1510,9 @@ static void __init intel_idle_init_cstat
 		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
 			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
 
+		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
+			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
+
 		drv->state_count++;
 	}
 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] intel_idle: Fix intel_idle() vs tracing
  2020-11-23 14:35         ` Peter Zijlstra
@ 2020-11-23 14:54           ` Rafael J. Wysocki
  2020-11-25 13:57           ` [tip: locking/urgent] " tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 22+ messages in thread
From: Rafael J. Wysocki @ 2020-11-23 14:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Rafael J. Wysocki, Viresh Kumar, Ingo Molnar,
	the arch/x86 maintainers, Mark Rutland, Will Deacon, svens,
	Linux Kernel Mailing List

On Mon, Nov 23, 2020 at 3:35 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Nov 23, 2020 at 02:54:47PM +0100, Rafael J. Wysocki wrote:
>
> > intel_idle_init_cstates_acpi() needs to be updated too as it doesn't
> > pick up the flags automatically.
>
> Ooh, it has two different state init routines :-/, sorry I missed that.
> See below.
>
> > It looks like CPUIDLE_FLAG_RCU_IDLE needs to be copied too.
>
> I might need more clue again; processor_idle() needs this because it
> calls into ACPI (acpi_idle_enter_bm()) for that BM crud. I didn't find
> anything like that here.
>
>
> ---
> Subject: intel_idle: Fix intel_idle() vs tracing
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Fri Nov 20 11:28:35 CET 2020
>
> cpuidle->enter() callbacks should not call into tracing because RCU
> has already been disabled. Instead of doing the broadcast thing
> itself, simply advertise to the cpuidle core that those states stop
> the timer.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

with a very minor nit below.

> ---
>  drivers/idle/intel_idle.c |   40 +++++++++++++++++++++++-----------------
>  1 file changed, 23 insertions(+), 17 deletions(-)
>
> --- a/drivers/idle/intel_idle.c
> +++ b/drivers/idle/intel_idle.c
> @@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct c
>         struct cpuidle_state *state = &drv->states[index];
>         unsigned long eax = flg2MWAIT(state->flags);
>         unsigned long ecx = 1; /* break on interrupt flag */
> -       bool tick;
> -
> -       if (!static_cpu_has(X86_FEATURE_ARAT)) {
> -               /*
> -                * Switch over to one-shot tick broadcast if the target C-state
> -                * is deeper than C1.
> -                */
> -               if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
> -                       tick = true;
> -                       tick_broadcast_enter();
> -               } else {
> -                       tick = false;
> -               }
> -       }
>
>         mwait_idle_with_hints(eax, ecx);
>
> -       if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
> -               tick_broadcast_exit();
> -
>         return index;
>  }
>
> @@ -1227,6 +1210,23 @@ static bool __init intel_idle_acpi_cst_e
>         return false;
>  }
>
> +static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
> +{
> +       unsigned long eax = flg2MWAIT(state->flags);
> +
> +       if (boot_cpu_has(X86_FEATURE_ARAT))
> +               return false;
> +
> +       /*
> +        * Switch over to one-shot tick broadcast if the target C-state
> +        * is deeper than C1.
> +        */
> +       if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
> +               return true;
> +
> +       return false;

The above can be written as

return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);

> +}
> +
>  static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
>  {
>         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
> @@ -1269,6 +1269,9 @@ static void __init intel_idle_init_cstat
>                 if (disabled_states_mask & BIT(cstate))
>                         state->flags |= CPUIDLE_FLAG_OFF;
>
> +               if (intel_idle_state_needs_timer_stop(state))
> +                       state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> +
>                 state->enter = intel_idle;
>                 state->enter_s2idle = intel_idle_s2idle;
>         }
> @@ -1507,6 +1510,9 @@ static void __init intel_idle_init_cstat
>                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
>                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
>
> +               if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
> +                       drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
> +
>                 drv->state_count++;
>         }
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/2] More RCU vs idle fixes
  2020-11-20 11:41 [PATCH 0/2] More RCU vs idle fixes Peter Zijlstra
  2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
  2020-11-20 11:41 ` [PATCH 2/2] intel_idle: Fix intel_idle() " Peter Zijlstra
@ 2020-11-24 13:47 ` Sven Schnelle
  2020-11-24 14:18   ` Peter Zijlstra
  2 siblings, 1 reply; 22+ messages in thread
From: Sven Schnelle @ 2020-11-24 13:47 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: rafael, viresh.kumar, mingo, x86, mark.rutland, will, linux-kernel

Peter Zijlstra <peterz@infradead.org> writes:

> Both arm64 and s390 are tripping over arch_cpu_idle() RCU,tracing,lockdep
> interaction. While looking at that I also found fail in intel_idle.
>
> Please consider for this cycle.

Is anyone taking this patchset? For s390, we also need to change the
local_irq_save/restore to the raw variants in enabled_wait() in
arch/s390/kernel/idle.c. I can make a patch and carry that via the
s390 tree, but I want to make sure the s390 change in this patchset
also reaches Linus' tree.

Thanks
Sven

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/2] More RCU vs idle fixes
  2020-11-24 13:47 ` [PATCH 0/2] More RCU vs idle fixes Sven Schnelle
@ 2020-11-24 14:18   ` Peter Zijlstra
  2020-11-24 14:23     ` Peter Zijlstra
  0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-24 14:18 UTC (permalink / raw)
  To: Sven Schnelle
  Cc: rafael, viresh.kumar, mingo, x86, mark.rutland, will, linux-kernel

On Tue, Nov 24, 2020 at 02:47:27PM +0100, Sven Schnelle wrote:
> Peter Zijlstra <peterz@infradead.org> writes:
> 
> > Both arm64 and s390 are tripping over arch_cpu_idle() RCU,tracing,lockdep
> > interaction. While looking at that I also found fail in intel_idle.
> >
> > Please consider for this cycle.
> 
> Is anyone taking this patchset?

I think I'll stuff it in x86/urgent for lack of a better place.

> For s390, we also need to change the
> local_irq_safe/restore to the raw variants in enabled_wait() in
> arch/s390/kernel/idle.c.

Duh, I'll add that.




^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/2] More RCU vs idle fixes
  2020-11-24 14:18   ` Peter Zijlstra
@ 2020-11-24 14:23     ` Peter Zijlstra
  0 siblings, 0 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-11-24 14:23 UTC (permalink / raw)
  To: Sven Schnelle
  Cc: rafael, viresh.kumar, mingo, x86, mark.rutland, will, linux-kernel

On Tue, Nov 24, 2020 at 03:18:53PM +0100, Peter Zijlstra wrote:
> On Tue, Nov 24, 2020 at 02:47:27PM +0100, Sven Schnelle wrote:
> > Peter Zijlstra <peterz@infradead.org> writes:
> > 
> > > Both arm64 and s390 are tripping over arch_cpu_idle() RCU,tracing,lockdep
> > > interaction. While looking at that I also found fail in intel_idle.
> > >
> > > Please consider for this cycle.
> > 
> > Is anyone taking this patchset?
> 
> I think I'll stuff it in x86/urgent for lack of a better place.

Ah, locking/urgent might be a better place I suppose.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [tip: locking/urgent] intel_idle: Fix intel_idle() vs tracing
  2020-11-23 14:35         ` Peter Zijlstra
  2020-11-23 14:54           ` Rafael J. Wysocki
@ 2020-11-25 13:57           ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 22+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-11-25 13:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel), Rafael J. Wysocki, x86, linux-kernel

The following commit has been merged into the locking/urgent branch of tip:

Commit-ID:     6e1d2bc675bd57640f5658a4a657ae488db4c204
Gitweb:        https://git.kernel.org/tip/6e1d2bc675bd57640f5658a4a657ae488db4c204
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 20 Nov 2020 11:28:35 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 24 Nov 2020 16:47:36 +01:00

intel_idle: Fix intel_idle() vs tracing

cpuidle->enter() callbacks should not call into tracing because RCU
has already been disabled. Instead of doing the broadcast thing
itself, simply advertise to the cpuidle core that those states stop
the timer.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lkml.kernel.org/r/20201123143510.GR3021@hirez.programming.kicks-ass.net
---
 drivers/idle/intel_idle.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 01bace4..7ee7ffe 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
 	struct cpuidle_state *state = &drv->states[index];
 	unsigned long eax = flg2MWAIT(state->flags);
 	unsigned long ecx = 1; /* break on interrupt flag */
-	bool tick;
-
-	if (!static_cpu_has(X86_FEATURE_ARAT)) {
-		/*
-		 * Switch over to one-shot tick broadcast if the target C-state
-		 * is deeper than C1.
-		 */
-		if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
-			tick = true;
-			tick_broadcast_enter();
-		} else {
-			tick = false;
-		}
-	}
 
 	mwait_idle_with_hints(eax, ecx);
 
-	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
-		tick_broadcast_exit();
-
 	return index;
 }
 
@@ -1227,6 +1210,20 @@ static bool __init intel_idle_acpi_cst_extract(void)
 	return false;
 }
 
+static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
+{
+	unsigned long eax = flg2MWAIT(state->flags);
+
+	if (boot_cpu_has(X86_FEATURE_ARAT))
+		return false;
+
+	/*
+	 * Switch over to one-shot tick broadcast if the target C-state
+	 * is deeper than C1.
+	 */
+	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
+}
+
 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
 {
 	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
@@ -1269,6 +1266,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
 		if (disabled_states_mask & BIT(cstate))
 			state->flags |= CPUIDLE_FLAG_OFF;
 
+		if (intel_idle_state_needs_timer_stop(state))
+			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+
 		state->enter = intel_idle;
 		state->enter_s2idle = intel_idle_s2idle;
 	}
@@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
 			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
 
+		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
+			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
+
 		drv->state_count++;
 	}
 

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [tip: locking/urgent] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
  2020-11-20 12:39   ` Mark Rutland
@ 2020-11-25 13:57   ` tip-bot2 for Peter Zijlstra
  2020-11-30 21:00   ` [PATCH 1/2] " Guenter Roeck
  2 siblings, 0 replies; 22+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2020-11-25 13:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Sven Schnelle, Peter Zijlstra (Intel), Mark Rutland, x86, linux-kernel

The following commit has been merged into the locking/urgent branch of tip:

Commit-ID:     58c644ba512cfbc2e39b758dd979edd1d6d00e27
Gitweb:        https://git.kernel.org/tip/58c644ba512cfbc2e39b758dd979edd1d6d00e27
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 20 Nov 2020 11:50:35 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 24 Nov 2020 16:47:35 +01:00

sched/idle: Fix arch_cpu_idle() vs tracing

We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.

Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the
lockdep,rcu,tracing state like we do in entry.

(XXX: we really should change arch_cpu_idle() to not return with
interrupts enabled)

Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org
---
 arch/alpha/kernel/process.c      |  2 +-
 arch/arm/kernel/process.c        |  2 +-
 arch/arm64/kernel/process.c      |  2 +-
 arch/csky/kernel/process.c       |  2 +-
 arch/h8300/kernel/process.c      |  2 +-
 arch/hexagon/kernel/process.c    |  2 +-
 arch/ia64/kernel/process.c       |  2 +-
 arch/microblaze/kernel/process.c |  2 +-
 arch/mips/kernel/idle.c          | 12 ++++++------
 arch/nios2/kernel/process.c      |  2 +-
 arch/openrisc/kernel/process.c   |  2 +-
 arch/parisc/kernel/process.c     |  2 +-
 arch/powerpc/kernel/idle.c       |  4 ++--
 arch/riscv/kernel/process.c      |  2 +-
 arch/s390/kernel/idle.c          |  6 +++---
 arch/sh/kernel/idle.c            |  2 +-
 arch/sparc/kernel/leon_pmc.c     |  4 ++--
 arch/sparc/kernel/process_32.c   |  2 +-
 arch/sparc/kernel/process_64.c   |  4 ++--
 arch/um/kernel/process.c         |  2 +-
 arch/x86/include/asm/mwait.h     |  2 --
 arch/x86/kernel/process.c        | 12 +++++++-----
 kernel/sched/idle.c              | 28 +++++++++++++++++++++++++++-
 23 files changed, 64 insertions(+), 38 deletions(-)

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 7462a79..4c7b041 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
 void arch_cpu_idle(void)
 {
 	wtint(0);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_dead(void)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 8e6ace0..9f199b1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
 		arm_pm_idle();
 	else
 		cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_prepare(void)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4784011..9ebe025 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -126,7 +126,7 @@ void arch_cpu_idle(void)
 	 * tricks
 	 */
 	cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index f730869..69af6bc 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
 #ifdef CONFIG_CPU_PM_STOP
 	asm volatile("stop\n");
 #endif
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 #endif
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index aea0a40..bc1364d 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__asm__("sleep");
 }
 
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 5a0a95d..67767c5 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
 {
 	__vmwait();
 	/*  interrupts wake us up, but irqs are still disabled */
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 6b61a70..c9ff879 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
 	if (mark_idle)
 		(*mark_idle)(1);
 
-	safe_halt();
+	raw_safe_halt();
 
 	if (mark_idle)
 		(*mark_idle)(0);
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index a9e46e5..f998607 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 5bc3b04..18e69eb 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
 {
 	unsigned long cfg = read_c0_conf();
 	write_c0_conf(cfg | R30XX_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 static void __cpuidle r39xx_wait(void)
 {
 	if (!need_resched())
 		write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void __cpuidle r4k_wait(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__r4k_wait();
 }
 
@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
 		"	.set	arch=r4000	\n"
 		"	wait			\n"
 		"	.set	pop		\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)
 		"	wait						\n"
 		"	mtc0	$1, $12		# stalls until W stage	\n"
 		"	.set	pop					\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
 	if (cpu_wait)
 		cpu_wait();
 	else
-		local_irq_enable();
+		raw_local_irq_enable();
 }
 
 #ifdef CONFIG_CPU_IDLE
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 4ffe857..50b4eb1 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 0ff391f..3c98728 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -79,7 +79,7 @@ void machine_power_off(void)
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
 		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
 }
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index f196d96..a92a23d 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
 
 void __cpuidle arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* nop on real hardware, qemu will idle sleep. */
 	asm volatile("or %%r10,%%r10,%%r10\n":::);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index ae0e263..1f83553 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
 		 * interrupts enabled, some don't.
 		 */
 		if (irqs_disabled())
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 		/*
 		 * Go into low thread priority and possibly
 		 * low power mode.
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 19225ec..dd5f985 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
 void arch_cpu_idle(void)
 {
 	wait_for_interrupt();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void show_regs(struct pt_regs *regs)
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index f7f1e64..2b85096 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -33,10 +33,10 @@ void enabled_wait(void)
 		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	clear_cpu_flag(CIF_NOHZ_DELAY);
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	/* Call the assembler magic in entry.S */
 	psw_idle(idle, psw_mask);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
 	raw_write_seqcount_begin(&idle->seqcount);
@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle(void)
 {
 	enabled_wait();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 0dc0f52..f598149 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
 void default_idle(void)
 {
 	set_bl_bit();
-	local_irq_enable();
+	raw_local_irq_enable();
 	/* Isn't this racy ? */
 	cpu_sleep();
 	clear_bl_bit();
diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c
index 065e2d4..396f46b 100644
--- a/arch/sparc/kernel/leon_pmc.c
+++ b/arch/sparc/kernel/leon_pmc.c
@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
 	register unsigned int address = (unsigned int)leon3_irqctrl_regs;
 
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	__asm__ __volatile__ (
 		"wr	%%g0, %%asr19\n"
@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
 static void pmc_leon_idle(void)
 {
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* For systems without power-down, this will be no-op */
 	__asm__ __volatile__ ("wr	%g0, %asr19\n\t");
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index adfcaea..a023637 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
 {
 	if (sparc_idle)
 		(*sparc_idle)();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index a75093b..6f8c782 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
 {
 	if (tlb_type != hypervisor) {
 		touch_nmi_watchdog();
-		local_irq_enable();
+		raw_local_irq_enable();
 	} else {
 		unsigned long pstate;
 
-		local_irq_enable();
+		raw_local_irq_enable();
 
                 /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
                  * the cpu sleep hypervisor call.
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 3bed095..9505a7e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
 {
 	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
 	um_idle_sleep();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 int __cant_sleep(void) {
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e039a93..29dd27b 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-	trace_hardirqs_on();
-
 	mds_idle_clear_cpu_buffers();
 	/* "mwait %eax, %ecx;" */
 	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba4593a..145a7ac 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
  */
 void __cpuidle default_idle(void)
 {
-	safe_halt();
+	raw_safe_halt();
 }
 #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
 /*
  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
  * states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
  */
 static void amd_e400_idle(void)
 {
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
 	 * The switch back from broadcast mode needs to be called with
 	 * interrupts disabled.
 	 */
-	local_irq_disable();
+	raw_local_irq_disable();
 	tick_broadcast_exit();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
 		if (!need_resched())
 			__sti_mwait(0, 0);
 		else
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 	}
 	__current_clr_polling();
 }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 24d0ee2..c6932b8 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
 
 		trace_cpu_idle(1, smp_processor_id());
 		stop_critical_timings();
+
+		/*
+		 * arch_cpu_idle() is supposed to enable IRQs, however
+		 * we can't do that because of RCU and tracing.
+		 *
+		 * Trace IRQs enable here, then switch off RCU, and have
+		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
+		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+		 * last -- this is very similar to the entry code.
+		 */
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(_THIS_IP_);
 		rcu_idle_enter();
+		lockdep_hardirqs_on(_THIS_IP_);
+
 		arch_cpu_idle();
+
+		/*
+		 * OK, so IRQs are enabled here, but RCU needs them disabled to
+		 * turn itself back on.. funny thing is that disabling IRQs
+		 * will cause tracing, which needs RCU. Jump through hoops to
+		 * make it 'work'.
+		 */
+		raw_local_irq_disable();
+		lockdep_hardirqs_off(_THIS_IP_);
 		rcu_idle_exit();
+		lockdep_hardirqs_on(_THIS_IP_);
+		raw_local_irq_enable();
+
 		start_critical_timings();
 		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	}

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
  2020-11-20 12:39   ` Mark Rutland
  2020-11-25 13:57   ` [tip: locking/urgent] " tip-bot2 for Peter Zijlstra
@ 2020-11-30 21:00   ` Guenter Roeck
  2020-12-01 11:02     ` Peter Zijlstra
  2 siblings, 1 reply; 22+ messages in thread
From: Guenter Roeck @ 2020-11-30 21:00 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: rafael, viresh.kumar, mingo, x86, mark.rutland, will, svens,
	linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390

On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> We call arch_cpu_idle() with RCU disabled, but then use
> local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> 
> Switch all arch_cpu_idle() implementations to use
> raw_local_irq_{en,dis}able() and carefully manage the
> lockdep,rcu,tracing state like we do in entry.
> 
> (XXX: we really should change arch_cpu_idle() to not return with
> interrupts enabled)
> 

Has this patch been tested on s390 ? Reason for asking is that it causes
all my s390 emulations to crash. Reverting it fixes the problem.

Guenter

> Reported-by: Mark Rutland <mark.rutland@arm.com>
> Reported-by: Sven Schnelle <svens@linux.ibm.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/alpha/kernel/process.c      |    2 +-
>  arch/arm/kernel/process.c        |    2 +-
>  arch/arm64/kernel/process.c      |    2 +-
>  arch/csky/kernel/process.c       |    2 +-
>  arch/h8300/kernel/process.c      |    2 +-
>  arch/hexagon/kernel/process.c    |    2 +-
>  arch/ia64/kernel/process.c       |    2 +-
>  arch/microblaze/kernel/process.c |    2 +-
>  arch/mips/kernel/idle.c          |   12 ++++++------
>  arch/nios2/kernel/process.c      |    2 +-
>  arch/openrisc/kernel/process.c   |    2 +-
>  arch/parisc/kernel/process.c     |    2 +-
>  arch/powerpc/kernel/idle.c       |    4 ++--
>  arch/riscv/kernel/process.c      |    2 +-
>  arch/s390/kernel/idle.c          |    2 +-
>  arch/sh/kernel/idle.c            |    2 +-
>  arch/sparc/kernel/leon_pmc.c     |    4 ++--
>  arch/sparc/kernel/process_32.c   |    2 +-
>  arch/sparc/kernel/process_64.c   |    4 ++--
>  arch/um/kernel/process.c         |    2 +-
>  arch/x86/include/asm/mwait.h     |    2 --
>  arch/x86/kernel/process.c        |   12 +++++++-----
>  kernel/sched/idle.c              |   28 +++++++++++++++++++++++++++-
>  23 files changed, 62 insertions(+), 36 deletions(-)
> 
> --- a/arch/alpha/kernel/process.c
> +++ b/arch/alpha/kernel/process.c
> @@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
>  void arch_cpu_idle(void)
>  {
>  	wtint(0);
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  void arch_cpu_idle_dead(void)
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -71,7 +71,7 @@ void arch_cpu_idle(void)
>  		arm_pm_idle();
>  	else
>  		cpu_do_idle();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  void arch_cpu_idle_prepare(void)
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -126,7 +126,7 @@ void arch_cpu_idle(void)
>  	 * tricks
>  	 */
>  	cpu_do_idle();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> --- a/arch/csky/kernel/process.c
> +++ b/arch/csky/kernel/process.c
> @@ -102,6 +102,6 @@ void arch_cpu_idle(void)
>  #ifdef CONFIG_CPU_PM_STOP
>  	asm volatile("stop\n");
>  #endif
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  #endif
> --- a/arch/h8300/kernel/process.c
> +++ b/arch/h8300/kernel/process.c
> @@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(v
>   */
>  void arch_cpu_idle(void)
>  {
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  	__asm__("sleep");
>  }
>  
> --- a/arch/hexagon/kernel/process.c
> +++ b/arch/hexagon/kernel/process.c
> @@ -44,7 +44,7 @@ void arch_cpu_idle(void)
>  {
>  	__vmwait();
>  	/*  interrupts wake us up, but irqs are still disabled */
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /*
> --- a/arch/ia64/kernel/process.c
> +++ b/arch/ia64/kernel/process.c
> @@ -239,7 +239,7 @@ void arch_cpu_idle(void)
>  	if (mark_idle)
>  		(*mark_idle)(1);
>  
> -	safe_halt();
> +	raw_safe_halt();
>  
>  	if (mark_idle)
>  		(*mark_idle)(0);
> --- a/arch/microblaze/kernel/process.c
> +++ b/arch/microblaze/kernel/process.c
> @@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_f
>  
>  void arch_cpu_idle(void)
>  {
> -       local_irq_enable();
> +       raw_local_irq_enable();
>  }
> --- a/arch/mips/kernel/idle.c
> +++ b/arch/mips/kernel/idle.c
> @@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
>  {
>  	unsigned long cfg = read_c0_conf();
>  	write_c0_conf(cfg | R30XX_CONF_HALT);
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  static void __cpuidle r39xx_wait(void)
>  {
>  	if (!need_resched())
>  		write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  void __cpuidle r4k_wait(void)
>  {
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  	__r4k_wait();
>  }
>  
> @@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
>  		"	.set	arch=r4000	\n"
>  		"	wait			\n"
>  		"	.set	pop		\n");
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /*
> @@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(v
>  		"	wait						\n"
>  		"	mtc0	$1, $12		# stalls until W stage	\n"
>  		"	.set	pop					\n");
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /*
> @@ -257,7 +257,7 @@ void arch_cpu_idle(void)
>  	if (cpu_wait)
>  		cpu_wait();
>  	else
> -		local_irq_enable();
> +		raw_local_irq_enable();
>  }
>  
>  #ifdef CONFIG_CPU_IDLE
> --- a/arch/nios2/kernel/process.c
> +++ b/arch/nios2/kernel/process.c
> @@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
>  
>  void arch_cpu_idle(void)
>  {
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /*
> --- a/arch/openrisc/kernel/process.c
> +++ b/arch/openrisc/kernel/process.c
> @@ -79,7 +79,7 @@ void machine_power_off(void)
>   */
>  void arch_cpu_idle(void)
>  {
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
>  		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
>  }
> --- a/arch/parisc/kernel/process.c
> +++ b/arch/parisc/kernel/process.c
> @@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
>  
>  void __cpuidle arch_cpu_idle(void)
>  {
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  
>  	/* nop on real hardware, qemu will idle sleep. */
>  	asm volatile("or %%r10,%%r10,%%r10\n":::);
> --- a/arch/powerpc/kernel/idle.c
> +++ b/arch/powerpc/kernel/idle.c
> @@ -52,9 +52,9 @@ void arch_cpu_idle(void)
>  		 * interrupts enabled, some don't.
>  		 */
>  		if (irqs_disabled())
> -			local_irq_enable();
> +			raw_local_irq_enable();
>  	} else {
> -		local_irq_enable();
> +		raw_local_irq_enable();
>  		/*
>  		 * Go into low thread priority and possibly
>  		 * low power mode.
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_t
>  void arch_cpu_idle(void)
>  {
>  	wait_for_interrupt();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  void show_regs(struct pt_regs *regs)
> --- a/arch/s390/kernel/idle.c
> +++ b/arch/s390/kernel/idle.c
> @@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
>  void arch_cpu_idle(void)
>  {
>  	enabled_wait();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  void arch_cpu_idle_exit(void)
> --- a/arch/sh/kernel/idle.c
> +++ b/arch/sh/kernel/idle.c
> @@ -22,7 +22,7 @@ static void (*sh_idle)(void);
>  void default_idle(void)
>  {
>  	set_bl_bit();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  	/* Isn't this racy ? */
>  	cpu_sleep();
>  	clear_bl_bit();
> --- a/arch/sparc/kernel/leon_pmc.c
> +++ b/arch/sparc/kernel/leon_pmc.c
> @@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
>  	register unsigned int address = (unsigned int)leon3_irqctrl_regs;
>  
>  	/* Interrupts need to be enabled to not hang the CPU */
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  
>  	__asm__ __volatile__ (
>  		"wr	%%g0, %%asr19\n"
> @@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
>  static void pmc_leon_idle(void)
>  {
>  	/* Interrupts need to be enabled to not hang the CPU */
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  
>  	/* For systems without power-down, this will be no-op */
>  	__asm__ __volatile__ ("wr	%g0, %asr19\n\t");
> --- a/arch/sparc/kernel/process_32.c
> +++ b/arch/sparc/kernel/process_32.c
> @@ -74,7 +74,7 @@ void arch_cpu_idle(void)
>  {
>  	if (sparc_idle)
>  		(*sparc_idle)();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -62,11 +62,11 @@ void arch_cpu_idle(void)
>  {
>  	if (tlb_type != hypervisor) {
>  		touch_nmi_watchdog();
> -		local_irq_enable();
> +		raw_local_irq_enable();
>  	} else {
>  		unsigned long pstate;
>  
> -		local_irq_enable();
> +		raw_local_irq_enable();
>  
>                  /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
>                   * the cpu sleep hypervisor call.
> --- a/arch/um/kernel/process.c
> +++ b/arch/um/kernel/process.c
> @@ -217,7 +217,7 @@ void arch_cpu_idle(void)
>  {
>  	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
>  	um_idle_sleep();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  int __cant_sleep(void) {
> --- a/arch/x86/include/asm/mwait.h
> +++ b/arch/x86/include/asm/mwait.h
> @@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned lon
>  
>  static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
>  {
> -	trace_hardirqs_on();
> -
>  	mds_idle_clear_cpu_buffers();
>  	/* "mwait %eax, %ecx;" */
>  	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -685,7 +685,7 @@ void arch_cpu_idle(void)
>   */
>  void __cpuidle default_idle(void)
>  {
> -	safe_halt();
> +	raw_safe_halt();
>  }
>  #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
>  EXPORT_SYMBOL(default_idle);
> @@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
>  /*
>   * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
>   * states (local apic timer and TSC stop).
> + *
> + * XXX this function is completely buggered vs RCU and tracing.
>   */
>  static void amd_e400_idle(void)
>  {
> @@ -757,9 +759,9 @@ static void amd_e400_idle(void)
>  	 * The switch back from broadcast mode needs to be called with
>  	 * interrupts disabled.
>  	 */
> -	local_irq_disable();
> +	raw_local_irq_disable();
>  	tick_broadcast_exit();
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /*
> @@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
>  		if (!need_resched())
>  			__sti_mwait(0, 0);
>  		else
> -			local_irq_enable();
> +			raw_local_irq_enable();
>  	} else {
> -		local_irq_enable();
> +		raw_local_irq_enable();
>  	}
>  	__current_clr_polling();
>  }
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
>  void __weak arch_cpu_idle(void)
>  {
>  	cpu_idle_force_poll = 1;
> -	local_irq_enable();
> +	raw_local_irq_enable();
>  }
>  
>  /**
> @@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
>  
>  		trace_cpu_idle(1, smp_processor_id());
>  		stop_critical_timings();
> +
> +		/*
> +		 * arch_cpu_idle() is supposed to enable IRQs, however
> +		 * we can't do that because of RCU and tracing.
> +		 *
> +		 * Trace IRQs enable here, then switch off RCU, and have
> +		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
> +		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
> +		 * last -- this is very similar to the entry code.
> +		 */
> +		trace_hardirqs_on_prepare();
> +		lockdep_hardirqs_on_prepare(_THIS_IP_);
>  		rcu_idle_enter();
> +		lockdep_hardirqs_on(_THIS_IP_);
> +
>  		arch_cpu_idle();
> +
> +		/*
> +		 * OK, so IRQs are enabled here, but RCU needs them disabled to
> +		 * turn itself back on.. funny thing is that disabling IRQs
> +		 * will cause tracing, which needs RCU. Jump through hoops to
> +		 * make it 'work'.
> +		 */
> +		raw_local_irq_disable();
> +		lockdep_hardirqs_off(_THIS_IP_);
>  		rcu_idle_exit();
> +		lockdep_hardirqs_on(_THIS_IP_);
> +		raw_local_irq_enable();
> +
>  		start_critical_timings();
>  		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
>  	}
> 
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-11-30 21:00   ` [PATCH 1/2] " Guenter Roeck
@ 2020-12-01 11:02     ` Peter Zijlstra
  2020-12-01 11:12       ` Sven Schnelle
  2020-12-01 11:56       ` Sven Schnelle
  0 siblings, 2 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-12-01 11:02 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: rafael, viresh.kumar, mingo, x86, mark.rutland, will, svens,
	linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390

On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> > We call arch_cpu_idle() with RCU disabled, but then use
> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> > 
> > Switch all arch_cpu_idle() implementations to use
> > raw_local_irq_{en,dis}able() and carefully manage the
> > lockdep,rcu,tracing state like we do in entry.
> > 
> > (XXX: we really should change arch_cpu_idle() to not return with
> > interrupts enabled)
> > 
> 
> Has this patch been tested on s390 ? Reason for asking is that it causes
> all my s390 emulations to crash. Reverting it fixes the problem.

My understanding is that it changes the error on s390. Previously it
would complain about the local_irq_enable() in arch_cpu_idle(), now it
complains when taking an interrupt during idle.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-12-01 11:02     ` Peter Zijlstra
@ 2020-12-01 11:12       ` Sven Schnelle
  2020-12-01 11:56       ` Sven Schnelle
  1 sibling, 0 replies; 22+ messages in thread
From: Sven Schnelle @ 2020-12-01 11:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Guenter Roeck, rafael, viresh.kumar, mingo, x86, mark.rutland,
	will, linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390

Peter Zijlstra <peterz@infradead.org> writes:

> On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
>> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
>> > We call arch_cpu_idle() with RCU disabled, but then use
>> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
>> > 
>> > Switch all arch_cpu_idle() implementations to use
>> > raw_local_irq_{en,dis}able() and carefully manage the
>> > lockdep,rcu,tracing state like we do in entry.
>> > 
>> > (XXX: we really should change arch_cpu_idle() to not return with
>> > interrupts enabled)
>> > 
>> 
>> Has this patch been tested on s390 ? Reason for asking is that it causes
>> all my s390 emulations to crash. Reverting it fixes the problem.
>
> My understanding is that it changes the error on s390. Previously it
> would complain about the local_irq_enable() in arch_cpu_idle(), now it
> complains when taking an interrupt during idle.

The errors on s390 all were fixed in the meantime. I cannot say which
patch fixed it, but we haven't seen any warning in our internal CI
during the last weeks. So reverting the patch would likely fix the issue
for us.

s390 is likely to switch to generic entry with the next merge window (I'm
working on it), so all that stuff will be easier then.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-12-01 11:02     ` Peter Zijlstra
  2020-12-01 11:12       ` Sven Schnelle
@ 2020-12-01 11:56       ` Sven Schnelle
  2020-12-01 12:52         ` Peter Zijlstra
  2020-12-01 14:51         ` Peter Zijlstra
  1 sibling, 2 replies; 22+ messages in thread
From: Sven Schnelle @ 2020-12-01 11:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Guenter Roeck, rafael, viresh.kumar, mingo, x86, mark.rutland,
	will, linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390, Linus Torvalds

Hi Peter,

Peter Zijlstra <peterz@infradead.org> writes:

> On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
>> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
>> > We call arch_cpu_idle() with RCU disabled, but then use
>> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
>> > 
>> > Switch all arch_cpu_idle() implementations to use
>> > raw_local_irq_{en,dis}able() and carefully manage the
>> > lockdep,rcu,tracing state like we do in entry.
>> > 
>> > (XXX: we really should change arch_cpu_idle() to not return with
>> > interrupts enabled)
>> > 
>> 
>> Has this patch been tested on s390 ? Reason for asking is that it causes
>> all my s390 emulations to crash. Reverting it fixes the problem.
>
> My understanding is that it changes the error on s390. Previously it
> would complain about the local_irq_enable() in arch_cpu_idle(), now it
> complains when taking an interrupt during idle.

I looked into adding the required functionality for s390, but the code
we would need to add to entry.S is rather large - as you noted we would
have to duplicate large portions of irqentry_enter() into our code.
Given that s390 was fine before that patch, can you revert it and submit
it again during the next merge window?

Thanks
Sven

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-12-01 11:56       ` Sven Schnelle
@ 2020-12-01 12:52         ` Peter Zijlstra
  2020-12-01 13:06           ` Guenter Roeck
  2020-12-01 13:38           ` Will Deacon
  2020-12-01 14:51         ` Peter Zijlstra
  1 sibling, 2 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-12-01 12:52 UTC (permalink / raw)
  To: Sven Schnelle
  Cc: Guenter Roeck, rafael, viresh.kumar, mingo, x86, mark.rutland,
	will, linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390, Linus Torvalds

On Tue, Dec 01, 2020 at 12:56:27PM +0100, Sven Schnelle wrote:
> Hi Peter,
> 
> Peter Zijlstra <peterz@infradead.org> writes:
> 
> > On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
> >> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> >> > We call arch_cpu_idle() with RCU disabled, but then use
> >> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> >> > 
> >> > Switch all arch_cpu_idle() implementations to use
> >> > raw_local_irq_{en,dis}able() and carefully manage the
> >> > lockdep,rcu,tracing state like we do in entry.
> >> > 
> >> > (XXX: we really should change arch_cpu_idle() to not return with
> >> > interrupts enabled)
> >> > 
> >> 
> >> Has this patch been tested on s390 ? Reason for asking is that it causes
> >> all my s390 emulations to crash. Reverting it fixes the problem.
> >
> > My understanding is that it changes the error on s390. Previously it
> > would complain about the local_irq_enable() in arch_cpu_idle(), now it
> > complains when taking an interrupt during idle.
> 
> I looked into adding the required functionality for s390, but the code
> we would need to add to entry.S is rather large - as you noted we would
> have to duplicate large portions of irqentry_enter() into our code.
> Given that s390 was fine before that patch, can you revert it and submit
> it again during the next merge window?

I'm not sure I understand how s390 was fine without it, let me consider.
Also, what's the status of ARM64, they do need this too.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-12-01 12:52         ` Peter Zijlstra
@ 2020-12-01 13:06           ` Guenter Roeck
  2020-12-01 13:38           ` Will Deacon
  1 sibling, 0 replies; 22+ messages in thread
From: Guenter Roeck @ 2020-12-01 13:06 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Sven Schnelle, rafael, viresh.kumar, mingo, x86, mark.rutland,
	will, linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390, Linus Torvalds

On Tue, Dec 01, 2020 at 01:52:46PM +0100, Peter Zijlstra wrote:
> On Tue, Dec 01, 2020 at 12:56:27PM +0100, Sven Schnelle wrote:
> > Hi Peter,
> > 
> > Peter Zijlstra <peterz@infradead.org> writes:
> > 
> > > On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
> > >> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> > >> > We call arch_cpu_idle() with RCU disabled, but then use
> > >> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> > >> > 
> > >> > Switch all arch_cpu_idle() implementations to use
> > >> > raw_local_irq_{en,dis}able() and carefully manage the
> > >> > lockdep,rcu,tracing state like we do in entry.
> > >> > 
> > >> > (XXX: we really should change arch_cpu_idle() to not return with
> > >> > interrupts enabled)
> > >> > 
> > >> 
> > >> Has this patch been tested on s390 ? Reason for asking is that it causes
> > >> all my s390 emulations to crash. Reverting it fixes the problem.
> > >
> > > My understanding is that it changes the error on s390. Previously it
> > > would complain about the local_irq_enable() in arch_cpu_idle(), now it
> > > complains when taking an interrupt during idle.
> > 
> > I looked into adding the required functionality for s390, but the code
> > we would need to add to entry.S is rather large - as you noted we would
> > have to duplicate large portions of irqentry_enter() into our code.
> > Given that s390 was fine before that patch, can you revert it and submit
> > it again during the next merge window?
> 
> I'm not sure I understand how s390 was fine without it, let me consider.
> Also, what's the status of ARM64, they do need this too.

For v5.10-rc6:

Build results:
	total: 154 pass: 154 fail: 0
Qemu test results:
	total: 426 pass: 421 fail: 5
Failed tests:
	s390:defconfig:initrd
	s390:defconfig:virtio-blk-ccw:rootfs
	s390:defconfig:scsi[virtio-ccw]:rootfs
	s390:defconfig:virtio-pci:rootfs
	s390:defconfig:scsi[virtio-pci]:rootfs

At least with qemu all other tests/architectures are fine.
You can find the tested architectures at https://kerneltests.org/builders.

Guenter

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-12-01 12:52         ` Peter Zijlstra
  2020-12-01 13:06           ` Guenter Roeck
@ 2020-12-01 13:38           ` Will Deacon
  1 sibling, 0 replies; 22+ messages in thread
From: Will Deacon @ 2020-12-01 13:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Sven Schnelle, Guenter Roeck, rafael, viresh.kumar, mingo, x86,
	mark.rutland, linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390, Linus Torvalds

On Tue, Dec 01, 2020 at 01:52:46PM +0100, Peter Zijlstra wrote:
> On Tue, Dec 01, 2020 at 12:56:27PM +0100, Sven Schnelle wrote:
> > Peter Zijlstra <peterz@infradead.org> writes:
> > > On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
> > >> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> > >> > We call arch_cpu_idle() with RCU disabled, but then use
> > >> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> > >> > 
> > >> > Switch all arch_cpu_idle() implementations to use
> > >> > raw_local_irq_{en,dis}able() and carefully manage the
> > >> > lockdep,rcu,tracing state like we do in entry.
> > >> > 
> > >> > (XXX: we really should change arch_cpu_idle() to not return with
> > >> > interrupts enabled)
> > >> > 
> > >> 
> > >> Has this patch been tested on s390 ? Reason for asking is that it causes
> > >> all my s390 emulations to crash. Reverting it fixes the problem.
> > >
> > > My understanding is that it changes the error on s390. Previously it
> > > would complain about the local_irq_enable() in arch_cpu_idle(), now it
> > > complains when taking an interrupt during idle.
> > 
> > I looked into adding the required functionality for s390, but the code
> > we would need to add to entry.S is rather large - as you noted we would
> > have to duplicate large portions of irqentry_enter() into our code.
> > Given that s390 was fine before that patch, can you revert it and submit
> > it again during the next merge window?
> 
> I'm not sure I understand how s390 was fine without it, let me consider.
> Also, what's the status of ARM64, they do need this too.

We've got the batch of fixes from Mark queued for -rc7:

https://fixes.arm64.dev/

which rely on Peter's patch:

https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/commit/?h=for-next/fixes&id=114e0a684753516ef4b71ccb55a8ebcfa8735edb

There's room for consolidation and cleanup in future, but right now we've
focussed purely on fixing things.

Will

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing
  2020-12-01 11:56       ` Sven Schnelle
  2020-12-01 12:52         ` Peter Zijlstra
@ 2020-12-01 14:51         ` Peter Zijlstra
  1 sibling, 0 replies; 22+ messages in thread
From: Peter Zijlstra @ 2020-12-01 14:51 UTC (permalink / raw)
  To: Sven Schnelle
  Cc: Guenter Roeck, rafael, viresh.kumar, mingo, x86, mark.rutland,
	will, linux-kernel, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, linux-s390, Linus Torvalds

On Tue, Dec 01, 2020 at 12:56:27PM +0100, Sven Schnelle wrote:
> Hi Peter,
> 
> Peter Zijlstra <peterz@infradead.org> writes:
> 
> > On Mon, Nov 30, 2020 at 01:00:03PM -0800, Guenter Roeck wrote:
> >> On Fri, Nov 20, 2020 at 12:41:46PM +0100, Peter Zijlstra wrote:
> >> > We call arch_cpu_idle() with RCU disabled, but then use
> >> > local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
> >> > 
> >> > Switch all arch_cpu_idle() implementations to use
> >> > raw_local_irq_{en,dis}able() and carefully manage the
> >> > lockdep,rcu,tracing state like we do in entry.
> >> > 
> >> > (XXX: we really should change arch_cpu_idle() to not return with
> >> > interrupts enabled)
> >> > 
> >> 
> >> Has this patch been tested on s390 ? Reason for asking is that it causes
> >> all my s390 emulations to crash. Reverting it fixes the problem.
> >
> > My understanding is that it changes the error on s390. Previously it
> > would complain about the local_irq_enable() in arch_cpu_idle(), now it
> > complains when taking an interrupt during idle.
> 
> I looked into adding the required functionality for s390, but the code
> we would need to add to entry.S is rather large - as you noted we would
> have to duplicate large portions of irqentry_enter() into our code.
> Given that s390 was fine before that patch, can you revert it and submit
> it again during the next merge window?

So the thing that got me started here was:

  https://lkml.kernel.org/r/yt9dimbm79qi.fsf@linux.ibm.com/

And I got a very similar report from Mark for arm64. I'm not sure what
you meanwhile did to get rid of that. But I'm struggling to understand
how s390 can work on v5.10-rc5.

There's just too much calling into tracing while RCU is stopped.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2020-12-01 14:52 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-20 11:41 [PATCH 0/2] More RCU vs idle fixes Peter Zijlstra
2020-11-20 11:41 ` [PATCH 1/2] sched/idle: Fix arch_cpu_idle() vs tracing Peter Zijlstra
2020-11-20 12:39   ` Mark Rutland
2020-11-25 13:57   ` [tip: locking/urgent] " tip-bot2 for Peter Zijlstra
2020-11-30 21:00   ` [PATCH 1/2] " Guenter Roeck
2020-12-01 11:02     ` Peter Zijlstra
2020-12-01 11:12       ` Sven Schnelle
2020-12-01 11:56       ` Sven Schnelle
2020-12-01 12:52         ` Peter Zijlstra
2020-12-01 13:06           ` Guenter Roeck
2020-12-01 13:38           ` Will Deacon
2020-12-01 14:51         ` Peter Zijlstra
2020-11-20 11:41 ` [PATCH 2/2] intel_idle: Fix intel_idle() " Peter Zijlstra
2020-11-23 10:26   ` Rafael J. Wysocki
2020-11-23 13:46     ` Peter Zijlstra
2020-11-23 13:54       ` Rafael J. Wysocki
2020-11-23 14:35         ` Peter Zijlstra
2020-11-23 14:54           ` Rafael J. Wysocki
2020-11-25 13:57           ` [tip: locking/urgent] " tip-bot2 for Peter Zijlstra
2020-11-24 13:47 ` [PATCH 0/2] More RCU vs idle fixes Sven Schnelle
2020-11-24 14:18   ` Peter Zijlstra
2020-11-24 14:23     ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).