linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] lockdep: report broken irq restoration
@ 2020-12-09 18:33 Mark Rutland
  2020-12-09 19:05 ` Andy Lutomirski
  2020-12-17 14:36 ` Peter Zijlstra
  0 siblings, 2 replies; 5+ messages in thread
From: Mark Rutland @ 2020-12-09 18:33 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mark Rutland, Andy Lutomirski, Ingo Molnar, Juergen Gross,
	Peter Zijlstra, Thomas Gleixner

We generally expect local_irq_save() and local_irq_restore() to be
paired and sanely nested, and so local_irq_restore() expects to be
called with irqs disabled. Thus, within local_irq_restore() we only
trace irq flag changes when unmasking irqs.

This means that a seuence such as:

| local_irq_disable();
| local_irq_save(flags);
| local_irq_enable();
| local_irq_restore(flags);

... is liable to break things, as the local_irq_restore() would mask
IRQs without tracing this change.

We don't consider such sequences to be a good idea, so let's define
those as forbidden, and add tooling to detect such broken cases.

This patch adds debug code to WARN() when local_irq_restore() is called
with irqs enabled. As local_irq_restore() is expected to pair with
local_irq_save(), it should never be called with interrupts enabled.

To avoid the possibility of circular header dependencies beteen
irqflags.h and bug.h, the warning is handled in a separate C file.

The new code is all conditional on a new CONFIG_DEBUG_IRQFLAGS symbol
which is independent of CONFIG_TRACE_IRQFLAGS. As noted above such cases
will confuse lockdep, so CONFIG_DEBUG_LOCKDEP now selects
CONFIG_DEBUG_IRQFLAGS.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqflags.h       | 18 +++++++++++++++++-
 kernel/locking/Makefile        |  1 +
 kernel/locking/irqflag-debug.c | 12 ++++++++++++
 lib/Kconfig.debug              |  7 +++++++
 4 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 kernel/locking/irqflag-debug.c

Note: as things stand this'll blow up at boot-time on x86 within the io-apic
timer_irq_works() boot-time test. I've proposed a fix for that:

https://lore.kernel.org/lkml/20201209181514.GA14235@C02TD0UTHF1T.local/

... which was sufficient for booting under QEMU without splats. I'm giving this
a soak under Syzkaller on arm64 as that booted cleanly to begin with.

Mark.

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 3ed4e8771b64..bca3c6fa8270 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -220,10 +220,26 @@ do {						\
 
 #else /* !CONFIG_TRACE_IRQFLAGS */
 
+#ifdef CONFIG_DEBUG_IRQFLAGS
+extern void warn_bogus_irq_restore(bool *warned);
+#define check_bogus_irq_restore()				\
+	do {							\
+		static bool __section(".data.once") __warned;	\
+		if (unlikely(!raw_irqs_disabled()))		\
+			warn_bogus_irq_restore(&__warned);	\
+	} while (0)
+#else
+#define check_bogus_irq_restore() do { } while (0)
+#endif
+
 #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
 #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
 #define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
-#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
+#define local_irq_restore(flags)		\
+	do {					\
+		check_bogus_irq_restore();	\
+		raw_local_irq_restore(flags);	\
+	} while (0)
 #define safe_halt()		do { raw_safe_halt(); } while (0)
 
 #endif /* CONFIG_TRACE_IRQFLAGS */
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 6d11cfb9b41f..8838f1d7c4a2 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -15,6 +15,7 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
 endif
 
+obj-$(CONFIG_DEBUG_IRQFLAGS) += irqflag-debug.o
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_LOCKDEP) += lockdep.o
 ifeq ($(CONFIG_PROC_FS),y)
diff --git a/kernel/locking/irqflag-debug.c b/kernel/locking/irqflag-debug.c
new file mode 100644
index 000000000000..3024c6837ac2
--- /dev/null
+++ b/kernel/locking/irqflag-debug.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bug.h>
+
+void warn_bogus_irq_restore(bool *warned)
+{
+	if (*warned)
+		return;
+
+	*warned = true;
+	WARN(1, "local_irq_restore() called with IRQs enabled\n");
+}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c789b39ed527..f7895d57bb08 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1312,6 +1312,7 @@ config LOCKDEP_SMALL
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
+	select DEBUG_IRQFLAGS
 	help
 	  If you say Y here, the lock dependency engine will do
 	  additional runtime checks to debug itself, at the price
@@ -1400,6 +1401,12 @@ config TRACE_IRQFLAGS_NMI
 	depends on TRACE_IRQFLAGS
 	depends on TRACE_IRQFLAGS_NMI_SUPPORT
 
+config DEBUG_IRQFLAGS
+	bool "Interrupt mask debugging"
+	help
+	  Enables checks for potentially unsafe enabling or disabling of
+	  interrupts.
+
 config STACKTRACE
 	bool "Stack backtrace support"
 	depends on STACKTRACE_SUPPORT
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: report broken irq restoration
  2020-12-09 18:33 [PATCH] lockdep: report broken irq restoration Mark Rutland
@ 2020-12-09 19:05 ` Andy Lutomirski
  2020-12-10 11:03   ` Mark Rutland
  2020-12-17 14:36 ` Peter Zijlstra
  1 sibling, 1 reply; 5+ messages in thread
From: Andy Lutomirski @ 2020-12-09 19:05 UTC (permalink / raw)
  To: Mark Rutland
  Cc: LKML, Andy Lutomirski, Ingo Molnar, Juergen Gross,
	Peter Zijlstra, Thomas Gleixner

On Wed, Dec 9, 2020 at 10:33 AM Mark Rutland <mark.rutland@arm.com> wrote:
>
> We generally expect local_irq_save() and local_irq_restore() to be
> paired and sanely nested, and so local_irq_restore() expects to be
> called with irqs disabled. Thus, within local_irq_restore() we only
> trace irq flag changes when unmasking irqs.
>
> This means that a seuence such as:
>
> | local_irq_disable();
> | local_irq_save(flags);
> | local_irq_enable();
> | local_irq_restore(flags);
>
> ... is liable to break things, as the local_irq_restore() would mask
> IRQs without tracing this change.
>
> We don't consider such sequences to be a good idea, so let's define
> those as forbidden, and add tooling to detect such broken cases.
>
> This patch adds debug code to WARN() when local_irq_restore() is called
> with irqs enabled. As local_irq_restore() is expected to pair with
> local_irq_save(), it should never be called with interrupts enabled.
>
> To avoid the possibility of circular header dependencies beteen
> irqflags.h and bug.h, the warning is handled in a separate C file.
>
> The new code is all conditional on a new CONFIG_DEBUG_IRQFLAGS symbol
> which is independent of CONFIG_TRACE_IRQFLAGS. As noted above such cases
> will confuse lockdep, so CONFIG_DEBUG_LOCKDEP now selects
> CONFIG_DEBUG_IRQFLAGS.
>
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> ---
>  include/linux/irqflags.h       | 18 +++++++++++++++++-
>  kernel/locking/Makefile        |  1 +
>  kernel/locking/irqflag-debug.c | 12 ++++++++++++
>  lib/Kconfig.debug              |  7 +++++++
>  4 files changed, 37 insertions(+), 1 deletion(-)
>  create mode 100644 kernel/locking/irqflag-debug.c
>
> Note: as things stand this'll blow up at boot-time on x86 within the io-apic
> timer_irq_works() boot-time test. I've proposed a fix for that:
>
> https://lore.kernel.org/lkml/20201209181514.GA14235@C02TD0UTHF1T.local/
>
> ... which was sufficient for booting under QEMU without splats. I'm giving this
> a soak under Syzkaller on arm64 as that booted cleanly to begin with.
>
> Mark.
>
> diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
> index 3ed4e8771b64..bca3c6fa8270 100644
> --- a/include/linux/irqflags.h
> +++ b/include/linux/irqflags.h
> @@ -220,10 +220,26 @@ do {                                              \
>
>  #else /* !CONFIG_TRACE_IRQFLAGS */
>
> +#ifdef CONFIG_DEBUG_IRQFLAGS
> +extern void warn_bogus_irq_restore(bool *warned);
> +#define check_bogus_irq_restore()                              \
> +       do {                                                    \
> +               static bool __section(".data.once") __warned;   \
> +               if (unlikely(!raw_irqs_disabled()))             \
> +                       warn_bogus_irq_restore(&__warned);      \
> +       } while (0)

What's the benefit of having a per-caller __warned instead of just
having a single global one in warn_bogus_irq_restore()?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: report broken irq restoration
  2020-12-09 19:05 ` Andy Lutomirski
@ 2020-12-10 11:03   ` Mark Rutland
  0 siblings, 0 replies; 5+ messages in thread
From: Mark Rutland @ 2020-12-10 11:03 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: LKML, Ingo Molnar, Juergen Gross, Peter Zijlstra, Thomas Gleixner

On Wed, Dec 09, 2020 at 11:05:21AM -0800, Andy Lutomirski wrote:
> On Wed, Dec 9, 2020 at 10:33 AM Mark Rutland <mark.rutland@arm.com> wrote:
> > diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
> > index 3ed4e8771b64..bca3c6fa8270 100644
> > --- a/include/linux/irqflags.h
> > +++ b/include/linux/irqflags.h
> > @@ -220,10 +220,26 @@ do {                                              \
> >
> >  #else /* !CONFIG_TRACE_IRQFLAGS */
> >
> > +#ifdef CONFIG_DEBUG_IRQFLAGS
> > +extern void warn_bogus_irq_restore(bool *warned);
> > +#define check_bogus_irq_restore()                              \
> > +       do {                                                    \
> > +               static bool __section(".data.once") __warned;   \
> > +               if (unlikely(!raw_irqs_disabled()))             \
> > +                       warn_bogus_irq_restore(&__warned);      \
> > +       } while (0)
> 
> What's the benefit of having a per-caller __warned instead of just
> having a single global one in warn_bogus_irq_restore()?

I'd copied that from the previous proposal, and had assumed that if we
had several distinct violations we'd want to report all of them in one
boot session. I'm perfectly happy to get rid of that and make
warn_bogus_irq_restore() use WARN_ONCE() directly. 

I'm using this with a local Syzkaller instance, and as that will kill
the kernel on the first WARN() anyway, it makes no difference to me. I
guess in the field having a single warning is likely to be good enough
too.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: report broken irq restoration
  2020-12-09 18:33 [PATCH] lockdep: report broken irq restoration Mark Rutland
  2020-12-09 19:05 ` Andy Lutomirski
@ 2020-12-17 14:36 ` Peter Zijlstra
  2021-01-04 10:14   ` Mark Rutland
  1 sibling, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2020-12-17 14:36 UTC (permalink / raw)
  To: Mark Rutland
  Cc: linux-kernel, Andy Lutomirski, Ingo Molnar, Juergen Gross,
	Thomas Gleixner

On Wed, Dec 09, 2020 at 06:33:37PM +0000, Mark Rutland wrote:
> This means that a seuence such as:

+q

> diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
> index 3ed4e8771b64..bca3c6fa8270 100644
> --- a/include/linux/irqflags.h
> +++ b/include/linux/irqflags.h
> @@ -220,10 +220,26 @@ do {						\
>  
>  #else /* !CONFIG_TRACE_IRQFLAGS */
>  
> +#ifdef CONFIG_DEBUG_IRQFLAGS
> +extern void warn_bogus_irq_restore(bool *warned);
> +#define check_bogus_irq_restore()				\
> +	do {							\
> +		static bool __section(".data.once") __warned;	\
> +		if (unlikely(!raw_irqs_disabled()))		\
> +			warn_bogus_irq_restore(&__warned);	\
> +	} while (0)
> +#else
> +#define check_bogus_irq_restore() do { } while (0)
> +#endif
> +
>  #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
>  #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
>  #define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
> -#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
> +#define local_irq_restore(flags)		\
> +	do {					\
> +		check_bogus_irq_restore();	\
> +		raw_local_irq_restore(flags);	\
> +	} while (0)

Shouldn't that be in raw_local_irq_restore() ?

>  #define safe_halt()		do { raw_safe_halt(); } while (0)
>  
>  #endif /* CONFIG_TRACE_IRQFLAGS */

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -162,6 +162,7 @@ do {						\
 #define raw_local_irq_restore(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
+		check_bogus_irq_restore();		\
 		arch_local_irq_restore(flags);		\
 	} while (0)
 #define raw_local_save_flags(flags)			\
@@ -235,11 +236,7 @@ extern void warn_bogus_irq_restore(bool
 #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
 #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
 #define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
-#define local_irq_restore(flags)		\
-	do {					\
-		check_bogus_irq_restore();	\
-		raw_local_irq_restore(flags);	\
-	} while (0)
+#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
 #define safe_halt()		do { raw_safe_halt(); } while (0)
 
 #endif /* CONFIG_TRACE_IRQFLAGS */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lockdep: report broken irq restoration
  2020-12-17 14:36 ` Peter Zijlstra
@ 2021-01-04 10:14   ` Mark Rutland
  0 siblings, 0 replies; 5+ messages in thread
From: Mark Rutland @ 2021-01-04 10:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Andy Lutomirski, Ingo Molnar, Juergen Gross,
	Thomas Gleixner

On Thu, Dec 17, 2020 at 03:36:40PM +0100, Peter Zijlstra wrote:
> On Wed, Dec 09, 2020 at 06:33:37PM +0000, Mark Rutland wrote:
> > This means that a seuence such as:
> 
> +q
> 
> > diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
> > index 3ed4e8771b64..bca3c6fa8270 100644
> > --- a/include/linux/irqflags.h
> > +++ b/include/linux/irqflags.h
> > @@ -220,10 +220,26 @@ do {						\
> >  
> >  #else /* !CONFIG_TRACE_IRQFLAGS */
> >  
> > +#ifdef CONFIG_DEBUG_IRQFLAGS
> > +extern void warn_bogus_irq_restore(bool *warned);
> > +#define check_bogus_irq_restore()				\
> > +	do {							\
> > +		static bool __section(".data.once") __warned;	\
> > +		if (unlikely(!raw_irqs_disabled()))		\
> > +			warn_bogus_irq_restore(&__warned);	\
> > +	} while (0)
> > +#else
> > +#define check_bogus_irq_restore() do { } while (0)
> > +#endif
> > +
> >  #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
> >  #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
> >  #define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
> > -#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
> > +#define local_irq_restore(flags)		\
> > +	do {					\
> > +		check_bogus_irq_restore();	\
> > +		raw_local_irq_restore(flags);	\
> > +	} while (0)
> 
> Shouldn't that be in raw_local_irq_restore() ?

Yup, that'd be preferable. Note that will require a refactoring here if
we want the warning to be available regardless of CONFIG_TRACE_IRQFLAGS.

I'd intended to rejig that for v2, but I didn't get the chance before
finishing work for the year, and had only done some basic rework in my
WIP (e.g. removing the per-instance bool as per Andy's comments):

https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/commit/?h=fuzzing/5.10-rc7&id=61336e25d1415a4ac3aaf8cf75105c2ec2eb95e7

I'll try to respin that in the next few days if I get the chance.

Thanks,
Mark.

> 
> >  #define safe_halt()		do { raw_safe_halt(); } while (0)
> >  
> >  #endif /* CONFIG_TRACE_IRQFLAGS */
> 
> --- a/include/linux/irqflags.h
> +++ b/include/linux/irqflags.h
> @@ -162,6 +162,7 @@ do {						\
>  #define raw_local_irq_restore(flags)			\
>  	do {						\
>  		typecheck(unsigned long, flags);	\
> +		check_bogus_irq_restore();		\
>  		arch_local_irq_restore(flags);		\
>  	} while (0)
>  #define raw_local_save_flags(flags)			\
> @@ -235,11 +236,7 @@ extern void warn_bogus_irq_restore(bool
>  #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
>  #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
>  #define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
> -#define local_irq_restore(flags)		\
> -	do {					\
> -		check_bogus_irq_restore();	\
> -		raw_local_irq_restore(flags);	\
> -	} while (0)
> +#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
>  #define safe_halt()		do { raw_safe_halt(); } while (0)
>  
>  #endif /* CONFIG_TRACE_IRQFLAGS */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-01-04 10:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-09 18:33 [PATCH] lockdep: report broken irq restoration Mark Rutland
2020-12-09 19:05 ` Andy Lutomirski
2020-12-10 11:03   ` Mark Rutland
2020-12-17 14:36 ` Peter Zijlstra
2021-01-04 10:14   ` Mark Rutland

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).