LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: tglx@linutronix.de, luto@amacapital.net, peterz@infradead.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	Lai Jiangshan <laijs@linux.alibaba.com>,
	sean.j.christopherson@intel.com, andrew.cooper3@citrix.com,
	daniel.thompson@linaro.org, a.darwish@linutronix.de,
	rostedt@goodmis.org, bigeasy@linutronix.de
Subject: [PATCH 14/14] x86/entry: Fix NMI vs IRQ state tracking
Date: Fri, 29 May 2020 23:27:42 +0200
Message-ID: <20200529213321.528803619@infradead.org> (raw)
In-Reply-To: <20200529212728.795169701@infradead.org>

While the nmi_enter() users did
trace_hardirqs_{off_prepare,on_finish}() there was no matching
lockdep_hardirqs_*() calls to complete the picture.

Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state
tracking across the NMIs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/common.c         |   42 ++++++++++++++++++++++++++++++++++++----
 arch/x86/include/asm/idtentry.h |    3 ++
 arch/x86/kernel/nmi.c           |    9 +++-----
 arch/x86/kernel/traps.c         |   20 ++++---------------
 include/linux/hardirq.h         |   28 ++++++++++++++++++--------
 5 files changed, 69 insertions(+), 33 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -550,7 +550,7 @@ SYSCALL_DEFINE0(ni_syscall)
  * The return value must be fed into the rcu_exit argument of
  * idtentry_exit_cond_rcu().
  */
-bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
+noinstr bool idtentry_enter_cond_rcu(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
 		enter_from_user_mode();
@@ -619,7 +619,7 @@ static void idtentry_exit_cond_resched(s
  * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
  * function must be fed into the @rcu_exit argument.
  */
-void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
+noinstr void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
 {
 	lockdep_assert_irqs_disabled();
 
@@ -663,7 +663,7 @@ void noinstr idtentry_exit_cond_rcu(stru
  * Invokes enter_from_user_mode() to establish the proper context for
  * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
  */
-void noinstr idtentry_enter_user(struct pt_regs *regs)
+noinstr void idtentry_enter_user(struct pt_regs *regs)
 {
 	enter_from_user_mode();
 }
@@ -680,13 +680,47 @@ void noinstr idtentry_enter_user(struct
  *
  * Counterpart to idtentry_enter_user().
  */
-void noinstr idtentry_exit_user(struct pt_regs *regs)
+noinstr void idtentry_exit_user(struct pt_regs *regs)
 {
 	lockdep_assert_irqs_disabled();
 
 	prepare_exit_to_usermode(regs);
 }
 
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+	bool irq_state = lockdep_hardirqs_enabled(current);
+
+	__nmi_enter();
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	lockdep_hardirq_enter();
+	rcu_nmi_enter();
+
+	instrumentation_begin();
+	trace_hardirqs_off_finish();
+	ftrace_nmi_enter();
+	instrumentation_end();
+
+	return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+	instrumentation_begin();
+	ftrace_nmi_exit();
+	if (restore) {
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	}
+	instrumentation_end();
+
+	rcu_nmi_exit();
+	lockdep_hardirq_exit();
+	if (restore)
+		lockdep_hardirqs_on(CALLER_ADDR0);
+	__nmi_exit();
+}
+
 #ifdef CONFIG_XEN_PV
 #ifndef CONFIG_PREEMPTION
 /*
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -16,6 +16,9 @@ void idtentry_exit_user(struct pt_regs *
 bool idtentry_enter_cond_rcu(struct pt_regs *regs);
 void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
 
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+
 /**
  * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
  *		      No error code pushed by hardware
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struc
 	__this_cpu_write(last_nmi_rip, regs->ip);
 
 	instrumentation_begin();
-	trace_hardirqs_off_finish();
 
 	handled = nmi_handle(NMI_LOCAL, regs);
 	__this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struc
 		unknown_nmi_error(reason, regs);
 
 out:
-	if (regs->flags & X86_EFLAGS_IF)
-		trace_hardirqs_on_prepare();
 	instrumentation_end();
 }
 
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi
 
 DEFINE_IDTENTRY_NMI(exc_nmi)
 {
+	bool irq_state;
+
 	if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
 		return;
 
@@ -491,14 +490,14 @@ DEFINE_IDTENTRY_NMI(exc_nmi)
 
 	this_cpu_write(nmi_dr7, local_db_save());
 
-	nmi_enter();
+	irq_state = idtentry_enter_nmi(regs);
 
 	inc_irq_stat(__nmi_count);
 
 	if (!ignore_nmis)
 		default_do_nmi(regs);
 
-	nmi_exit();
+	idtentry_exit_nmi(regs, irq_state);
 
 	local_db_restore(this_cpu_read(nmi_dr7));
 
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -387,7 +387,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
 	}
 #endif
 
-	nmi_enter();
+	idtentry_enter_nmi(regs);
 	instrumentation_begin();
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -632,15 +632,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
 		instrumentation_end();
 		idtentry_exit_user(regs);
 	} else {
-		nmi_enter();
+		bool irq_state = idtentry_enter_nmi(regs);
 		instrumentation_begin();
-		trace_hardirqs_off_finish();
 		if (!do_int3(regs))
 			die("int3", regs, 0);
-		if (regs->flags & X86_EFLAGS_IF)
-			trace_hardirqs_on_prepare();
 		instrumentation_end();
-		nmi_exit();
+		idtentry_exit_nmi(regs, irq_state);
 	}
 }
 
@@ -831,10 +828,7 @@ static void noinstr handle_debug(struct
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 					     unsigned long dr6)
 {
-	nmi_enter();
-	instrumentation_begin();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
+	bool irq_state = idtentry_enter_nmi(regs);
 
 	/*
 	 * The SDM says "The processor clears the BTF flag when it
@@ -857,11 +851,7 @@ static __always_inline void exc_debug_ke
 	if (dr6)
 		handle_debug(regs, dr6, false);
 
-	instrumentation_begin();
-	if (regs->flags & X86_EFLAGS_IF)
-		trace_hardirqs_on_prepare();
-	instrumentation_end();
-	nmi_exit();
+	idtentry_exit_nmi(regs, irq_state);
 }
 
 static __always_inline void exc_debug_user(struct pt_regs *regs,
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
 /*
  * nmi_enter() can nest up to 15 times; see NMI_BITS.
  */
-#define nmi_enter()						\
+#define __nmi_enter()						\
 	do {							\
+		lockdep_off();					\
 		arch_nmi_enter();				\
 		printk_nmi_enter();				\
-		lockdep_off();					\
 		BUG_ON(in_nmi() == NMI_MASK);			\
 		__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\
-		rcu_nmi_enter();				\
+	} while (0)
+
+#define nmi_enter()						\
+	do {							\
+		__nmi_enter();					\
 		lockdep_hardirq_enter();			\
+		rcu_nmi_enter();				\
 		instrumentation_begin();			\
 		ftrace_nmi_enter();				\
 		instrumentation_end();				\
 	} while (0)
 
+#define __nmi_exit()						\
+	do {							\
+		BUG_ON(!in_nmi());				\
+		__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		printk_nmi_exit();				\
+		arch_nmi_exit();				\
+		lockdep_on();					\
+	} while (0)
+
 #define nmi_exit()						\
 	do {							\
 		instrumentation_begin();			\
 		ftrace_nmi_exit();				\
 		instrumentation_end();				\
-		lockdep_hardirq_exit();				\
 		rcu_nmi_exit();					\
-		BUG_ON(!in_nmi());				\
-		__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
-		lockdep_on();					\
-		printk_nmi_exit();				\
-		arch_nmi_exit();				\
+		lockdep_hardirq_exit();				\
+		__nmi_exit();					\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */



      parent reply index

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-29 21:27 [PATCH 00/14] x86/entry: disallow #DB more and x86/entry lockdep/nmi Peter Zijlstra
2020-05-29 21:27 ` [PATCH 01/14] x86/hw_breakpoint: Add within_area() to check data breakpoints Peter Zijlstra
2020-05-29 21:27 ` [PATCH 02/14] x86/hw_breakpoint: Prevent data breakpoints on direct GDT Peter Zijlstra
2020-05-30 12:45   ` Andrew Cooper
2020-05-30 15:15     ` Lai Jiangshan
2020-05-29 21:27 ` [PATCH 03/14] x86/hw_breakpoint: Prevent data breakpoints on per_cpu cpu_tss_rw Peter Zijlstra
2020-05-29 21:27 ` [PATCH 04/14] x86/hw_breakpoint: Prevent data breakpoints on user_pcid_flush_mask Peter Zijlstra
2020-05-29 21:27 ` [PATCH 05/14] x86/entry: Introduce local_db_{save,restore}() Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 06/14] x86/entry, nmi: Disable #DB Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 07/14] x86/entry, mce: Disallow #DB during #MC Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 08/14] x86/entry: Optimize local_db_save() for virt Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-06-03  1:17   ` [PATCH 08/14] " Sean Christopherson
2020-05-29 21:27 ` [PATCH 09/14] x86/entry: Remove debug IDT frobbing Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 10/14] x86/entry: Remove DBn stacks Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 11/14] x86/entry: Clarify irq_{enter,exit}_rcu() Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-06-02 14:42   ` [PATCH 11/14] " Qian Cai
2020-06-02 15:05     ` Peter Zijlstra
2020-06-02 18:47       ` Qian Cai
2020-06-03 17:50       ` [tip: x86/entry] x86/entry: Use __irq_exit_rcu() in irq_exit() tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 12/14] x86/entry: Rename trace_hardirqs_off_prepare() Peter Zijlstra
2020-05-30  9:57   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking Peter Zijlstra
2020-05-29 22:14   ` Steven Rostedt
2020-05-29 22:25     ` Peter Zijlstra
2020-05-29 22:28       ` Steven Rostedt
2020-05-29 22:33       ` Peter Zijlstra
2020-06-02 20:00       ` Peter Zijlstra
2020-05-29 21:27 ` Peter Zijlstra [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200529213321.528803619@infradead.org \
    --to=peterz@infradead.org \
    --cc=a.darwish@linutronix.de \
    --cc=andrew.cooper3@citrix.com \
    --cc=bigeasy@linutronix.de \
    --cc=daniel.thompson@linaro.org \
    --cc=laijs@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=rostedt@goodmis.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git