From: Peter Zijlstra <peterz@infradead.org>
To: tglx@linutronix.de, luto@amacapital.net, peterz@infradead.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
Lai Jiangshan <laijs@linux.alibaba.com>,
sean.j.christopherson@intel.com, andrew.cooper3@citrix.com,
daniel.thompson@linaro.org, a.darwish@linutronix.de,
rostedt@goodmis.org, bigeasy@linutronix.de
Subject: [PATCH 14/14] x86/entry: Fix NMI vs IRQ state tracking
Date: Fri, 29 May 2020 23:27:42 +0200 [thread overview]
Message-ID: <20200529213321.528803619@infradead.org> (raw)
In-Reply-To: 20200529212728.795169701@infradead.org
While the nmi_enter() users did
trace_hardirqs_{off_finish,on_prepare}() there were no matching
lockdep_hardirqs_*() calls to complete the picture.
Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state
tracking across the NMIs.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/entry/common.c | 42 ++++++++++++++++++++++++++++++++++++----
arch/x86/include/asm/idtentry.h | 3 ++
arch/x86/kernel/nmi.c | 9 +++-----
arch/x86/kernel/traps.c | 20 ++++---------------
include/linux/hardirq.h | 28 ++++++++++++++++++--------
5 files changed, 69 insertions(+), 33 deletions(-)
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -550,7 +550,7 @@ SYSCALL_DEFINE0(ni_syscall)
* The return value must be fed into the rcu_exit argument of
* idtentry_exit_cond_rcu().
*/
-bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
+noinstr bool idtentry_enter_cond_rcu(struct pt_regs *regs)
{
if (user_mode(regs)) {
enter_from_user_mode();
@@ -619,7 +619,7 @@ static void idtentry_exit_cond_resched(s
* Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
* function must be fed into the @rcu_exit argument.
*/
-void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
+noinstr void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
{
lockdep_assert_irqs_disabled();
@@ -663,7 +663,7 @@ void noinstr idtentry_exit_cond_rcu(stru
* Invokes enter_from_user_mode() to establish the proper context for
* NOHZ_FULL. Otherwise scheduling on exit would not be possible.
*/
-void noinstr idtentry_enter_user(struct pt_regs *regs)
+noinstr void idtentry_enter_user(struct pt_regs *regs)
{
enter_from_user_mode();
}
@@ -680,13 +680,47 @@ void noinstr idtentry_enter_user(struct
*
* Counterpart to idtentry_enter_user().
*/
-void noinstr idtentry_exit_user(struct pt_regs *regs)
+noinstr void idtentry_exit_user(struct pt_regs *regs)
{
lockdep_assert_irqs_disabled();
prepare_exit_to_usermode(regs);
}
+noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+{
+ bool irq_state = lockdep_hardirqs_enabled(current);
+
+ __nmi_enter();
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirq_enter();
+ rcu_nmi_enter();
+
+ instrumentation_begin();
+ trace_hardirqs_off_finish();
+ ftrace_nmi_enter();
+ instrumentation_end();
+
+ return irq_state;
+}
+
+noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+{
+ instrumentation_begin();
+ ftrace_nmi_exit();
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+ instrumentation_end();
+
+ rcu_nmi_exit();
+ lockdep_hardirq_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ __nmi_exit();
+}
+
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -16,6 +16,9 @@ void idtentry_exit_user(struct pt_regs *
bool idtentry_enter_cond_rcu(struct pt_regs *regs);
void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);
+bool idtentry_enter_nmi(struct pt_regs *regs);
+void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struc
__this_cpu_write(last_nmi_rip, regs->ip);
instrumentation_begin();
- trace_hardirqs_off_finish();
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
@@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struc
unknown_nmi_error(reason, regs);
out:
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
}
@@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi
DEFINE_IDTENTRY_NMI(exc_nmi)
{
+ bool irq_state;
+
if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
return;
@@ -491,14 +490,14 @@ DEFINE_IDTENTRY_NMI(exc_nmi)
this_cpu_write(nmi_dr7, local_db_save());
- nmi_enter();
+ irq_state = idtentry_enter_nmi(regs);
inc_irq_stat(__nmi_count);
if (!ignore_nmis)
default_do_nmi(regs);
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
local_db_restore(this_cpu_read(nmi_dr7));
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -387,7 +387,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
}
#endif
- nmi_enter();
+ idtentry_enter_nmi(regs);
instrumentation_begin();
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -632,15 +632,12 @@ DEFINE_IDTENTRY_RAW(exc_int3)
instrumentation_end();
idtentry_exit_user(regs);
} else {
- nmi_enter();
+ bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
- trace_hardirqs_off_finish();
if (!do_int3(regs))
die("int3", regs, 0);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
}
@@ -831,10 +828,7 @@ static void noinstr handle_debug(struct
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
- nmi_enter();
- instrumentation_begin();
- trace_hardirqs_off_finish();
- instrumentation_end();
+ bool irq_state = idtentry_enter_nmi(regs);
/*
* The SDM says "The processor clears the BTF flag when it
@@ -857,11 +851,7 @@ static __always_inline void exc_debug_ke
if (dr6)
handle_debug(regs, dr6, false);
- instrumentation_begin();
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
- instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
-#define nmi_enter() \
+#define __nmi_enter() \
do { \
+ lockdep_off(); \
arch_nmi_enter(); \
printk_nmi_enter(); \
- lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
- rcu_nmi_enter(); \
+ } while (0)
+
+#define nmi_enter() \
+ do { \
+ __nmi_enter(); \
lockdep_hardirq_enter(); \
+ rcu_nmi_enter(); \
instrumentation_begin(); \
ftrace_nmi_enter(); \
instrumentation_end(); \
} while (0)
+#define __nmi_exit() \
+ do { \
+ BUG_ON(!in_nmi()); \
+ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ printk_nmi_exit(); \
+ arch_nmi_exit(); \
+ lockdep_on(); \
+ } while (0)
+
#define nmi_exit() \
do { \
instrumentation_begin(); \
ftrace_nmi_exit(); \
instrumentation_end(); \
- lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
- BUG_ON(!in_nmi()); \
- __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_on(); \
- printk_nmi_exit(); \
- arch_nmi_exit(); \
+ lockdep_hardirq_exit(); \
+ __nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
prev parent reply other threads:[~2020-05-29 21:37 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-29 21:27 [PATCH 00/14] x86/entry: disallow #DB more and x86/entry lockdep/nmi Peter Zijlstra
2020-05-29 21:27 ` [PATCH 01/14] x86/hw_breakpoint: Add within_area() to check data breakpoints Peter Zijlstra
2020-05-29 21:27 ` [PATCH 02/14] x86/hw_breakpoint: Prevent data breakpoints on direct GDT Peter Zijlstra
2020-05-30 12:45 ` Andrew Cooper
2020-05-30 15:15 ` Lai Jiangshan
2020-05-29 21:27 ` [PATCH 03/14] x86/hw_breakpoint: Prevent data breakpoints on per_cpu cpu_tss_rw Peter Zijlstra
2020-05-29 21:27 ` [PATCH 04/14] x86/hw_breakpoint: Prevent data breakpoints on user_pcid_flush_mask Peter Zijlstra
2020-05-29 21:27 ` [PATCH 05/14] x86/entry: Introduce local_db_{save,restore}() Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 06/14] x86/entry, nmi: Disable #DB Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 07/14] x86/entry, mce: Disallow #DB during #MC Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 08/14] x86/entry: Optimize local_db_save() for virt Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-06-03 1:17 ` [PATCH 08/14] " Sean Christopherson
2020-05-29 21:27 ` [PATCH 09/14] x86/entry: Remove debug IDT frobbing Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 10/14] x86/entry: Remove DBn stacks Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 11/14] x86/entry: Clarify irq_{enter,exit}_rcu() Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-06-02 14:42 ` [PATCH 11/14] " Qian Cai
2020-06-02 14:42 ` Qian Cai
2020-06-02 15:05 ` Peter Zijlstra
2020-06-02 15:05 ` Peter Zijlstra
2020-06-02 18:47 ` Qian Cai
2020-06-02 18:47 ` Qian Cai
2020-06-03 17:50 ` [tip: x86/entry] x86/entry: Use __irq_exit_rcu() in irq_exit() tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 12/14] x86/entry: Rename trace_hardirqs_off_prepare() Peter Zijlstra
2020-05-30 9:57 ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2020-05-29 21:27 ` [PATCH 13/14] lockdep: Prepare for NMI IRQ state tracking Peter Zijlstra
2020-05-29 22:14 ` Steven Rostedt
2020-05-29 22:25 ` Peter Zijlstra
2020-05-29 22:28 ` Steven Rostedt
2020-05-29 22:33 ` Peter Zijlstra
2020-06-02 20:00 ` Peter Zijlstra
2020-05-29 21:27 ` Peter Zijlstra [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200529213321.528803619@infradead.org \
--to=peterz@infradead.org \
--cc=a.darwish@linutronix.de \
--cc=andrew.cooper3@citrix.com \
--cc=bigeasy@linutronix.de \
--cc=daniel.thompson@linaro.org \
--cc=laijs@linux.alibaba.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=rostedt@goodmis.org \
--cc=sean.j.christopherson@intel.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.