If we BUG or WARN in a funny RCU context, we cleverly optimize the BUG/WARN using the ud2 hack, which takes us through the idtentry_enter...() paths, which might helpfully WARN that the RCU context is invalid, which results in infinite recursion. Split the BUG/WARN handling into an nmi_enter()/nmi_exit() path in exc_invalid_op() to increase the chance that we survive the experience. Signed-off-by: Andy Lutomirski <luto@kernel.org> --- This is not as well tested as I would like, but it does cause the splat I'm chasing to display a nice warning instead of causing an undebuggable stack overflow. (It would have been debuggable on x86_64, but it's a 32-bit splat, and x86_32 doesn't have ORC.) arch/x86/kernel/traps.c | 61 +++++++++++++++++++++++------------------ arch/x86/mm/extable.c | 15 ++++++++-- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb8c3d26cdf5..6340b12a6616 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -98,24 +98,6 @@ int is_valid_bugaddr(unsigned long addr) return ud == INSN_UD0 || ud == INSN_UD2; } -int fixup_bug(struct pt_regs *regs, int trapnr) -{ - if (trapnr != X86_TRAP_UD) - return 0; - - switch (report_bug(regs->ip, regs)) { - case BUG_TRAP_TYPE_NONE: - case BUG_TRAP_TYPE_BUG: - break; - - case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD2; - return 1; - } - - return 0; -} - static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) @@ -191,13 +173,6 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - /* - * WARN*()s end up here; fix them up before we call the - * notifier chain. 
- */ - if (!user_mode(regs) && fixup_bug(regs, trapnr)) - return; - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); @@ -242,9 +217,43 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY(exc_invalid_op) +DEFINE_IDTENTRY_RAW(exc_invalid_op) { + bool rcu_exit; + + /* + * Handle BUG/WARN like NMIs instead of like normal idtentries: + * if we bugged/warned in a bad RCU context, for example, the last + * thing we want is to BUG/WARN again in the idtentry code, ad + * infinitum. + */ + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { + enum bug_trap_type type; + + nmi_enter(); + instrumentation_begin(); + type = report_bug(regs->ip, regs); + instrumentation_end(); + nmi_exit(); + + if (type == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * Else, if this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } + + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); handle_invalid_op(regs); + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); } DEFINE_IDTENTRY(exc_coproc_segment_overrun) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b991aa4bdfae..1d6cb07f4f86 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -204,8 +204,19 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; - if (fixup_bug(regs, trapnr)) - return; + if (trapnr == X86_TRAP_UD) { + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * If this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", -- 2.25.4
On Thu, Jun 11, 2020 at 8:26 PM Andy Lutomirski <luto@kernel.org> wrote:
>
> If we BUG or WARN in a funny RCU context, we cleverly optimize the
> BUG/WARN using the ud2 hack, which takes us through the
> idtentry_enter...() paths, which might helpfully WARN that the RCU
> context is invalid, which results in infinite recursion.
>
> Split the BUG/WARN handling into an nmi_enter()/nmi_exit() path in
> exc_invalid_op() to increase the chance that we survive the
> experience.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>
> This is not as well tested as I would like, but it does cause the splat
> I'm chasing to display a nice warning instead of causing an undebuggable
> stack overflow.
>
> (It would have been debuggable on x86_64, but it's a 32-bit splat, and
> x86_32 doesn't have ORC.)
>
> arch/x86/kernel/traps.c | 61 +++++++++++++++++++++++------------------
> arch/x86/mm/extable.c | 15 ++++++++--
> 2 files changed, 48 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index cb8c3d26cdf5..6340b12a6616 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -98,24 +98,6 @@ int is_valid_bugaddr(unsigned long addr)
> return ud == INSN_UD0 || ud == INSN_UD2;
> }
>
> -int fixup_bug(struct pt_regs *regs, int trapnr)
> -{
> - if (trapnr != X86_TRAP_UD)
> - return 0;
> -
> - switch (report_bug(regs->ip, regs)) {
> - case BUG_TRAP_TYPE_NONE:
> - case BUG_TRAP_TYPE_BUG:
> - break;
> -
> - case BUG_TRAP_TYPE_WARN:
> - regs->ip += LEN_UD2;
> - return 1;
> - }
> -
> - return 0;
> -}
> -
> static nokprobe_inline int
> do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
> struct pt_regs *regs, long error_code)
> @@ -191,13 +173,6 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
> {
> RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
>
> - /*
> - * WARN*()s end up here; fix them up before we call the
> - * notifier chain.
> - */
> - if (!user_mode(regs) && fixup_bug(regs, trapnr))
> - return;
> -
> if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
> NOTIFY_STOP) {
> cond_local_irq_enable(regs);
> @@ -242,9 +217,43 @@ static inline void handle_invalid_op(struct pt_regs *regs)
> ILL_ILLOPN, error_get_trap_addr(regs));
> }
>
> -DEFINE_IDTENTRY(exc_invalid_op)
> +DEFINE_IDTENTRY_RAW(exc_invalid_op)
> {
> + bool rcu_exit;
> +
> + /*
> + * Handle BUG/WARN like NMIs instead of like normal idtentries:
> + * if we bugged/warned in a bad RCU context, for example, the last
> + * thing we want is to BUG/WARN again in the idtentry code, ad
> + * infinitum.
> + */
> + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) {
> + enum bug_trap_type type;
> +
> + nmi_enter();
> + instrumentation_begin();
> + type = report_bug(regs->ip, regs);
> + instrumentation_end();
> + nmi_exit();
Hmm, maybe this should be:
nmi_enter();
instrumentation_begin();
trace_hardirqs_off_finish();
type = report_bug(regs->ip, regs);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
nmi_exit();
tglx or peterz, feel free to fix this up and apply it however you like.
The following commit has been merged into the x86/entry branch of tip: Commit-ID: 15a416e8aaa758b5534f64a3972dae05275bc225 Gitweb: https://git.kernel.org/tip/15a416e8aaa758b5534f64a3972dae05275bc225 Author: Andy Lutomirski <luto@kernel.org> AuthorDate: Thu, 11 Jun 2020 20:26:38 -07:00 Committer: Thomas Gleixner <tglx@linutronix.de> CommitterDate: Fri, 12 Jun 2020 12:12:57 +02:00 x86/entry: Treat BUG/WARN as NMI-like entries BUG/WARN are cleverly optimized using UD2 to handle the BUG/WARN out of line in an exception fixup. But if BUG or WARN is issued in a funny RCU context, then the idtentry_enter...() path might helpfully WARN that the RCU context is invalid, which results in infinite recursion. Split the BUG/WARN handling into an nmi_enter()/nmi_exit() path in exc_invalid_op() to increase the chance to survive the experience. [ tglx: Make the declaration match the implementation ] Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lkml.kernel.org/r/f8fe40e0088749734b4435b554f73eee53dcf7a8.1591932307.git.luto@kernel.org --- arch/x86/include/asm/idtentry.h | 2 +- arch/x86/kernel/traps.c | 64 +++++++++++++++++++------------- arch/x86/mm/extable.c | 15 ++++++-- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index d203c54..2fc6b0c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -543,7 +543,6 @@ SYM_CODE_END(spurious_entries_start) DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); -DECLARE_IDTENTRY(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); @@ -561,6 +560,7 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); 
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); /* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7febae3..af75109 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -97,24 +97,6 @@ int is_valid_bugaddr(unsigned long addr) return ud == INSN_UD0 || ud == INSN_UD2; } -int fixup_bug(struct pt_regs *regs, int trapnr) -{ - if (trapnr != X86_TRAP_UD) - return 0; - - switch (report_bug(regs->ip, regs)) { - case BUG_TRAP_TYPE_NONE: - case BUG_TRAP_TYPE_BUG: - break; - - case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD2; - return 1; - } - - return 0; -} - static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) @@ -190,13 +172,6 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - /* - * WARN*()s end up here; fix them up before we call the - * notifier chain. - */ - if (!user_mode(regs) && fixup_bug(regs, trapnr)) - return; - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); @@ -241,9 +216,46 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY(exc_invalid_op) +DEFINE_IDTENTRY_RAW(exc_invalid_op) { + bool rcu_exit; + + /* + * Handle BUG/WARN like NMIs instead of like normal idtentries: + * if we bugged/warned in a bad RCU context, for example, the last + * thing we want is to BUG/WARN again in the idtentry code, ad + * infinitum. 
+ */ + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { + enum bug_trap_type type; + + nmi_enter(); + instrumentation_begin(); + trace_hardirqs_off_finish(); + type = report_bug(regs->ip, regs); + if (regs->flags & X86_EFLAGS_IF) + trace_hardirqs_on_prepare(); + instrumentation_end(); + nmi_exit(); + + if (type == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * Else, if this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } + + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); handle_invalid_op(regs); + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); } DEFINE_IDTENTRY(exc_coproc_segment_overrun) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b991aa4..1d6cb07 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -204,8 +204,19 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; - if (fixup_bug(regs, trapnr)) - return; + if (trapnr == X86_TRAP_UD) { + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. */ + regs->ip += LEN_UD2; + return; + } + + /* + * If this was a BUG and report_bug returns or if this + * was just a normal #UD, we want to continue onward and + * crash. + */ + } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
On Fri, Jun 12, 2020 at 07:50:08PM -0000, tip-bot2 for Andy Lutomirski wrote: > +DEFINE_IDTENTRY_RAW(exc_invalid_op) > { > + bool rcu_exit; > + > + /* > + * Handle BUG/WARN like NMIs instead of like normal idtentries: > + * if we bugged/warned in a bad RCU context, for example, the last > + * thing we want is to BUG/WARN again in the idtentry code, ad > + * infinitum. > + */ > + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { vmlinux.o: warning: objtool: exc_invalid_op()+0x47: call to probe_kernel_read() leaves .noinstr.text section > + enum bug_trap_type type; > + > + nmi_enter(); > + instrumentation_begin(); > + trace_hardirqs_off_finish(); > + type = report_bug(regs->ip, regs); > + if (regs->flags & X86_EFLAGS_IF) > + trace_hardirqs_on_prepare(); > + instrumentation_end(); > + nmi_exit(); > + > + if (type == BUG_TRAP_TYPE_WARN) { > + /* Skip the ud2. */ > + regs->ip += LEN_UD2; > + return; > + } > + > + /* > + * Else, if this was a BUG and report_bug returns or if this > + * was just a normal #UD, we want to continue onward and > + * crash. > + */ > + } > + > + rcu_exit = idtentry_enter_cond_rcu(regs); > + instrumentation_begin(); > handle_invalid_op(regs); > + instrumentation_end(); > + idtentry_exit_cond_rcu(regs, rcu_exit); > } For now something like so will do, but we need a DEFINE_IDTENTRY_foo() for the whole: if (user_mode()) { rcu = idtentry_enter_cond_rcu() foo_user() idtentry_exit_cond_rcu(rcu); } else { nmi_enter(); foo_kernel() nmi_exit() } thing, we're repeating that far too often. 
--- diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c26..a47e74923c4c8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -218,21 +218,22 @@ static inline void handle_invalid_op(struct pt_regs *regs) DEFINE_IDTENTRY_RAW(exc_invalid_op) { - bool rcu_exit; - /* * Handle BUG/WARN like NMIs instead of like normal idtentries: * if we bugged/warned in a bad RCU context, for example, the last * thing we want is to BUG/WARN again in the idtentry code, ad * infinitum. */ - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { - enum bug_trap_type type; + if (!user_mode(regs)) { + enum bug_trap_type type = BUG_TRAP_TYPE_NONE; nmi_enter(); instrumentation_begin(); trace_hardirqs_off_finish(); - type = report_bug(regs->ip, regs); + + if (is_valid_bugaddr(regs->ip)) + type = report_bug(regs->ip, regs); + if (regs->flags & X86_EFLAGS_IF) trace_hardirqs_on_prepare(); instrumentation_end(); @@ -249,13 +250,16 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) * was just a normal #UD, we want to continue onward and * crash. */ - } + handle_invalid_op(regs); + } else { + bool rcu_exit; - rcu_exit = idtentry_enter_cond_rcu(regs); - instrumentation_begin(); - handle_invalid_op(regs); - instrumentation_end(); - idtentry_exit_cond_rcu(regs, rcu_exit); + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); + handle_invalid_op(regs); + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); + } } DEFINE_IDTENTRY(exc_coproc_segment_overrun)
On Mon, Jun 15, 2020 at 7:50 AM Peter Zijlstra <peterz@infradead.org> wrote: > > On Fri, Jun 12, 2020 at 07:50:08PM -0000, tip-bot2 for Andy Lutomirski wrote: > > +DEFINE_IDTENTRY_RAW(exc_invalid_op) > > { > > + bool rcu_exit; > > + > > + /* > > + * Handle BUG/WARN like NMIs instead of like normal idtentries: > > + * if we bugged/warned in a bad RCU context, for example, the last > > + * thing we want is to BUG/WARN again in the idtentry code, ad > > + * infinitum. > > + */ > > + if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { > > vmlinux.o: warning: objtool: exc_invalid_op()+0x47: call to probe_kernel_read() leaves .noinstr.text section > > > + enum bug_trap_type type; > > + > > + nmi_enter(); > > + instrumentation_begin(); > > + trace_hardirqs_off_finish(); > > + type = report_bug(regs->ip, regs); > > + if (regs->flags & X86_EFLAGS_IF) > > + trace_hardirqs_on_prepare(); > > + instrumentation_end(); > > + nmi_exit(); > > + > > + if (type == BUG_TRAP_TYPE_WARN) { > > + /* Skip the ud2. */ > > + regs->ip += LEN_UD2; > > + return; > > + } > > + > > + /* > > + * Else, if this was a BUG and report_bug returns or if this > > + * was just a normal #UD, we want to continue onward and > > + * crash. > > + */ > > + } > > + > > + rcu_exit = idtentry_enter_cond_rcu(regs); > > + instrumentation_begin(); > > handle_invalid_op(regs); > > + instrumentation_end(); > > + idtentry_exit_cond_rcu(regs, rcu_exit); > > } > > > For now something like so will do, but we need a DEFINE_IDTENTRY_foo() > for the whole: > > if (user_mode()) { > rcu = idtentry_enter_cond_rcu() > foo_user() > idtentry_exit_cond_rcu(rcu); > } else { > nmi_enter(); > foo_kernel() > nmi_exit() > } > > thing, we're repeating that far too often. > > Hmm. IMO you're making two changes here, and this is fiddly enough that it might be worth separating them for bisection purposes. 
> --- > > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index af75109485c26..a47e74923c4c8 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -218,21 +218,22 @@ static inline void handle_invalid_op(struct pt_regs *regs) > > DEFINE_IDTENTRY_RAW(exc_invalid_op) > { > - bool rcu_exit; > - > /* > * Handle BUG/WARN like NMIs instead of like normal idtentries: > * if we bugged/warned in a bad RCU context, for example, the last > * thing we want is to BUG/WARN again in the idtentry code, ad > * infinitum. > */ > - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { > - enum bug_trap_type type; > + if (!user_mode(regs)) { > + enum bug_trap_type type = BUG_TRAP_TYPE_NONE; > > nmi_enter(); > instrumentation_begin(); > trace_hardirqs_off_finish(); > - type = report_bug(regs->ip, regs); > + > + if (is_valid_bugaddr(regs->ip)) > + type = report_bug(regs->ip, regs); > + Sigh, this is indeed necessary. > if (regs->flags & X86_EFLAGS_IF) > trace_hardirqs_on_prepare(); > instrumentation_end(); > @@ -249,13 +250,16 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) > * was just a normal #UD, we want to continue onward and > * crash. > */ > - } > + handle_invalid_op(regs); But this is really a separate change. This makes handle_invalid_op() be NMI-like even for non-BUG/WARN kernel #UD entries. One might argue that this doesn't matter, and that's probably right, but I think it should be its own change with its own justification. With just my patch, I intentionally call handle_invalid_op() via the normal idtentry_enter_cond_rcu() path. --Andy
On Mon, Jun 15, 2020 at 10:06:20AM -0700, Andy Lutomirski wrote: > On Mon, Jun 15, 2020 at 7:50 AM Peter Zijlstra <peterz@infradead.org> wrote: > Hmm. IMO you're making two changes here, and this is fiddly enough > that it might be worth separating them for bisection purposes. Sure, can do. > > --- > > > > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > > index af75109485c26..a47e74923c4c8 100644 > > --- a/arch/x86/kernel/traps.c > > +++ b/arch/x86/kernel/traps.c > > @@ -218,21 +218,22 @@ static inline void handle_invalid_op(struct pt_regs *regs) > > > > DEFINE_IDTENTRY_RAW(exc_invalid_op) > > { > > - bool rcu_exit; > > - > > /* > > * Handle BUG/WARN like NMIs instead of like normal idtentries: > > * if we bugged/warned in a bad RCU context, for example, the last > > * thing we want is to BUG/WARN again in the idtentry code, ad > > * infinitum. > > */ > > - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { > > - enum bug_trap_type type; > > + if (!user_mode(regs)) { > > + enum bug_trap_type type = BUG_TRAP_TYPE_NONE; > > > > nmi_enter(); > > instrumentation_begin(); > > trace_hardirqs_off_finish(); > > - type = report_bug(regs->ip, regs); > > + > > + if (is_valid_bugaddr(regs->ip)) > > + type = report_bug(regs->ip, regs); > > + > > Sigh, this is indeed necessary. :-) > > if (regs->flags & X86_EFLAGS_IF) > > trace_hardirqs_on_prepare(); > > instrumentation_end(); > > @@ -249,13 +250,16 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) > > * was just a normal #UD, we want to continue onward and > > * crash. > > */ > > - } > > + handle_invalid_op(regs); > > But this is really a separate change. This makes handle_invalid_op() > be NMI-like even for non-BUG/WARN kernel #UD entries. One might argue > that this doesn't matter, and that's probably right, but I think it > should be its own change with its own justification. With just my > patch, I intentionally call handle_invalid_op() via the normal > idtentry_enter_cond_rcu() path. 
All !user exceptions really should be NMI-like. If you want to go overboard, I suppose you can look at IF and have them behave interrupt-like when set, but why make things complicated? Anyway, let me do smaller and proper patches for this.
> On Jun 15, 2020, at 12:45 PM, Peter Zijlstra <peterz@infradead.org> wrote: > > On Mon, Jun 15, 2020 at 10:06:20AM -0700, Andy Lutomirski wrote: >>> On Mon, Jun 15, 2020 at 7:50 AM Peter Zijlstra <peterz@infradead.org> wrote: >> >> Hmm. IMO you're making two changes here, and this is fiddly enough >> that it might be worth separating them for bisection purposes. > > Sure, can do. > >>> --- >>> >>> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c >>> index af75109485c26..a47e74923c4c8 100644 >>> --- a/arch/x86/kernel/traps.c >>> +++ b/arch/x86/kernel/traps.c >>> @@ -218,21 +218,22 @@ static inline void handle_invalid_op(struct pt_regs *regs) >>> >>> DEFINE_IDTENTRY_RAW(exc_invalid_op) >>> { >>> - bool rcu_exit; >>> - >>> /* >>> * Handle BUG/WARN like NMIs instead of like normal idtentries: >>> * if we bugged/warned in a bad RCU context, for example, the last >>> * thing we want is to BUG/WARN again in the idtentry code, ad >>> * infinitum. >>> */ >>> - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { >>> - enum bug_trap_type type; >>> + if (!user_mode(regs)) { >>> + enum bug_trap_type type = BUG_TRAP_TYPE_NONE; >>> >>> nmi_enter(); >>> instrumentation_begin(); >>> trace_hardirqs_off_finish(); >>> - type = report_bug(regs->ip, regs); >>> + >>> + if (is_valid_bugaddr(regs->ip)) >>> + type = report_bug(regs->ip, regs); >>> + >> >> Sigh, this is indeed necessary. > > :-) > >>> if (regs->flags & X86_EFLAGS_IF) >>> trace_hardirqs_on_prepare(); >>> instrumentation_end(); >>> @@ -249,13 +250,16 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) >>> * was just a normal #UD, we want to continue onward and >>> * crash. >>> */ >>> - } >>> + handle_invalid_op(regs); >> >> But this is really a separate change. This makes handle_invalid_op() >> be NMI-like even for non-BUG/WARN kernel #UD entries. One might argue >> that this doesn't matter, and that's probably right, but I think it >> should be its own change with its own justification. 
With just my >> patch, I intentionally call handle_invalid_op() via the normal >> idtentry_enter_cond_rcu() path. > > All !user exceptions really should be NMI-like. If you want to go > overboard, I suppose you can look at IF and have them behave interrupt > like when set, but why make things complicated. This entire rabbit hole opened because of #PF. So we at least need the set of exceptions that are permitted to schedule if they came from kernel mode to remain schedulable. Prior to the giant changes, all the non-IST *exceptions*, but not the interrupts, were schedulable from kernel mode, assuming the original context could schedule. Right now, interrupts can schedule, too, which is nice if we ever want to fully clean up the Xen abomination. I suppose we could make it so #PF opts in to special treatment again, but we should decide that the result is simpler or otherwise better before we do this. One possible justification would be that the schedulable entry variant is more complicated, and most kernel exceptions except the ones with fixups are bad news, and we want the oopses to succeed. But page faults are probably the most common source of oopses, so this is a bit weak, and we really want page faults to work even from nasty contexts. > > Anyway, let me to smaller and proper patches for this.
On Mon, Jun 15, 2020 at 02:08:16PM -0700, Andy Lutomirski wrote: > > All !user exceptions really should be NMI-like. If you want to go > > overboard, I suppose you can look at IF and have them behave interrupt > > like when set, but why make things complicated. > > This entire rabbit hole opened because of #PF. So we at least need the > set of exceptions that are permitted to schedule if they came from > kernel mode to remain schedulable. What exception, other than #PF, actually needs to schedule from kernel? > Prior to the giant changes, all the non-IST *exceptions*, but not the > interrupts, were schedulable from kernel mode, assuming the original > context could schedule. Right now, interrupts can schedule, too, which > is nice if we ever want to fully clean up the Xen abomination. I > suppose we could make it so #PF opts in to special treatment again, > but we should decide that the result is simpler or otherwise better > before we do this. > > One possible justification would be that the schedulable entry variant > is more complicated, and most kernel exceptions except the ones with > fixups are bad news, and we want the oopses to succeed. But page > faults are probably the most common source of oopses, so this is a bit > weak, and we really want page faults to work even from nasty contexts. I think I'd prefer the argument of consistent failure. Do we ever want #UD to schedule? If not, then why allow it to sometimes schedule and sometimes fail, better to always fail. #DB is still a giant trainwreck in this regard as well. Something like this... --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -216,10 +216,25 @@ static inline void handle_invalid_op(str ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY_RAW(exc_invalid_op) +static void handle_invalid_op_kernel(struct pt_regs *regs) +{ + if (is_valid_bugaddr(regs->ip) && + report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + /* Skip the ud2. 
*/ + regs->ip += LEN_UD2; + return; + } + + handle_invalid_op(regs); +} + +static void handle_invalid_op_user(struct pt_regs *regs) { - bool rcu_exit; + handle_invalid_op(regs); +} +DEFINE_IDTENTRY_RAW(exc_invalid_op) +{ /* * Handle BUG/WARN like NMIs instead of like normal idtentries: * if we bugged/warned in a bad RCU context, for example, the last @@ -227,38 +242,25 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op) * infinitum. */ if (!user_mode(regs)) { - enum bug_trap_type type = BUG_TRAP_TYPE_NONE; - nmi_enter(); instrumentation_begin(); trace_hardirqs_off_finish(); - if (is_valid_bugaddr(regs->ip)) - type = report_bug(regs->ip, regs); + handle_invalid_op_kernel(regs); if (regs->flags & X86_EFLAGS_IF) trace_hardirqs_on_prepare(); instrumentation_end(); nmi_exit(); + } else { + bool rcu_exit; - if (type == BUG_TRAP_TYPE_WARN) { - /* Skip the ud2. */ - regs->ip += LEN_UD2; - return; - } - - /* - * Else, if this was a BUG and report_bug returns or if this - * was just a normal #UD, we want to continue onward and - * crash. - */ + rcu_exit = idtentry_enter_cond_rcu(regs); + instrumentation_begin(); + handle_invalid_op_user(regs); + instrumentation_end(); + idtentry_exit_cond_rcu(regs, rcu_exit); } - - rcu_exit = idtentry_enter_cond_rcu(regs); - instrumentation_begin(); - handle_invalid_op(regs); - instrumentation_end(); - idtentry_exit_cond_rcu(regs, rcu_exit); } DEFINE_IDTENTRY(exc_coproc_segment_overrun)
On Mon, Jun 15, 2020 at 3:23 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Jun 15, 2020 at 02:08:16PM -0700, Andy Lutomirski wrote:
>
> > > All !user exceptions really should be NMI-like. If you want to go
> > > overboard, I suppose you can look at IF and have them behave interrupt
> > > like when set, but why make things complicated.
> >
> > This entire rabbit hole opened because of #PF. So we at least need the
> > set of exceptions that are permitted to schedule if they came from
> > kernel mode to remain schedulable.
>
> What exception, other than #PF, actually needs to schedule from kernel?
>
> > Prior to the giant changes, all the non-IST *exceptions*, but not the
> > interrupts, were schedulable from kernel mode, assuming the original
> > context could schedule. Right now, interrupts can schedule, too, which
> > is nice if we ever want to fully clean up the Xen abomination. I
> > suppose we could make it so #PF opts in to special treatment again,
> > but we should decide that the result is simpler or otherwise better
> > before we do this.
> >
> > One possible justification would be that the schedulable entry variant
> > is more complicated, and most kernel exceptions except the ones with
> > fixups are bad news, and we want the oopses to succeed. But page
> > faults are probably the most common source of oopses, so this is a bit
> > weak, and we really want page faults to work even from nasty contexts.
>
> I think I'd prefer the argument of consistent failure.
>
> Do we ever want #UD to schedule? If not, then why allow it to sometimes
> schedule and sometimes fail, better to always fail.
>
> #DB is still a giant trainwreck in this regard as well.
>
> Something like this...
>
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -216,10 +216,25 @@ static inline void handle_invalid_op(str
> ILL_ILLOPN, error_get_trap_addr(regs));
> }
>
> -DEFINE_IDTENTRY_RAW(exc_invalid_op)
> +static void handle_invalid_op_kernel(struct pt_regs *regs)
> +{
> + if (is_valid_bugaddr(regs->ip) &&
> + report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
> + /* Skip the ud2. */
> + regs->ip += LEN_UD2;
> + return;
> + }
> +
> + handle_invalid_op(regs);
> +}
> +
> +static void handle_invalid_op_user(struct pt_regs *regs)
> {
> - bool rcu_exit;
> + handle_invalid_op(regs);
> +}
>
> +DEFINE_IDTENTRY_RAW(exc_invalid_op)
> +{
Meh, I guess I'm okay with this.
In some sense, #UD and #PF are fundamentally different. #PF wants to
be able to schedule in the kernel. #UD wants to be as minimal as
possible in the kernel but probably still wants to do the nmi_enter()
dance in case it's an RCU warning and the warning handler code wants
to use RCU.
One solution would be to get rid of ud2 for warnings and replace it
with CALL warning_thunk :) But I guess I'm okay with your patch.
--Andy
On Mon, Jun 15, 2020 at 03:46:00PM -0700, Andy Lutomirski wrote: > In some sense, #UD and #PF are fundamentally different. #PF wants to > be able to schedule in the kernel. #UD wants to be as minimal as > possible in the kernel but probably still wants to do the nmi_enter() > dance in case it's an RCU warning and the warning handler code wants > to use RCU. > > One solution would be to get rid of ud2 for warnings and replace it > with CALL warning_thunk :) But I guess I'm okay with your patch. Well, the raisin we use UD2 is because it's only 2 bytes, which makes for nice and compact code. Ideally we'd have a single byte #UD instruction, but alas. However, I realized that there's another analogy with #PF that does transfer to #UD. For #PF we state that in-kernel #PF only happens when RCU is already watching -- by virtue of us being careful in noinstr. But similarly we can state we only have UD2 when we want to call WARN/BUG and can forgo exception entry. That would then result in something like this... --- diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..8fe57b07a03b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -216,40 +216,35 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY_RAW(exc_invalid_op) +static noinstr bool handle_bug(struct pt_regs *regs) { - bool rcu_exit; + bool handled = false; /* - * Handle BUG/WARN like NMIs instead of like normal idtentries: - * if we bugged/warned in a bad RCU context, for example, the last - * thing we want is to BUG/WARN again in the idtentry code, ad - * infinitum. + * All lies, just get the WARN/BUG out. 
*/ - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { - enum bug_trap_type type; + instrumentation_begin(); + if (is_valid_bugaddr(regs->ip) && + report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + regs->ip += LEN_UD2; + handled = true; + } + instrumentation_end(); - nmi_enter(); - instrumentation_begin(); - trace_hardirqs_off_finish(); - type = report_bug(regs->ip, regs); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); - instrumentation_end(); - nmi_exit(); + return handled; +} - if (type == BUG_TRAP_TYPE_WARN) { - /* Skip the ud2. */ - regs->ip += LEN_UD2; - return; - } +DEFINE_IDTENTRY_RAW(exc_invalid_op) +{ + bool rcu_exit; - /* - * Else, if this was a BUG and report_bug returns or if this - * was just a normal #UD, we want to continue onward and - * crash. - */ - } + /* + * We use UD2 as a short encoding for 'CALL __WARN', as such + * handle it before exception entry to avoid recursive WARN + * in case exception entry is the one triggering WARNs. + */ + if (!user_mode(regs) && handle_bug(regs)) + return; rcu_exit = idtentry_enter_cond_rcu(regs); instrumentation_begin();