* [PATCH 0/3] x86_64: Make int3 non-magical
@ 2015-07-23 22:37 Andy Lutomirski
  2015-07-23 22:37 ` [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
From: Andy Lutomirski @ 2015-07-23 22:37 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

int3 uses IST and the paranoid gsbase path.  Neither is necessary,
although the IST stack may currently be necessary to avoid stack
overruns.

Clean up IRQ stacks, make them NMI safe, teach idtentry to use
irqstacks if requested, and move int3 to the IRQ stack.

This prepares us to return from int3 using RET.  While we could,
in principle, return from an IST entry using RET, making that work
seems likely to be much messier and more fragile than this approach.

Andy Lutomirski (3):
  x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  x86/entry/64: Teach idtentry to use the IRQ stack
  x86/entry/64: Move #BP from IST to the IRQ stack

 arch/x86/entry/entry_64.S    | 88 ++++++++++++++++++++++++++++----------------
 arch/x86/kernel/cpu/common.c |  2 +-
 arch/x86/kernel/process_64.c |  4 ++
 arch/x86/kernel/traps.c      | 26 ++++++-------
 4 files changed, 74 insertions(+), 46 deletions(-)

-- 
2.4.3



* [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-23 22:37 [PATCH 0/3] x86_64: Make int3 non-magical Andy Lutomirski
@ 2015-07-23 22:37 ` Andy Lutomirski
  2015-07-24  6:08   ` Andy Lutomirski
  2015-08-05  8:59   ` Ingo Molnar
  2015-07-23 22:37 ` [PATCH 2/3] x86/entry/64: Teach idtentry to use the IRQ stack Andy Lutomirski
From: Andy Lutomirski @ 2015-07-23 22:37 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The Xen code here has a confusing comment.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S    | 72 ++++++++++++++++++++++++++------------------
 arch/x86/kernel/cpu/common.c |  2 +-
 arch/x86/kernel/process_64.c |  4 +++
 3 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d3033183ed70..5f7df8949fa7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -491,6 +491,39 @@ ENTRY(irq_entries_start)
 END(irq_entries_start)
 
 /*
+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != 0, then we're either on the
+ * IRQ stack or an IST stack, even if an NMI interrupts IRQ stack entry
+ * or exit.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	movq	%rsp, \old_rsp
+	cmpl	$0, PER_CPU_VAR(irq_count)
+	jne 694f
+	movq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	/*
+	 * Right now, we're on the irq stack with irq_count == 0.  A nested
+	 * IRQ stack switch could clobber the stack.  That's fine: the stack
+	 * is empty.
+	 */
+694:
+	incl	PER_CPU_VAR(irq_count)
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK
+ */
+.macro LEAVE_IRQ_STACK
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
+/*
  * Interrupt entry/exit.
  *
  * Interrupt entry points save only callee clobbered registers in fast path.
@@ -518,17 +551,7 @@ END(irq_entries_start)
 #endif
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -548,10 +571,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -863,14 +884,9 @@ bad_gs:
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(do_softirq_own_stack)
-	pushq	%rbp
-	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp				/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
-	leaveq
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
 	ret
 END(do_softirq_own_stack)
 
@@ -889,25 +905,21 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  * So, on entry to the handler we detect whether we interrupted an
  * existing activation in its critical region -- if so, we pop the current
  * activation and restart the handler using the previous one.
+ *
+ * XXX: I have no idea what this comment is talking about.  --luto
  */
 ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
-
+	ENTER_IRQ_STACK old_rsp=%r11
 /*
  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
  * see the correct pointer to the pt_regs
  */
-	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
-	jmp	error_exit
+	ret
 END(xen_do_hypervisor_callback)
 
 /*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1c528b06f802..e9968531ce56 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1161,7 +1161,7 @@ EXPORT_PER_CPU_SYMBOL(current_task);
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
 	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 
-DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+DEFINE_PER_CPU(unsigned int, irq_count) __visible;
 
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0831ba3bcf95..65783f6eb22c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu_switch;
 
+#ifdef CONFIG_DEBUG_ENTRY
+	WARN_ON(this_cpu_read(irq_count));
+#endif
+
 	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
-- 
2.4.3



* [PATCH 2/3] x86/entry/64: Teach idtentry to use the IRQ stack
  2015-07-23 22:37 [PATCH 0/3] x86_64: Make int3 non-magical Andy Lutomirski
  2015-07-23 22:37 ` [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
@ 2015-07-23 22:37 ` Andy Lutomirski
  2015-07-23 22:37 ` [PATCH 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
  2015-07-23 22:39 ` [PATCH 0/3] x86_64: Make int3 non-magical Andy Lutomirski
From: Andy Lutomirski @ 2015-07-23 22:37 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

We don't specifically need IST for things like kprobes, but we do
want to avoid rare, surprising extra stack usage if a kprobe hits
with a deep stack.

Teach idtentry to use the IRQ stack for selected entries.

This implementation uses the IRQ stack even if we entered from user
mode.  This disallows tricks like ist_begin_non_atomic.  If we ever
need such a trick in one of these entries, we can rework this.  For
now, let's keep it simple.
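
For example (and this is exactly what a later patch in this series
does for int3), an entry opts in to the IRQ stack like this:

  idtentry int3		do_int3			has_error_code=0	irqstack=1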

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5f7df8949fa7..ce72beba6045 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -739,13 +739,17 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req irqstack=0 paranoid=0 shift_ist=-1
 ENTRY(\sym)
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
+	.if \irqstack && \paranoid
+	.error "using irqstack requires !paranoid"
+	.endif
+
 	ASM_CLAC
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 
@@ -787,8 +791,16 @@ ENTRY(\sym)
 	subq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
+	.if \irqstack
+	ENTER_IRQ_STACK old_rsp=%rcx
+	.endif
+
 	call	\do_sym
 
+	.if \irqstack
+	LEAVE_IRQ_STACK
+	.endif
+
 	.if \shift_ist != -1
 	addq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
-- 
2.4.3



* [PATCH 3/3] x86/entry/64: Move #BP from IST to the IRQ stack
  2015-07-23 22:37 [PATCH 0/3] x86_64: Make int3 non-magical Andy Lutomirski
  2015-07-23 22:37 ` [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
  2015-07-23 22:37 ` [PATCH 2/3] x86/entry/64: Teach idtentry to use the IRQ stack Andy Lutomirski
@ 2015-07-23 22:37 ` Andy Lutomirski
  2015-07-24 11:02   ` Borislav Petkov
  2015-07-23 22:39 ` [PATCH 0/3] x86_64: Make int3 non-magical Andy Lutomirski
From: Andy Lutomirski @ 2015-07-23 22:37 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

There's nothing IST-worthy about #BP/int3.  We don't allow kprobes
in the small handful of places in the kernel that run at CPL0 with
an invalid stack, and 32-bit kernels have used normal interrupt
gates for #BP forever.

Furthermore, we don't allow kprobes in places that have usergs while
in kernel mode, so "paranoid" is also unnecessary.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S |  2 +-
 arch/x86/kernel/traps.c   | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ce72beba6045..fb3253ae7ecc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -990,7 +990,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3			do_int3			has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3			do_int3			has_error_code=0	irqstack=1
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8e65d8a9b8db..d823db70f492 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -479,7 +479,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
-/* May run on IST stack. */
+/* Runs on IRQ stack. */
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -494,7 +494,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 	if (poke_int3_handler(regs))
 		return;
 
+	/*
+	 * Use ist_enter despite the fact that we don't use an IST stack.
+	 * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
+	 * mode or even during context tracking state changes.
+	 *
+	 * This means that we can't schedule.  That's okay.
+	 */
 	ist_enter(regs);
+
 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -511,15 +519,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 			SIGTRAP) == NOTIFY_STOP)
 		goto exit;
 
-	/*
-	 * Let others (NMI) know that the debug stack is in use
-	 * as we may switch to the interrupt stack.
-	 */
-	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
-	debug_stack_usage_dec();
+
 exit:
 	ist_exit(regs);
 }
@@ -885,19 +888,16 @@ void __init trap_init(void)
 	cpu_init();
 
 	/*
-	 * X86_TRAP_DB and X86_TRAP_BP have been set
-	 * in early_trap_init(). However, ITS works only after
-	 * cpu_init() loads TSS. See comments in early_trap_init().
+	 * X86_TRAP_DB was installed in early_trap_init(). However,
+	 * IST works only after cpu_init() loads TSS. See comments
+	 * in early_trap_init().
 	 */
 	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
-	/* int3 can be called from all */
-	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
 
 	x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
 	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
 	set_nmi_gate(X86_TRAP_DB, &debug);
-	set_nmi_gate(X86_TRAP_BP, &int3);
 #endif
 }
-- 
2.4.3



* Re: [PATCH 0/3] x86_64: Make int3 non-magical
  2015-07-23 22:37 [PATCH 0/3] x86_64: Make int3 non-magical Andy Lutomirski
  2015-07-23 22:37 ` [PATCH 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
@ 2015-07-23 22:39 ` Andy Lutomirski
From: Andy Lutomirski @ 2015-07-23 22:39 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Borislav Petkov, Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Thu, Jul 23, 2015 at 3:37 PM, Andy Lutomirski <luto@kernel.org> wrote:
> int3 uses IST and the paranoid gsbase path.  Neither is necessary,
> although the IST stack may currently be necessary to avoid stack
> overruns.
>
> Clean up IRQ stacks, make them NMI safe, teach idtentry to use
> irqstacks if requested, and move int3 to the IRQ stack.
>
> This prepares us to return from int3 using RET.  While we could,
> in principle, return from an IST entry using RET, making that work
> seems likely to be much messier and more fragile than this approach.

Also, don't let the diffstat fool you.  If this works and if we can do
the same thing to do_debug, then we can do this:

https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_ist&id=1bc1f0ae8f1ea76486059a98cdbdfbdbc668aaf9

which makes it a big net win in complexity.

--Andy


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-23 22:37 ` [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
@ 2015-07-24  6:08   ` Andy Lutomirski
  2015-07-24 10:25     ` Borislav Petkov
  2015-08-05  8:59   ` Ingo Molnar
From: Andy Lutomirski @ 2015-07-24  6:08 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Borislav Petkov, Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Thu, Jul 23, 2015 at 3:37 PM, Andy Lutomirski <luto@kernel.org> wrote:
> This will allow IRQ stacks to nest inside NMIs or similar entries
> that can happen during IRQ stack setup or teardown.
>
> The Xen code here has a confusing comment.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/entry/entry_64.S    | 72 ++++++++++++++++++++++++++------------------
>  arch/x86/kernel/cpu/common.c |  2 +-
>  arch/x86/kernel/process_64.c |  4 +++
>  3 files changed, 47 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index d3033183ed70..5f7df8949fa7 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -491,6 +491,39 @@ ENTRY(irq_entries_start)
>  END(irq_entries_start)

This code is much more subtle than I thought when I wrote it:

>
>  /*
> + * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
> + * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
> + * Requires kernel GSBASE.
> + *
> + * The invariant is that, if irq_count != 0, then we're either on the
> + * IRQ stack or an IST stack, even if an NMI interrupts IRQ stack entry
> + * or exit.
> + */
> +.macro ENTER_IRQ_STACK old_rsp
> +       movq    %rsp, \old_rsp
> +       cmpl    $0, PER_CPU_VAR(irq_count)
> +       jne 694f
> +       movq    PER_CPU_VAR(irq_stack_ptr), %rsp
> +       /*
> +        * Right now, we're on the irq stack with irq_count == 0.  A nested
> +        * IRQ stack switch could clobber the stack.  That's fine: the stack
> +        * is empty.
> +        */

A nested ENTER_IRQ_STACK/LEAVE_IRQ_STACK pair is fine here.  Anything
else that does PUSH (or non-IST interrupt delivery) right here is not
safe because something could interrupt *that* and do ENTER_IRQ_STACK,
thus clobbering whatever got pushed here.

In a world populated by sane people, the only things that can
interrupt here are a vmalloc fault (let's just kill that), NMI, or
MCE.  But we're insane and we're talking about removing breakpoints
from the IST stack and even returning from IST entries using RET,
either of which will write something to (%rsp) and expect it not to
get clobbered.
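
(Schematically, with made-up labels, the RET-based return being
discussed looks something like:

	movq	saved_rsp, %rsp		/* back on the interrupted stack */
	pushq	saved_rip		/* writes to the interrupted stack */
	sti				/* only if the saved flags had IF set */
	ret

and it's that push to (%rsp) that must survive until the ret consumes
it.)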

We can't interchange the incl and the movq, because then we aren't
safe against nested ENTER_IRQ_STACK.

To be obviously safe against any local exception, we want a single
instruction that will change %rsp and some in-memory flag at the same
time.  There aren't a whole lot of candidates.  Cmpxchg isn't useful
(cmpxchg with a memory operand doesn't modify its register operand).
xchg could plausibly be abused to work, but it's slow because it's
always atomic.  Enter isn't going to work without a window in which
rsp contains something bogus.

Xadd, on the other hand, just might work.
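
(Recall what xadd does: "xadd %reg, (mem)" is, in one instruction,
tmp = %reg + (mem); %reg = (mem); (mem) = tmp.  It both fetches the
old memory value into a register and stores a new one, which is
exactly the shape we need.)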

We need two percpu variables: irq_stack_ptr and irq_stack_flag.
irq_stack_ptr points to the IRQ stack and isn't modified.
irq_stack_flag == irq_stack_ptr if we're on the IRQ stack and
irq_stack_flag has any other value if we're not on the IRQ stack.
Then algebra happens.  Unfortunately, the best I came up with so far
uses xadd to enter the IRQ stack and xchg to leave.

https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_ist&id=36825112b6082f2711605647366cd682a6be678a

I don't love it because it probably adds 60 cycles or so to IRQs.  On
the other hand, it was fun to write.

--Andy


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-24  6:08   ` Andy Lutomirski
@ 2015-07-24 10:25     ` Borislav Petkov
  2015-07-24 18:02       ` Andy Lutomirski
From: Borislav Petkov @ 2015-07-24 10:25 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Thu, Jul 23, 2015 at 11:08:39PM -0700, Andy Lutomirski wrote:
> To be obviously safe against any local exception, we want a single
> instruction that will change %rsp and some in-memory flag at the same
> time.  There aren't a whole lot of candidates.  Cmpxchg isn't useful
> (cmpxchg with a memory operand doesn't modify its register operand).

Why would you even need that?

You do LOCK; CMPXCHG on a per_cpu variable and then test ZF? I.e., use
it as a mutex in asm. With ZF=1, you switch stacks, with ZF=0, you
busy-wait ...
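
Something like this, as a rough sketch (irq_stack_lock is a made-up
percpu variable):

	movl	$0, %eax		/* expected value: lock is free */
	movl	$1, %ecx
	lock; cmpxchg %ecx, PER_CPU_VAR(irq_stack_lock)
	jnz	.Lbusy_wait		/* ZF=0: somebody else holds it */
	/* ZF=1: we grabbed it, switch to the IRQ stack */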

Or am I missing something?

This way you serialize all irq stack switchers...

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH 3/3] x86/entry/64: Move #BP from IST to the IRQ stack
  2015-07-23 22:37 ` [PATCH 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
@ 2015-07-24 11:02   ` Borislav Petkov
From: Borislav Petkov @ 2015-07-24 11:02 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Thu, Jul 23, 2015 at 03:37:48PM -0700, Andy Lutomirski wrote:
> There's nothing IST-worthy about #BP/int3.  We don't allow kprobes
> in the small handful of places in the kernel that run at CPL0 with
> an invalid stack, and 32-bit kernels have used normal interrupt
> gates for #BP forever.
> 
> Furthermore, we don't allow kprobes in places that have usergs while
> in kernel mode, so "paranoid" is also unnecessary.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/entry/entry_64.S |  2 +-
>  arch/x86/kernel/traps.c   | 26 +++++++++++++-------------
>  2 files changed, 14 insertions(+), 14 deletions(-)

...

> @@ -494,7 +494,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
>  	if (poke_int3_handler(regs))
>  		return;
>  
> +	/*
> +	 * Use ist_enter despite the fact that we don't use an IST stack.
> +	 * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
> +	 * mode or even during context tracking state changes.
> +	 *
> +	 * This means that we can't schedule.  That's okay.
> +	 */
>  	ist_enter(regs);

A good sign that this "ist_enter" name needs to be changed. Otherwise,
this call site will confuse people.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-24 10:25     ` Borislav Petkov
@ 2015-07-24 18:02       ` Andy Lutomirski
  2015-07-25  4:16         ` Borislav Petkov
From: Andy Lutomirski @ 2015-07-24 18:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Fri, Jul 24, 2015 at 3:25 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Thu, Jul 23, 2015 at 11:08:39PM -0700, Andy Lutomirski wrote:
>> To be obviously safe against any local exception, we want a single
>> instruction that will change %rsp and some in-memory flag at the same
>> time.  There aren't a whole lot of candidates.  Cmpxchg isn't useful
>> (cmpxchg with a memory operand doesn't modify its register operand).
>
> Why would you even need that?
>
> You do LOCK; CMPXCHG on a per_cpu variable and then test ZF? I.e., use
> it as a mutex in asm. With ZF=1, you switch stacks, with ZF=0, you
> busy-wait ...
>
> Or am I missing something?

I'm not worried about other CPUs at all, so the LOCK isn't needed.
I'm worried about an interrupt coming while the in-memory state says
we're on the IRQ stack but RSP isn't pointing at the IRQ stack or vice
versa.

This isn't possible in current kernels because nothing ever switches
to the IRQ stack except IRQs, and those don't happen with IF = 0
(unless Xen does more awful things than I realize), but I want to use
IRQ stacks for int3, and that can happen inside NMI.

An alternative solution would be to never switch to the IRQ stack if
RSP points to the IRQ stack already or if we're already on an IST
stack, but that seems full of corner cases.

But wait.  Maybe a really simple approach is fine: first increment
irq_count and then switch RSP.  That leaves a window with irq_count
marking the IRQ stack as being in use but RSP not pointing to the IRQ
stack.
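
Concretely, ENTER_IRQ_STACK would become something like this (sketch
only):

	movq	%rsp, \old_rsp
	incl	PER_CPU_VAR(irq_count)		/* claim the IRQ stack first */
	cmpl	$1, PER_CPU_VAR(irq_count)	/* were we first in? */
	jne	694f				/* nested: stay on this stack */
	movq	PER_CPU_VAR(irq_stack_ptr), %rsp
694:	pushq	\old_rsp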

What can go wrong?  We can get an NMI, an MCE, a breakpoint, or a
vmalloc fault.  An NMI is fine.  It will switch to the NMI stack.  The
IRQ stack is marked as in use, but that doesn't matter -- the NMI
stack has plenty of space.  An MCE is fine.  It will switch to the IST
stack.  It might return using RET, which will push one single extra
word to the kernel stack, but that's not a problem for stack overruns
(unless there's a never-ending stream of MCEs, but we're already
terminally screwed if that happens).  A breakpoint will *not* switch
to an IST stack because we're going to get rid of the debug stack, and
it will fail to switch to the IRQ stack, so we need to limit the stack
depth of do_debug.  Maybe that's okay.  A breakpoint with NMI or MCE
inside is still fine.

So really the only difference between this simple approach (which is
more or less what we do now) and my fancy approach is that a kernel
instruction breakpoint will cause do_debug to run on the initial stack
instead of the IRQ stack.

I'm still tempted to say we should use my overly paranoid atomic
approach for now and optimize later, but I'm fine with spinning a v3,
too.

--Andy


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-24 18:02       ` Andy Lutomirski
@ 2015-07-25  4:16         ` Borislav Petkov
  2015-07-25  4:28           ` Andy Lutomirski
From: Borislav Petkov @ 2015-07-25  4:16 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Fri, Jul 24, 2015 at 11:02:51AM -0700, Andy Lutomirski wrote:
> So really the only difference between this simple approach (which is
> more or less what we do now) and my fancy approach is that a kernel
> instruction breakpoint will cause do_debug to run on the initial stack
> instead of the IRQ stack.

Sounds ok to me. What would be the worst thing if we limited the #DB
stack? Some breakpoints will get ignored? In an endless stream of
breakpoints hammering? Doesn't sound like a valid use case to me, does
it?

> I'm still tempted to say we should use my overly paranoid atomic
> approach for now and optimize later,...

But why change it if the simple approach of incrementing irq_count first
is still fine? I think we want to KISS here exactly because apparently
complexity in that area is a serious PITA...

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  4:16         ` Borislav Petkov
@ 2015-07-25  4:28           ` Andy Lutomirski
  2015-07-25  4:32             ` Borislav Petkov
From: Andy Lutomirski @ 2015-07-25  4:28 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Fri, Jul 24, 2015 at 9:16 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Fri, Jul 24, 2015 at 11:02:51AM -0700, Andy Lutomirski wrote:
>> So really the only difference between this simple approach (which is
>> more or less what we do now) and my fancy approach is that a kernel
>> instruction breakpoint will cause do_debug to run on the initial stack
>> instead of the IRQ stack.
>
> Sounds ok to me. What would be the worst thing if we limited the #DB
> stack? Some breakpoints will get ignored? In an endless stream of
> breakpoints hammering? Doesn't sound like a valid use case to me, does
> it?
>
>> I'm still tempted to say we should use my overly paranoid atomic
>> approach for now and optimize later,...
>
> But why change it if the simple approach of incrementing irq_count first
> is still fine? I think we want to KISS here exactly because apparently
> complexity in that area is a serious PITA...

Yeah, I'm going to submit v2 with the simple approach.  I admit I'm
rather fond of xadd as a way to switch rsp and set a flag at the same
time, though :)

--Andy


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  4:28           ` Andy Lutomirski
@ 2015-07-25  4:32             ` Borislav Petkov
  2015-07-25  4:59               ` Andy Lutomirski
From: Borislav Petkov @ 2015-07-25  4:32 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Fri, Jul 24, 2015 at 09:28:47PM -0700, Andy Lutomirski wrote:
> Yeah, I'm going to submit v2 with the simple approach.  I admit I'm
> rather fond of xadd as a way to switch rsp and set a flag at the same
> time, though :)

I know you are.

But people will rip your head out if you added 60 cycles to the IRQ
path.

:-)

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  4:32             ` Borislav Petkov
@ 2015-07-25  4:59               ` Andy Lutomirski
  2015-07-25  8:39                 ` Borislav Petkov
  2015-07-25 17:56                 ` Linus Torvalds
From: Andy Lutomirski @ 2015-07-25  4:59 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Fri, Jul 24, 2015 at 9:32 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Fri, Jul 24, 2015 at 09:28:47PM -0700, Andy Lutomirski wrote:
>> Yeah, I'm going to submit v2 with the simple approach.  I admit I'm
>> rather fond of xadd as a way to switch rsp and set a flag at the same
>> time, though :)
>
> I know you are.
>
> But people will rip your head out if you added 60 cycles to the IRQ
> path.

And people will give me five new heads if I ignore Linus and do RET
even with IF=1, saving 300 cycles?

--Andy


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  4:59               ` Andy Lutomirski
@ 2015-07-25  8:39                 ` Borislav Petkov
  2015-07-25 17:56                 ` Linus Torvalds
From: Borislav Petkov @ 2015-07-25  8:39 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Fri, Jul 24, 2015 at 09:59:16PM -0700, Andy Lutomirski wrote:
> And people will give me five new heads if I ignore Linus and do RET
> even with IF=1, saving 300 cycles?

As long as you don't make it too complex and corner-casy, I'll give you
hats for those heads.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  4:59               ` Andy Lutomirski
  2015-07-25  8:39                 ` Borislav Petkov
@ 2015-07-25 17:56                 ` Linus Torvalds
  2015-07-25 17:59                   ` Andy Lutomirski
From: Linus Torvalds @ 2015-07-25 17:56 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Borislav Petkov, Andy Lutomirski, X86 ML, linux-kernel,
	Brian Gerst, Steven Rostedt, Willy Tarreau, Thomas Gleixner,
	Peter Zijlstra

On Fri, Jul 24, 2015 at 9:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> And people will give me five new heads if I ignore Linus and do RET
> even with IF=1, saving 300 cycles?

So I'm still nervous about that "sti; ret" when we're back on the
original kernel stack that took the original fault or interrupt.  But
it's probably ok.

Yes, it's irq-safe. But it's not NMI-safe, so if an NMI happens there,
when the NMI returns, an interrupt might occur there too. But since
we're back on the original stack where the original fault happened,
and since interrupts were enabled, I don't see why that would be
horrible. In theory, we might have a growing stack if this keeps
happening, but since the only way to get that is to get the NMI in
that one-instruction window (and apparently on at least _some_
microarchitectures the sti shadow stops even NMI's), I don't see how
any kind of unbounded growth would happen.

So.

I think it would work, and it might even be good for "coverage" (ie
the whole "iret-to-ret-conversion" will not have a lot of testing if
it only happens for faults with interrupts disabled).

But it still worries me a bit.

                  Linus


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25 17:56                 ` Linus Torvalds
@ 2015-07-25 17:59                   ` Andy Lutomirski
  2015-07-25 18:12                     ` Linus Torvalds
From: Andy Lutomirski @ 2015-07-25 17:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Borislav Petkov, Andy Lutomirski, X86 ML, linux-kernel,
	Brian Gerst, Steven Rostedt, Willy Tarreau, Thomas Gleixner,
	Peter Zijlstra

On Sat, Jul 25, 2015 at 10:56 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Fri, Jul 24, 2015 at 9:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>
>> And people will give me five new heads if I ignore Linus and do RET
>> even with IF=1, saving 300 cycles?
>
> So I'm still nervous about that "sti; ret" when we're back on the
> original kernel stack that took the original fault or interrupt.  But
> it's probably ok.
>
> Yes, it's irq-safe. But it's not NMI-safe, so if an NMI happens there,
> when the NMI returns, an interrupt might occur there too. But since
> we're back on the original stack where the original fault happened,
> and since interrupts were enabled, I don't see why that would be
> horrible. In theory, we might have a growing stack if this keeps
> happening, but since the only way to get that is to get the NMI in
> that one-instruction window (and apparently on at least _some_
> microarchitectures the sti shadow stops even NMI's), I don't see how
> any kind of unbounded growth would happen.
>
> So.
>
> I think it would work, and it might even be good for "coverage" (ie
> the whole "iret-to-ret-conversion" will not have a lot of testing if
> it only happens for faults with interrupts disabled).
>
> But it still worries me a bit.
>

What if we added something like:

if (regs->ip == ret_after_sti && !user_mode(regs) &&
    (regs->flags & X86_EFLAGS_IF)) {
	regs->ip--;
	regs->flags &= ~X86_EFLAGS_IF;
}

to do_nmi, do_machine_check, and do_debug (the latter because kernel
breakpoints, damnit)?

--Andy


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25 17:59                   ` Andy Lutomirski
@ 2015-07-25 18:12                     ` Linus Torvalds
From: Linus Torvalds @ 2015-07-25 18:12 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Borislav Petkov, Andy Lutomirski, X86 ML, linux-kernel,
	Brian Gerst, Steven Rostedt, Willy Tarreau, Thomas Gleixner,
	Peter Zijlstra

On Sat, Jul 25, 2015 at 10:59 AM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> What if we added something like:
>
> if (regs->ip == ret_after_sti && !user_mode(regs) &&
>     (regs->flags & X86_EFLAGS_IF)) {
> 	regs->ip--;
> 	regs->flags &= ~X86_EFLAGS_IF;
> }
>
> to do_nmi, do_machine_check, and do_debug (the latter because kernel
> breakpoints, damnit)?

Hmm. And how would you test it?

Putting an instruction breakpoint on the final 'ret' might do it, I
guess. "mov ss" disables even that (and is documented to disable nmi
too), but maybe it works with 'sti; ret'.

But yes, as long as we'd have some test coverage, that sounds doable.

                Linus


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-23 22:37 ` [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
  2015-07-24  6:08   ` Andy Lutomirski
@ 2015-08-05  8:59   ` Ingo Molnar
  2015-08-05 18:24     ` Andy Lutomirski
From: Ingo Molnar @ 2015-08-05  8:59 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Borislav Petkov, Thomas Gleixner, Peter Zijlstra, Linus Torvalds


* Andy Lutomirski <luto@kernel.org> wrote:

> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>  	unsigned fsindex, gsindex;
>  	fpu_switch_t fpu_switch;
>  
> +#ifdef CONFIG_DEBUG_ENTRY
> +	WARN_ON(this_cpu_read(irq_count));
> +#endif

Please introduce a less noisy (to the eyes) version of this, something like:

	WARN_ON_DEBUG_ENTRY(this_cpu_read(irq_count));

or so, similar to WARN_ON_FPU().

Thanks,

	Ingo


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-08-05  8:59   ` Ingo Molnar
@ 2015-08-05 18:24     ` Andy Lutomirski
  2015-08-05 18:27       ` Steven Rostedt
From: Andy Lutomirski @ 2015-08-05 18:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Borislav Petkov, Thomas Gleixner,
	Peter Zijlstra, Linus Torvalds

On Wed, Aug 5, 2015 at 1:59 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Andy Lutomirski <luto@kernel.org> wrote:
>
>> --- a/arch/x86/kernel/process_64.c
>> +++ b/arch/x86/kernel/process_64.c
>> @@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>>       unsigned fsindex, gsindex;
>>       fpu_switch_t fpu_switch;
>>
>> +#ifdef CONFIG_DEBUG_ENTRY
>> +     WARN_ON(this_cpu_read(irq_count));
>> +#endif
>
> Please introduce a less noisy (to the eyes) version of this, something like:
>
>         WARN_ON_DEBUG_ENTRY(this_cpu_read(irq_count));
>
> or so, similar to WARN_ON_FPU().

I can do that (or "DEBUG_ENTRY_WARN_ON"?  we seem to be inconsistent
about ordering).

Or would if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON(...) be better?

--Andy

>
> Thanks,
>
>         Ingo



-- 
Andy Lutomirski
AMA Capital Management, LLC


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-08-05 18:24     ` Andy Lutomirski
@ 2015-08-05 18:27       ` Steven Rostedt
  2015-08-05 18:32         ` Andy Lutomirski
From: Steven Rostedt @ 2015-08-05 18:27 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Ingo Molnar, Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Willy Tarreau, Borislav Petkov, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Wed, 5 Aug 2015 11:24:54 -0700
Andy Lutomirski <luto@amacapital.net> wrote:

> On Wed, Aug 5, 2015 at 1:59 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * Andy Lutomirski <luto@kernel.org> wrote:
> >
> >> --- a/arch/x86/kernel/process_64.c
> >> +++ b/arch/x86/kernel/process_64.c
> >> @@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
> >>       unsigned fsindex, gsindex;
> >>       fpu_switch_t fpu_switch;
> >>
> >> +#ifdef CONFIG_DEBUG_ENTRY
> >> +     WARN_ON(this_cpu_read(irq_count));
> >> +#endif
> >
> > Please introduce a less noisy (to the eyes) version of this, something like:
> >
> >         WARN_ON_DEBUG_ENTRY(this_cpu_read(irq_count));
> >
> > or so, similar to WARN_ON_FPU().
> 
> I can do that (or "DEBUG_ENTRY_WARN_ON"?  we seem to be inconsistent
> about ordering).
> 
> Or would if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON(...) be better?
> 

Does WARN_ON(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count))
work?

-- Steve


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-08-05 18:27       ` Steven Rostedt
@ 2015-08-05 18:32         ` Andy Lutomirski
  2015-08-22 13:55           ` Ingo Molnar
From: Andy Lutomirski @ 2015-08-05 18:32 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Willy Tarreau, Borislav Petkov, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Wed, Aug 5, 2015 at 11:27 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Wed, 5 Aug 2015 11:24:54 -0700
> Andy Lutomirski <luto@amacapital.net> wrote:
>
>> On Wed, Aug 5, 2015 at 1:59 AM, Ingo Molnar <mingo@kernel.org> wrote:
>> >
>> > * Andy Lutomirski <luto@kernel.org> wrote:
>> >
>> >> --- a/arch/x86/kernel/process_64.c
>> >> +++ b/arch/x86/kernel/process_64.c
>> >> @@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>> >>       unsigned fsindex, gsindex;
>> >>       fpu_switch_t fpu_switch;
>> >>
>> >> +#ifdef CONFIG_DEBUG_ENTRY
>> >> +     WARN_ON(this_cpu_read(irq_count));
>> >> +#endif
>> >
>> > Please introduce a less noisy (to the eyes) version of this, something like:
>> >
>> >         WARN_ON_DEBUG_ENTRY(this_cpu_read(irq_count));
>> >
>> > or so, similar to WARN_ON_FPU().
>>
>> I can do that (or "DEBUG_ENTRY_WARN_ON"?  we seem to be inconsistent
>> about ordering).
>>
>> Or would if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON(...) be better?
>>
>
> Does WARN_ON(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count))
> work?

I'd be okay with it.  Ingo?

(Except that that line of code is from v1, and v2 looks slightly
different here, but that's beside the point.)

--Andy

>
> -- Steve



-- 
Andy Lutomirski
AMA Capital Management, LLC


* Re: [PATCH 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-08-05 18:32         ` Andy Lutomirski
@ 2015-08-22 13:55           ` Ingo Molnar
From: Ingo Molnar @ 2015-08-22 13:55 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Steven Rostedt, Andy Lutomirski, X86 ML, linux-kernel,
	Brian Gerst, Willy Tarreau, Borislav Petkov, Thomas Gleixner,
	Peter Zijlstra, Linus Torvalds


* Andy Lutomirski <luto@amacapital.net> wrote:

> On Wed, Aug 5, 2015 at 11:27 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> > On Wed, 5 Aug 2015 11:24:54 -0700
> > Andy Lutomirski <luto@amacapital.net> wrote:
> >
> >> On Wed, Aug 5, 2015 at 1:59 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >> >
> >> > * Andy Lutomirski <luto@kernel.org> wrote:
> >> >
> >> >> --- a/arch/x86/kernel/process_64.c
> >> >> +++ b/arch/x86/kernel/process_64.c
> >> >> @@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
> >> >>       unsigned fsindex, gsindex;
> >> >>       fpu_switch_t fpu_switch;
> >> >>
> >> >> +#ifdef CONFIG_DEBUG_ENTRY
> >> >> +     WARN_ON(this_cpu_read(irq_count));
> >> >> +#endif
> >> >
> >> > Please introduce a less noisy (to the eyes) version of this, something like:
> >> >
> >> >         WARN_ON_DEBUG_ENTRY(this_cpu_read(irq_count));
> >> >
> >> > or so, similar to WARN_ON_FPU().
> >>
> >> I can do that (or "DEBUG_ENTRY_WARN_ON"?  we seem to be inconsistent
> >> about ordering).
> >>
> >> Or would if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON(...) be better?
> >>
> >
> > Does WARN_ON(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count))
> > work?
> 
> I'd be okay with it.  Ingo?

Yeah, that one is more compact than the #ifdef variant.

Thanks,

	Ingo
