linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] x86_64: Make int3 non-magical
@ 2015-07-25  5:57 Andy Lutomirski
  2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Andy Lutomirski @ 2015-07-25  5:57 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

int3 uses IST and the paranoid gsbase path.  Neither is necessary,
although the IST stack may currently be necessary to avoid stack
overruns.

Clean up IRQ stacks, make them NMI safe, teach idtentry to use
irqstacks if requested, and move int3 to the IRQ stack.

This prepares us to return from int3 using RET.  While we could,
in principle, return from an IST entry using RET, making that work
seems likely to be much messier and more fragile than this approach.

Changes from v1: Actually NMI-safe

Andy Lutomirski (3):
  x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  x86/entry/64: Teach idtentry to use the IRQ stack
  x86/entry/64: Move #BP from IST to the IRQ stack

 arch/x86/Kconfig.debug       |   2 -
 arch/x86/entry/entry_64.S    | 105 ++++++++++++++++++++++++++++++-------------
 arch/x86/kernel/process_64.c |   4 ++
 arch/x86/kernel/traps.c      |  26 +++++------
 4 files changed, 90 insertions(+), 47 deletions(-)

-- 
2.4.3


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  5:57 [PATCH v2 0/3] x86_64: Make int3 non-magical Andy Lutomirski
@ 2015-07-25  5:57 ` Andy Lutomirski
  2015-07-26  6:05   ` Borislav Petkov
                     ` (2 more replies)
  2015-07-25  5:57 ` [PATCH v2 2/3] x86/entry/64: Teach idtentry to use the IRQ stack Andy Lutomirski
  2015-07-25  5:57 ` [PATCH v2 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
  2 siblings, 3 replies; 13+ messages in thread
From: Andy Lutomirski @ 2015-07-25  5:57 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The Xen code here has a confusing comment.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/Kconfig.debug       |  2 -
 arch/x86/entry/entry_64.S    | 89 +++++++++++++++++++++++++++++---------------
 arch/x86/kernel/process_64.c |  4 ++
 3 files changed, 63 insertions(+), 32 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8c0d3266173..4196e2325971 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d3033183ed70..3227fb6a5b94 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -490,6 +490,56 @@ ENTRY(irq_entries_start)
     .endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl $X86_EFLAGS_IF, (%rsp)
+	jz 693f
+	ud2
+693:
+	addq $8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != 1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+	incl	PER_CPU_VAR(irq_count)
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 * Anything that can interrupt us here without using IST
+	 * must be *extremely* careful to limit its stack usage.
+	 */
+
+	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	/*
+	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+	 * the irq stack but we're not on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
@@ -518,17 +568,7 @@ END(irq_entries_start)
 #endif
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -548,10 +588,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -863,14 +901,9 @@ bad_gs:
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(do_softirq_own_stack)
-	pushq	%rbp
-	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp				/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
-	leaveq
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
 	ret
 END(do_softirq_own_stack)
 
@@ -889,25 +922,21 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  * So, on entry to the handler we detect whether we interrupted an
  * existing activation in its critical region -- if so, we pop the current
  * activation and restart the handler using the previous one.
+ *
+ * XXX: I have no idea what this comment is talking about.  --luto
  */
 ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
-
+	ENTER_IRQ_STACK old_rsp=%r11
 /*
  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
  * see the correct pointer to the pt_regs
  */
-	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
-	jmp	error_exit
+	ret
 END(xen_do_hypervisor_callback)
 
 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0831ba3bcf95..70aa5917456e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -280,6 +280,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu_switch;
 
+#ifdef CONFIG_DEBUG_ENTRY
+	WARN_ON_ONCE(this_cpu_read(irq_count) != -1);
+#endif
+
 	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 2/3] x86/entry/64: Teach idtentry to use the IRQ stack
  2015-07-25  5:57 [PATCH v2 0/3] x86_64: Make int3 non-magical Andy Lutomirski
  2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
@ 2015-07-25  5:57 ` Andy Lutomirski
  2015-07-28  9:50   ` Borislav Petkov
  2015-07-25  5:57 ` [PATCH v2 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
  2 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-07-25  5:57 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

We don't specifically need IST for things like kprobes, but we do
want to avoid rare, surprising extra stack usage if a kprobe hits
with a deep stack.

Teach idtentry to use the IRQ stack for selected entries.

This implementation uses the IRQ stack even if we entered from user
mode.  This disallows tricks like ist_begin_non_atomic.  If we ever
need such a trick in one of these entries, we can rework this.  For
now, let's keep it simple.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3227fb6a5b94..af3573e75ed4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -756,13 +756,17 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req irqstack=0 paranoid=0 shift_ist=-1
 ENTRY(\sym)
 	/* Sanity check */
 	.if \shift_ist != -1 && \paranoid == 0
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
+	.if \irqstack && \paranoid
+	.error "using irqstack requires !paranoid"
+	.endif
+
 	ASM_CLAC
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 
@@ -804,8 +808,16 @@ ENTRY(\sym)
 	subq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
+	.if \irqstack
+	ENTER_IRQ_STACK old_rsp=%rcx
+	.endif
+
 	call	\do_sym
 
+	.if \irqstack
+	LEAVE_IRQ_STACK
+	.endif
+
 	.if \shift_ist != -1
 	addq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 3/3] x86/entry/64: Move #BP from IST to the IRQ stack
  2015-07-25  5:57 [PATCH v2 0/3] x86_64: Make int3 non-magical Andy Lutomirski
  2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
  2015-07-25  5:57 ` [PATCH v2 2/3] x86/entry/64: Teach idtentry to use the IRQ stack Andy Lutomirski
@ 2015-07-25  5:57 ` Andy Lutomirski
  2015-07-28  9:54   ` Borislav Petkov
  2 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-07-25  5:57 UTC (permalink / raw)
  To: X86 ML, linux-kernel
  Cc: Brian Gerst, Steven Rostedt, Willy Tarreau, Borislav Petkov,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds, Andy Lutomirski

There's nothing IST-worthy about #BP/int3.  We don't allow kprobes
in the small handful of places in the kernel that run at CPL0 with
an invalid stack, and 32-bit kernels have used normal interrupt
gates for #BP forever.

Furthermore, we don't allow kprobes in places that have usergs while
in kernel mode, so "paranoid" is also unnecessary.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S |  2 +-
 arch/x86/kernel/traps.c   | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index af3573e75ed4..e353709d1fcc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1007,7 +1007,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3			do_int3			has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3			do_int3			has_error_code=0	irqstack=1
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8e65d8a9b8db..d823db70f492 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -479,7 +479,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
-/* May run on IST stack. */
+/* Runs on IRQ stack. */
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -494,7 +494,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 	if (poke_int3_handler(regs))
 		return;
 
+	/*
+	 * Use ist_enter despite the fact that we don't use an IST stack.
+	 * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
+	 * mode or even during context tracking state changes.
+	 *
+	 * This means that we can't schedule.  That's okay.
+	 */
 	ist_enter(regs);
+
 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -511,15 +519,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 			SIGTRAP) == NOTIFY_STOP)
 		goto exit;
 
-	/*
-	 * Let others (NMI) know that the debug stack is in use
-	 * as we may switch to the interrupt stack.
-	 */
-	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
-	debug_stack_usage_dec();
+
 exit:
 	ist_exit(regs);
 }
@@ -885,19 +888,16 @@ void __init trap_init(void)
 	cpu_init();
 
 	/*
-	 * X86_TRAP_DB and X86_TRAP_BP have been set
-	 * in early_trap_init(). However, ITS works only after
-	 * cpu_init() loads TSS. See comments in early_trap_init().
+	 * X86_TRAP_DB was installed in early_trap_init(). However,
+	 * IST works only after cpu_init() loads TSS. See comments
+	 * in early_trap_init().
 	 */
 	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
-	/* int3 can be called from all */
-	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
 
 	x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
 	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
 	set_nmi_gate(X86_TRAP_DB, &debug);
-	set_nmi_gate(X86_TRAP_BP, &int3);
 #endif
 }
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
@ 2015-07-26  6:05   ` Borislav Petkov
  2015-07-26 18:50     ` Andy Lutomirski
  2015-07-28  9:41   ` Borislav Petkov
  2015-08-17 11:18   ` Andy Shevchenko
  2 siblings, 1 reply; 13+ messages in thread
From: Borislav Petkov @ 2015-07-26  6:05 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Fri, Jul 24, 2015 at 10:57:04PM -0700, Andy Lutomirski wrote:
> This will allow IRQ stacks to nest inside NMIs or similar entries
> that can happen during IRQ stack setup or teardown.
> 
> The Xen code here has a confusing comment.
> 
> The new macros won't work correctly if they're invoked with IRQs on.
> Add a check under CONFIG_DEBUG_ENTRY to detect that.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/Kconfig.debug       |  2 -
>  arch/x86/entry/entry_64.S    | 89 +++++++++++++++++++++++++++++---------------
>  arch/x86/kernel/process_64.c |  4 ++
>  3 files changed, 63 insertions(+), 32 deletions(-)

What does that apply against?

I get conflicts against current Linus master:

$ patch -p1 --dry-run -i /tmp/0001-x86-entry-64-refactor_irq_stacks_and_make_them_nmi-safe.patch 
checking file arch/x86/Kconfig.debug
checking file arch/x86/entry/entry_64.S
Hunk #1 succeeded at 544 (offset 54 lines).
Hunk #2 FAILED at 568.
Hunk #3 FAILED at 598.
Hunk #4 succeeded at 1012 (offset 99 lines).
Hunk #5 succeeded at 1033 (offset 99 lines).
2 out of 5 hunks FAILED
checking file arch/x86/kernel/process_64.c

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-26  6:05   ` Borislav Petkov
@ 2015-07-26 18:50     ` Andy Lutomirski
  0 siblings, 0 replies; 13+ messages in thread
From: Andy Lutomirski @ 2015-07-26 18:50 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Sat, Jul 25, 2015 at 11:05 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Fri, Jul 24, 2015 at 10:57:04PM -0700, Andy Lutomirski wrote:
>> This will allow IRQ stacks to nest inside NMIs or similar entries
>> that can happen during IRQ stack setup or teardown.
>>
>> The Xen code here has a confusing comment.
>>
>> The new macros won't work correctly if they're invoked with IRQs on.
>> Add a check under CONFIG_DEBUG_ENTRY to detect that.
>>
>> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>> ---
>>  arch/x86/Kconfig.debug       |  2 -
>>  arch/x86/entry/entry_64.S    | 89 +++++++++++++++++++++++++++++---------------
>>  arch/x86/kernel/process_64.c |  4 ++
>>  3 files changed, 63 insertions(+), 32 deletions(-)
>
> What does that apply against?
>
> I get conflicts against current Linus master:

It applies to the result of merging Linus' master with tip/x86/asm.  I
think it also applies to tip/x86/asm directly, but I haven't tried it.
It's an NMI-related thing (sort of), so it seemed sensible to develop
against something that has the other NMI fixes.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
  2015-07-26  6:05   ` Borislav Petkov
@ 2015-07-28  9:41   ` Borislav Petkov
  2015-08-17 11:18   ` Andy Shevchenko
  2 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2015-07-28  9:41 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Fri, Jul 24, 2015 at 10:57:04PM -0700, Andy Lutomirski wrote:
> This will allow IRQ stacks to nest inside NMIs or similar entries
> that can happen during IRQ stack setup or teardown.
> 
> The Xen code here has a confusing comment.
> 
> The new macros won't work correctly if they're invoked with IRQs on.
> Add a check under CONFIG_DEBUG_ENTRY to detect that.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/Kconfig.debug       |  2 -
>  arch/x86/entry/entry_64.S    | 89 +++++++++++++++++++++++++++++---------------
>  arch/x86/kernel/process_64.c |  4 ++
>  3 files changed, 63 insertions(+), 32 deletions(-)

Reviewed-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 2/3] x86/entry/64: Teach idtentry to use the IRQ stack
  2015-07-25  5:57 ` [PATCH v2 2/3] x86/entry/64: Teach idtentry to use the IRQ stack Andy Lutomirski
@ 2015-07-28  9:50   ` Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2015-07-28  9:50 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Fri, Jul 24, 2015 at 10:57:05PM -0700, Andy Lutomirski wrote:
> We don't specifically need IST for things like kprobes, but we do
> want to avoid rare, surprising extra stack usage if a kprobe hits
> with a deep stack.
> 
> Teach idtentry to use the IRQ stack for selected entries.
> 
> This implementation uses the IRQ stack even if we entered from user
> mode.  This disallows tricks like ist_begin_non_atomic.  If we ever
> need such a trick in one of these entries, we can rework this.  For
> now, let's keep it simple.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/entry/entry_64.S | 14 +++++++++++++-
>  1 file changed, 13 insertions(+), 1 deletion(-)

Reviewed-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] x86/entry/64: Move #BP from IST to the IRQ stack
  2015-07-25  5:57 ` [PATCH v2 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
@ 2015-07-28  9:54   ` Borislav Petkov
  2015-07-29 17:57     ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Borislav Petkov @ 2015-07-28  9:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Fri, Jul 24, 2015 at 10:57:06PM -0700, Andy Lutomirski wrote:
> There's nothing IST-worthy about #BP/int3.  We don't allow kprobes
> in the small handful of places in the kernel that run at CPL0 with
> an invalid stack, and 32-bit kernels have used normal interrupt
> gates for #BP forever.
> 
> Furthermore, we don't allow kprobes in places that have usergs while
> in kernel mode, so "paranoid" is also unnecessary.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/entry/entry_64.S |  2 +-
>  arch/x86/kernel/traps.c   | 26 +++++++++++++-------------
>  2 files changed, 14 insertions(+), 14 deletions(-)

...

> @@ -494,7 +494,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
>  	if (poke_int3_handler(regs))
>  		return;
>  
> +	/*
> +	 * Use ist_enter despite the fact that we don't use an IST stack.
> +	 * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
> +	 * mode or even during context tracking state changes.
> +	 *
> +	 * This means that we can't schedule.  That's okay.
> +	 */
>  	ist_enter(regs);

Let's rename that thing. Call it atomic_ctxt_enter or whatever...

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] x86/entry/64: Move #BP from IST to the IRQ stack
  2015-07-28  9:54   ` Borislav Petkov
@ 2015-07-29 17:57     ` Andy Lutomirski
  2015-07-30  4:19       ` Borislav Petkov
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-07-29 17:57 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Tue, Jul 28, 2015 at 2:54 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Fri, Jul 24, 2015 at 10:57:06PM -0700, Andy Lutomirski wrote:
>> There's nothing IST-worthy about #BP/int3.  We don't allow kprobes
>> in the small handful of places in the kernel that run at CPL0 with
>> an invalid stack, and 32-bit kernels have used normal interrupt
>> gates for #BP forever.
>>
>> Furthermore, we don't allow kprobes in places that have usergs while
>> in kernel mode, so "paranoid" is also unnecessary.
>>
>> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>> ---
>>  arch/x86/entry/entry_64.S |  2 +-
>>  arch/x86/kernel/traps.c   | 26 +++++++++++++-------------
>>  2 files changed, 14 insertions(+), 14 deletions(-)
>
> ...
>
>> @@ -494,7 +494,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
>>       if (poke_int3_handler(regs))
>>               return;
>>
>> +     /*
>> +      * Use ist_enter despite the fact that we don't use an IST stack.
>> +      * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
>> +      * mode or even during context tracking state changes.
>> +      *
>> +      * This means that we can't schedule.  That's okay.
>> +      */
>>       ist_enter(regs);
>
> Let's rename that thing. Call it atomic_ctxt_enter or whatever...
>

OK if I do that as a follow-up?  It would probably want to be a
separate patch anyway.

Hmm, I'm starting to like this new regime in which we never ever
switch to user mode from anywhere other than the standard kernel
stack.  It looks like even Xen may play along and do it cleanly soon
:)  Maybe I'll even add an assertion somewhere to make sure we don't
break it.  (I think this also means that the bad iret fixup can be
simplified.)

Also, with all this stuff applied (and the modify_ldt thing, once the
Xen folks figure out what's wrong), I think we can reinstate the old
LARL check for 16-bit segments and thus prevent naughty users from
banging on espfix using only sigreturn.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 3/3] x86/entry/64: Move #BP from IST to the IRQ stack
  2015-07-29 17:57     ` Andy Lutomirski
@ 2015-07-30  4:19       ` Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2015-07-30  4:19 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Thomas Gleixner, Peter Zijlstra,
	Linus Torvalds

On Wed, Jul 29, 2015 at 10:57:26AM -0700, Andy Lutomirski wrote:
> OK if I do that as a follow-up?  It would probably want to be a
> separate patch anyway.

Of course.

> Hmm, I'm starting to like this new regime in which we never ever
> switch to user mode from anywhere other than the standard kernel
> stack.  It looks like even Xen may play along and do it cleanly soon
> :)  Maybe I'll even add an assertion somewhere to make sure we don't
> break it.  (I think this also means that the bad iret fixup can be
> simplified.)

Definitely sounds like a nice, logical thing. We sometimes switch stacks
to land on the kernel stack before returning to user mode (IST and all)
but I guess that's a clean enough thing to do. Oh, and only a couple of
insns so yeah.

> Also, with all this stuff applied (and the modify_ldt thing, once the
> Xen folks figure out what's wrong), I think we can reinstate the old
> LARL check for 16-bit segments and thus prevent naughty users from
> banging on espfix using only sigreturn.

Uuh, and then only check ZF. I guess this should cover all the legacy
cases, which is nice.

Yeah, sounds coolio. :-)

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
  2015-07-26  6:05   ` Borislav Petkov
  2015-07-28  9:41   ` Borislav Petkov
@ 2015-08-17 11:18   ` Andy Shevchenko
  2015-08-19 15:50     ` Andy Lutomirski
  2 siblings, 1 reply; 13+ messages in thread
From: Andy Shevchenko @ 2015-08-17 11:18 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, linux-kernel, Brian Gerst, Steven Rostedt, Willy Tarreau,
	Borislav Petkov, Thomas Gleixner, Peter Zijlstra, Linus Torvalds

On Sat, Jul 25, 2015 at 8:57 AM, Andy Lutomirski <luto@kernel.org> wrote:
> This will allow IRQ stacks to nest inside NMIs or similar entries
> that can happen during IRQ stack setup or teardown.
>
> The Xen code here has a confusing comment.
>
> The new macros won't work correctly if they're invoked with IRQs on.
> Add a check under CONFIG_DEBUG_ENTRY to detect that.
>

> +/*
> + * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
> + * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
> + * Requires kernel GSBASE.
> + *
> + * The invariant is that, if irq_count != 1, then the IRQ stack is in use.
> + */

I might be wrong, but shouldn't be this read as 'if irq_count != -1' ?

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  2015-08-17 11:18   ` Andy Shevchenko
@ 2015-08-19 15:50     ` Andy Lutomirski
  0 siblings, 0 replies; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-19 15:50 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Andy Lutomirski, X86 ML, linux-kernel, Brian Gerst,
	Steven Rostedt, Willy Tarreau, Borislav Petkov, Thomas Gleixner,
	Peter Zijlstra, Linus Torvalds

On Mon, Aug 17, 2015 at 4:18 AM, Andy Shevchenko
<andy.shevchenko@gmail.com> wrote:
> On Sat, Jul 25, 2015 at 8:57 AM, Andy Lutomirski <luto@kernel.org> wrote:
>> This will allow IRQ stacks to nest inside NMIs or similar entries
>> that can happen during IRQ stack setup or teardown.
>>
>> The Xen code here has a confusing comment.
>>
>> The new macros won't work correctly if they're invoked with IRQs on.
>> Add a check under CONFIG_DEBUG_ENTRY to detect that.
>>
>
>> +/*
>> + * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
>> + * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
>> + * Requires kernel GSBASE.
>> + *
>> + * The invariant is that, if irq_count != 1, then the IRQ stack is in use.
>> + */
>
> I might be wrong, but shouldn't be this read as 'if irq_count != -1' ?
>

Indeed.  Will fix for the next version.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-08-19 15:50 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-25  5:57 [PATCH v2 0/3] x86_64: Make int3 non-magical Andy Lutomirski
2015-07-25  5:57 ` [PATCH v2 1/3] x86/entry/64: Refactor IRQ stacks and make them NMI-safe Andy Lutomirski
2015-07-26  6:05   ` Borislav Petkov
2015-07-26 18:50     ` Andy Lutomirski
2015-07-28  9:41   ` Borislav Petkov
2015-08-17 11:18   ` Andy Shevchenko
2015-08-19 15:50     ` Andy Lutomirski
2015-07-25  5:57 ` [PATCH v2 2/3] x86/entry/64: Teach idtentry to use the IRQ stack Andy Lutomirski
2015-07-28  9:50   ` Borislav Petkov
2015-07-25  5:57 ` [PATCH v2 3/3] x86/entry/64: Move #BP from IST to " Andy Lutomirski
2015-07-28  9:54   ` Borislav Petkov
2015-07-29 17:57     ` Andy Lutomirski
2015-07-30  4:19       ` Borislav Petkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).