* [PATCH 0/2] irqstack changes for Josh
From: Andy Lutomirski @ 2017-06-30 15:56 UTC
  To: X86 ML; +Cc: linux-kernel, Borislav Petkov, Josh Poimboeuf, Andy Lutomirski

Patch 1 consolidates code and patch 2 fixes a glitch that Josh noticed.
These could go straight to -tip, but it may make more sense for Josh
to add them to his undwarf series.

P.S. Should undwarf be called balrog instead?  It seems that Josh has
been delving very deeply indeed lately.

Andy Lutomirski (2):
  x86/entry/64: Refactor IRQ stacks and make them NMI-safe
  x86/entry/64: Initialize the top of the IRQ stack before switching
    stacks

 arch/x86/Kconfig.debug       |   2 -
 arch/x86/entry/entry_64.S    | 107 +++++++++++++++++++++++++++++++++----------
 arch/x86/kernel/process_64.c |   3 ++
 3 files changed, 86 insertions(+), 26 deletions(-)

-- 
2.9.4

* [PATCH 1/2] x86/entry/64: Refactor IRQ stacks and make them NMI-safe
From: Andy Lutomirski @ 2017-06-30 15:56 UTC
  To: X86 ML; +Cc: linux-kernel, Borislav Petkov, Josh Poimboeuf, Andy Lutomirski

This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.
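
For intuition, here is a minimal user-space C model of the irq_count
protocol that the new macros implement (illustrative only, not kernel
code; the names just mirror the assembly below):

	#include <assert.h>

	static int irq_count = -1;		/* -1: IRQ stack not in use */
	static unsigned long rsp = 0x7000;	/* fake task-stack pointer */
	#define IRQ_STACK_TOP 0xf000UL		/* fake IRQ stack top */

	static unsigned long enter_irq_stack(void)
	{
		unsigned long old_rsp = rsp;

		/* incl PER_CPU_VAR(irq_count): first entry goes -1 -> 0 */
		if (++irq_count == 0)
			rsp = IRQ_STACK_TOP;	/* cmovzq: switch only then */
		rsp -= 8;			/* pushq \old_rsp */
		return old_rsp;
	}

	static void leave_irq_stack(unsigned long old_rsp)
	{
		rsp = old_rsp;	/* popq %rsp: get off the stack first... */
		irq_count--;	/* ...then drop the claim */
	}

	int main(void)
	{
		unsigned long o = enter_irq_stack();	/* switches stacks */
		unsigned long n = enter_irq_stack();	/* nested, e.g. an NMI */

		assert(n == IRQ_STACK_TOP - 8);		/* no second switch */
		leave_irq_stack(n);
		leave_irq_stack(o);
		assert(irq_count == -1 && rsp == o);
		return 0;
	}

A nested entry sees irq_count go from 0 to 1, so the conditional move
leaves RSP alone and the nested handler simply keeps using the IRQ
stack it is already on.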

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/Kconfig.debug       |  2 --
 arch/x86/entry/entry_64.S    | 85 +++++++++++++++++++++++++++++++-------------
 arch/x86/kernel/process_64.c |  3 ++
 3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fcb7604172ce..353ed09f5fba 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4a4c0834f965..3ace85965242 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -446,6 +446,59 @@ ENTRY(irq_entries_start)
     .endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl $X86_EFLAGS_IF, (%rsp)
+	jz .Lokay_\@
+	ud2
+.Lokay_\@:
+	addq $8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
 + * flags, writes the old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+	incl	PER_CPU_VAR(irq_count)
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 *
+	 * If anything is added that can interrupt us here without using IST,
+	 * it must be *extremely* careful to limit its stack usage.  This
+	 * could include kprobes and a hypothetical future IST-less #DB
+	 * handler.
+	 */
+
+	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	/*
 +	 * As in ENTER_IRQ_STACK, if irq_count == 0 here, we are still
 +	 * claiming the IRQ stack even though we are no longer on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
@@ -484,17 +537,7 @@ END(irq_entries_start)
 	CALL_enter_from_user_mode
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -514,10 +557,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -890,12 +931,10 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp				/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
+	LEAVE_IRQ_STACK
 	leaveq
-	decl	PER_CPU_VAR(irq_count)
 	ret
 END(do_softirq_own_stack)
 
@@ -922,13 +961,11 @@ ENTRY(xen_do_hypervisor_callback)		/* do_hypervisor_callback(struct *pt_regs) */
  * see the correct pointer to the pt_regs
  */
 	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
+
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
+
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b6840bf3940b..1d910d97c1c6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned prev_fsindex, prev_gsindex;
 
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+		     this_cpu_read(irq_count) != -1);
+
 	switch_fpu_prepare(prev_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
-- 
2.9.4

* [PATCH 2/2] x86/entry/64: Initialize the top of the IRQ stack before switching stacks
From: Andy Lutomirski @ 2017-06-30 15:56 UTC
  To: X86 ML; +Cc: linux-kernel, Borislav Petkov, Josh Poimboeuf, Andy Lutomirski

The OOPS unwinder wants the word at the top of the IRQ stack to
point back to the previous stack at all times when the IRQ stack
is in use.  There's currently a one-instruction window in ENTER_IRQ_STACK
during which this isn't the case.  Fix it by writing the old RSP to the
top of the IRQ stack before switching to it.

This writes the old RSP to the stack twice (once by the new movq and
again by the existing pushq), which is a bit ugly.  We could get rid of
this by replacing irq_stack_ptr with
irq_stack_ptr_minus_eight (better name welcome).  OTOH, there may be
all kinds of odd microarchitectural considerations in play that
affect performance by a few cycles here.
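
For intuition, the back-link that the unwinder consumes can be modeled
like this (a hand-wavy sketch, not the real unwinder code; the function
and its parameters are made up for illustration):

	/*
	 * If SP points into the IRQ stack, hop back to the interrupted
	 * stack by reading the word stored at the very top.
	 */
	static unsigned long unwind_past_irq_stack(unsigned long sp,
						   unsigned long base,
						   unsigned long size)
	{
		unsigned long top = base + size;

		if (sp >= base && sp < top)
			return *(unsigned long *)(top - 8);  /* back-link */
		return sp;	/* not on the IRQ stack */
	}

The patch below guarantees that this top word is already valid at the
instant RSP first points into the IRQ stack.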

Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/entry_64.S | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3ace85965242..f914a9207bcc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -468,6 +468,7 @@ END(irq_entries_start)
 	DEBUG_ENTRY_ASSERT_IRQS_OFF
 	movq	%rsp, \old_rsp
 	incl	PER_CPU_VAR(irq_count)
+	jnz	.Lirq_stack_push_old_rsp_\@
 
 	/*
 	 * Right now, if we just incremented irq_count to zero, we've
@@ -477,9 +478,30 @@ END(irq_entries_start)
 	 * it must be *extremely* careful to limit its stack usage.  This
 	 * could include kprobes and a hypothetical future IST-less #DB
 	 * handler.
+	 *
+	 * The OOPS unwinder relies on the word at the top of the IRQ
+	 * stack linking back to the previous RSP for the entire time we're
+	 * on the IRQ stack.  For this to work reliably, we need to write
+	 * it before we actually move ourselves to the IRQ stack.
+	 */
+
+	movq	\old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+	movq	PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+	/*
+	 * If the first movq above becomes wrong due to IRQ stack layout
+	 * changes, the only way we'll notice is if we try to unwind right
+	 * here.  Assert that we set up the stack right to catch this type
+	 * of bug quickly.
 	 */
+	cmpq	-8(%rsp), \old_rsp
+	je	.Lirq_stack_okay\@
+	ud2
+	.Lirq_stack_okay\@:
+#endif
 
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+.Lirq_stack_push_old_rsp_\@:
 	pushq	\old_rsp
 .endm
 
-- 
2.9.4

* Re: [PATCH 0/2] irqstack changes for Josh
From: Josh Poimboeuf @ 2017-06-30 16:37 UTC
  To: Andy Lutomirski; +Cc: X86 ML, linux-kernel, Borislav Petkov

On Fri, Jun 30, 2017 at 08:56:31AM -0700, Andy Lutomirski wrote:
> Patch 1 consolidates code and patch 2 fixes a glitch that Josh noticed.
> These could go straight to -tip, but it may make more sense for Josh
> to add them to his undwarf series.
> 
> P.S. Should undwarf be called balrog instead?  It seems that Josh has
> been delving very deeply indeed lately.

Who cares about that entry code stuff.  I like the Balrog idea.  The top
contenders in my mind are:

- Balrog unwinder
- Orc unwinder
- Undwarf unwinder

-- 
Josh
