linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/14] x86/unwind/orc: ORC fixes
@ 2020-03-12 17:30 Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode Josh Poimboeuf
                   ` (15 more replies)
  0 siblings, 16 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

Several ORC unwinder cleanups, fixes, and debug improvements.

Jann Horn (1):
  x86/entry/64: Fix unwind hints in rewind_stack_do_exit()

Josh Poimboeuf (12):
  x86/dumpstack: Add SHOW_REGS_IRET mode
  objtool: Fix stack offset tracking for indirect CFAs
  x86/entry/64: Fix unwind hints in register clearing code
  x86/entry/64: Fix unwind hints in kernel exit path
  x86/entry/64: Fix unwind hints in __switch_to_asm()
  x86/unwind/orc: Convert global variables to static
  x86/unwind: Prevent false warnings for non-current tasks
  x86/unwind/orc: Prevent unwinding before ORC initialization
  x86/unwind/orc: Fix error path for bad ORC entry type
  x86/unwind/orc: Fix premature unwind stoppage due to IRET frames
  x86/unwind/orc: Add more unwinder warnings
  x86/unwind/orc: Add 'unwind_debug' cmdline option

Miroslav Benes (1):
  x86/unwind/orc: Don't skip the first frame for inactive tasks

 .../admin-guide/kernel-parameters.txt         |   6 +
 arch/x86/entry/calling.h                      |  40 ++--
 arch/x86/entry/entry_64.S                     |  14 +-
 arch/x86/include/asm/kdebug.h                 |   1 +
 arch/x86/include/asm/unwind.h                 |   2 +-
 arch/x86/kernel/dumpstack.c                   |  27 +--
 arch/x86/kernel/dumpstack_64.c                |   3 +-
 arch/x86/kernel/process_64.c                  |   7 +-
 arch/x86/kernel/unwind_frame.c                |   3 +
 arch/x86/kernel/unwind_orc.c                  | 185 ++++++++++++++----
 tools/objtool/check.c                         |   2 +-
 11 files changed, 201 insertions(+), 89 deletions(-)

-- 
2.21.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-13 11:10   ` Miroslav Benes
  2020-03-12 17:30 ` [PATCH 02/14] objtool: Fix stack offset tracking for indirect CFAs Josh Poimboeuf
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

Now that __show_regs() has the concept of "modes" to indicate which
registers should be displayed, replace show_iret_regs() with a new
SHOW_REGS_IRET mode.  This is only a cleanup and doesn't change any
behavior.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/include/asm/kdebug.h |  1 +
 arch/x86/kernel/dumpstack.c   | 27 ++++++++++-----------------
 arch/x86/kernel/process_64.c  |  7 ++++++-
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 247ab14c6309..6112227368e7 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -23,6 +23,7 @@ enum die_val {
 };
 
 enum show_regs_mode {
+	SHOW_REGS_IRET,
 	SHOW_REGS_SHORT,
 	/*
 	 * For when userspace crashed, but we don't think it's our fault, and
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ae64ec7f752f..8a9ff25779ec 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -126,15 +126,8 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
 	show_opcodes(regs, loglvl);
 }
 
-void show_iret_regs(struct pt_regs *regs)
-{
-	show_ip(regs, KERN_DEFAULT);
-	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
-		regs->sp, regs->flags);
-}
-
 static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
-				  bool partial)
+				  bool iret_only)
 {
 	/*
 	 * These on_stack() checks aren't strictly necessary: the unwind code
@@ -145,17 +138,17 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
 	 * next stack, this function will be called again with the same regs so
 	 * they can be printed in the right context.
 	 */
-	if (!partial && on_stack(info, regs, sizeof(*regs))) {
+	if (!iret_only && on_stack(info, regs, sizeof(*regs))) {
 		__show_regs(regs, SHOW_REGS_SHORT);
 
-	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
-				       IRET_FRAME_SIZE)) {
+	} else if (iret_only && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+					 IRET_FRAME_SIZE)) {
 		/*
 		 * When an interrupt or exception occurs in entry code, the
 		 * full pt_regs might not have been saved yet.  In that case
 		 * just print the iret frame.
 		 */
-		show_iret_regs(regs);
+		__show_regs(regs, SHOW_REGS_IRET);
 	}
 }
 
@@ -166,13 +159,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	struct stack_info stack_info = {0};
 	unsigned long visit_mask = 0;
 	int graph_idx = 0;
-	bool partial = false;
+	bool iret_only = false;
 
 	printk("%sCall Trace:\n", log_lvl);
 
 	unwind_start(&state, task, regs, stack);
 	stack = stack ? : get_stack_pointer(task, regs);
-	regs = unwind_get_entry_regs(&state, &partial);
+	regs = unwind_get_entry_regs(&state, &iret_only);
 
 	/*
 	 * Iterate through the stacks, starting with the current stack pointer.
@@ -210,7 +203,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			printk("%s <%s>\n", log_lvl, stack_name);
 
 		if (regs)
-			show_regs_if_on_stack(&stack_info, regs, partial);
+			show_regs_if_on_stack(&stack_info, regs, iret_only);
 
 		/*
 		 * Scan the stack, printing any text addresses we find.  At the
@@ -269,9 +262,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unwind_next_frame(&state);
 
 			/* if the frame has entry regs, print them */
-			regs = unwind_get_entry_regs(&state, &partial);
+			regs = unwind_get_entry_regs(&state, &iret_only);
 			if (regs)
-				show_regs_if_on_stack(&stack_info, regs, partial);
+				show_regs_if_on_stack(&stack_info, regs, iret_only);
 		}
 
 		if (stack_name)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ffd497804dbc..d1986a63d403 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -71,7 +71,12 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 	unsigned int fsindex, gsindex;
 	unsigned int ds, es;
 
-	show_iret_regs(regs);
+	show_ip(regs, KERN_DEFAULT);
+	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+		regs->sp, regs->flags);
+
+	if (mode == SHOW_REGS_IRET)
+		return;
 
 	if (regs->orig_ax != -1)
 		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 02/14] objtool: Fix stack offset tracking for indirect CFAs
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code Josh Poimboeuf
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner, Vegard Nossum, Joe Mario

When the current frame address (CFA) is stored on the stack (i.e.,
cfa->base == CFI_SP_INDIRECT), objtool neglects to adjust the stack
offset when there are subsequent pushes or pops.  This results in bad
ORC data at the end of the ENTER_IRQ_STACK macro, when it puts the
previous stack pointer on the stack and does a subsequent push.

This fixes the following unwinder warning:

  WARNING: can't dereference registers at 00000000f0a6bdba for ip interrupt_entry+0x9f/0xa0

Fixes: 627fce14809b ("objtool: Add ORC unwind table generation")
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Reported-by: Dave Jones <dsj@fb.com>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Reported-by: Joe Mario <jmario@redhat.com>
---
 tools/objtool/check.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e637a4a38d2a..fe906864e1c1 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1464,7 +1464,7 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
 	struct cfi_reg *cfa = &state->cfa;
 	struct stack_op *op = &insn->stack_op;
 
-	if (cfa->base != CFI_SP)
+	if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
 		return 0;
 
 	/* push */
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 02/14] objtool: Fix stack offset tracking for indirect CFAs Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 19:29   ` Andy Lutomirski
  2020-03-12 17:30 ` [PATCH 04/14] x86/entry/64: Fix unwind hints in kernel exit path Josh Poimboeuf
                   ` (12 subsequent siblings)
  15 siblings, 1 reply; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after
pushing it.  If an NMI or exception hits after a register is cleared,
but before the UNWIND_HINT_REGS annotation, the ORC unwinder will
wrongly think the previous value of the register was zero.  This can
confuse the unwinding process and cause it to exit early.

Because ORC is simpler than DWARF, there are a limited number of unwind
annotation states, so it's not possible to add an individual unwind hint
after each push/clear combination.  Instead, the register clearing
instructions need to be consolidated and moved to after the
UNWIND_HINT_REGS annotation.

Fixes: 3f01daecd545 ("x86/entry/64: Introduce the PUSH_AND_CLEAN_REGS macro")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/entry/calling.h | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0789e13ece90..1c7f13bb6728 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SIZEOF_PTREGS	21*8
 
 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
-	/*
-	 * Push registers and sanitize registers of values that a
-	 * speculation attack might otherwise want to exploit. The
-	 * lower registers are likely clobbered well before they
-	 * could be put to use in a speculative execution gadget.
-	 * Interleave XOR with PUSH for better uop scheduling:
-	 */
 	.if \save_ret
 	pushq	%rsi		/* pt_regs->si */
 	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
 	pushq   %rsi		/* pt_regs->si */
 	.endif
 	pushq	\rdx		/* pt_regs->dx */
-	xorl	%edx, %edx	/* nospec   dx */
 	pushq   %rcx		/* pt_regs->cx */
-	xorl	%ecx, %ecx	/* nospec   cx */
 	pushq   \rax		/* pt_regs->ax */
 	pushq   %r8		/* pt_regs->r8 */
-	xorl	%r8d, %r8d	/* nospec   r8 */
 	pushq   %r9		/* pt_regs->r9 */
-	xorl	%r9d, %r9d	/* nospec   r9 */
 	pushq   %r10		/* pt_regs->r10 */
-	xorl	%r10d, %r10d	/* nospec   r10 */
 	pushq   %r11		/* pt_regs->r11 */
-	xorl	%r11d, %r11d	/* nospec   r11*/
 	pushq	%rbx		/* pt_regs->rbx */
-	xorl    %ebx, %ebx	/* nospec   rbx*/
 	pushq	%rbp		/* pt_regs->rbp */
-	xorl    %ebp, %ebp	/* nospec   rbp*/
 	pushq	%r12		/* pt_regs->r12 */
-	xorl	%r12d, %r12d	/* nospec   r12*/
 	pushq	%r13		/* pt_regs->r13 */
-	xorl	%r13d, %r13d	/* nospec   r13*/
 	pushq	%r14		/* pt_regs->r14 */
-	xorl	%r14d, %r14d	/* nospec   r14*/
 	pushq	%r15		/* pt_regs->r15 */
-	xorl	%r15d, %r15d	/* nospec   r15*/
 	UNWIND_HINT_REGS
+
 	.if \save_ret
 	pushq	%rsi		/* return address on top of stack */
 	.endif
+
+	/*
+	 * Sanitize registers of values that a speculation attack might
+	 * otherwise want to exploit. The lower registers are likely clobbered
+	 * well before they could be put to use in a speculative execution
+	 * gadget.
+	 */
+	xorl	%edx,  %edx	/* nospec dx  */
+	xorl	%ecx,  %ecx	/* nospec cx  */
+	xorl	%r8d,  %r8d	/* nospec r8  */
+	xorl	%r9d,  %r9d	/* nospec r9  */
+	xorl	%r10d, %r10d	/* nospec r10 */
+	xorl	%r11d, %r11d	/* nospec r11 */
+	xorl	%ebx,  %ebx	/* nospec rbx */
+	xorl	%ebp,  %ebp	/* nospec rbp */
+	xorl	%r12d, %r12d	/* nospec r12 */
+	xorl	%r13d, %r13d	/* nospec r13 */
+	xorl	%r14d, %r14d	/* nospec r14 */
+	xorl	%r15d, %r15d	/* nospec r15 */
+
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 04/14] x86/entry/64: Fix unwind hints in kernel exit path
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (2 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 05/14] x86/entry/64: Fix unwind hints in __switch_to_asm() Josh Poimboeuf
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner, Dr. David Alan Gilbert, Joe Mario

In swapgs_restore_regs_and_return_to_usermode, after the stack is
switched to the trampoline stack, the existing UNWIND_HINT_REGS hint is
no longer valid, which can result in the following ORC unwinder warning:

  WARNING: can't dereference registers at 000000003aeb0cdd for ip swapgs_restore_regs_and_return_to_usermode+0x93/0xa0

For full correctness, we could try to add complicated unwind hints so
the unwinder could continue to find the registers, but when when it's
this close to kernel exit, unwind hints aren't really needed anymore and
it's fine to just use an empty hint which tells the unwinder to stop.

For consistency, also move the UNWIND_HINT_EMPTY in
entry_SYSCALL_64_after_hwframe to a similar location.

Fixes: 3e3b9293d392 ("x86/entry/64: Return to userspace from the trampoline stack")
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Reported-by: Dave Jones <dsj@fb.com>
Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Reported-by: Joe Mario <jmario@redhat.com>
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/entry/entry_64.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f2bb91e87877..6a21d7bc60df 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	 */
 syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
-	UNWIND_HINT_EMPTY
 	POP_REGS pop_rdi=0 skip_r11rcx=1
 
 	/*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
 	 */
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+	UNWIND_HINT_EMPTY
 
 	pushq	RSP-RDI(%rdi)	/* RSP */
 	pushq	(%rdi)		/* RDI */
@@ -637,6 +637,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 	 */
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+	UNWIND_HINT_EMPTY
 
 	/* Copy the IRET frame to the trampoline stack. */
 	pushq	6*8(%rdi)	/* SS */
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 05/14] x86/entry/64: Fix unwind hints in __switch_to_asm()
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (3 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 04/14] x86/entry/64: Fix unwind hints in kernel exit path Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 06/14] x86/entry/64: Fix unwind hints in rewind_stack_do_exit() Josh Poimboeuf
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

UNWIND_HINT_FUNC has some limitations: specifically, it doesn't reset
all the registers to undefined.  This causes objtool to get confused
about the RBP push in __switch_to_asm(), resulting in bad ORC data.

While __switch_to_asm() does do some stack magic, it's otherwise a
normal callable-from-C function, so just annotate it as a function,
which makes objtool happy and allows it to produces the correct hints
automatically.

Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/entry/entry_64.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6a21d7bc60df..5d6dac54e487 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
  * %rdi: prev task
  * %rsi: next task
  */
-SYM_CODE_START(__switch_to_asm)
-	UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
 	/*
 	 * Save callee-saved registers
 	 * This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
 	popq	%rbp
 
 	jmp	__switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
 
 /*
  * A newly forked process directly context switches into this address.
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 06/14] x86/entry/64: Fix unwind hints in rewind_stack_do_exit()
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (4 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 05/14] x86/entry/64: Fix unwind hints in __switch_to_asm() Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 07/14] x86/unwind/orc: Convert global variables to static Josh Poimboeuf
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

From: Jann Horn <jannh@google.com>

The leaq instruction in rewind_stack_do_exit moves the stack pointer
directly below the pt_regs at the top of the task stack before calling
do_exit(). Tell the unwinder to expect pt_regs.

Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/entry/entry_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5d6dac54e487..c1eaeab0e0d4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
 
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
 	leaq	-PTREGS_SIZE(%rax), %rsp
-	UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+	UNWIND_HINT_REGS
 
 	call	do_exit
 SYM_CODE_END(rewind_stack_do_exit)
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 07/14] x86/unwind/orc: Convert global variables to static
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (5 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 06/14] x86/entry/64: Fix unwind hints in rewind_stack_do_exit() Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 08/14] x86/unwind: Prevent false warnings for non-current tasks Josh Poimboeuf
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

These variables aren't used outside of unwind_orc.c, make them static.

Also annotate some of them with '__ro_after_init', as applicable.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/unwind_orc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..64889da666f4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -15,12 +15,12 @@ extern int __stop_orc_unwind_ip[];
 extern struct orc_entry __start_orc_unwind[];
 extern struct orc_entry __stop_orc_unwind[];
 
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
 
-unsigned int lookup_num_blocks;
-bool orc_init;
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
 
 static inline unsigned long orc_ip(const int *ip)
 {
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 08/14] x86/unwind: Prevent false warnings for non-current tasks
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (6 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 07/14] x86/unwind/orc: Convert global variables to static Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 09/14] x86/unwind/orc: Don't skip the first frame for inactive tasks Josh Poimboeuf
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

There's some daring kernel code out there which dumps the stack of
another task without first making sure the task is inactive.  If the
task happens to be running while the unwinder is reading the stack,
unusual unwinder warnings can result.

There's no race-free way for the unwinder to know whether such a warning
is legitimate, so just disable unwinder warnings for all non-current
tasks.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/dumpstack_64.c |  3 ++-
 arch/x86/kernel/unwind_frame.c |  3 +++
 arch/x86/kernel/unwind_orc.c   | 40 +++++++++++++++++++---------------
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a881..460ae7f66818 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -183,7 +183,8 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
 	 */
 	if (visit_mask) {
 		if (*visit_mask & (1UL << info->type)) {
-			printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+			if (task == current)
+				printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
 			goto unknown;
 		}
 		*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a224b5ab103f..54226110bc7f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -344,6 +344,9 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (IS_ENABLED(CONFIG_X86_32))
 		goto the_end;
 
+	if (state->task != current)
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 64889da666f4..45166fd50be3 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -8,7 +8,13 @@
 #include <asm/orc_lookup.h>
 
 #define orc_warn(fmt, ...) \
-	printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+	printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...)					\
+({									\
+	if (state->task == current)					\
+		orc_warn(args);						\
+})
 
 extern int __start_orc_unwind_ip[];
 extern int __stop_orc_unwind_ip[];
@@ -446,8 +452,8 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R10:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R10 at ip %pB\n",
-				 (void *)state->ip);
+			orc_warn_current("missing R10 value at %pB\n",
+					 (void *)state->ip);
 			goto err;
 		}
 		sp = state->regs->r10;
@@ -455,8 +461,8 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R13:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R13 at ip %pB\n",
-				 (void *)state->ip);
+			orc_warn_current("missing R13 value at %pB\n",
+					 (void *)state->ip);
 			goto err;
 		}
 		sp = state->regs->r13;
@@ -464,8 +470,8 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DI:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DI at ip %pB\n",
-				 (void *)state->ip);
+			orc_warn_current("missing RDI value at %pB\n",
+					 (void *)state->ip);
 			goto err;
 		}
 		sp = state->regs->di;
@@ -473,15 +479,15 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DX:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DX at ip %pB\n",
-				 (void *)state->ip);
+			orc_warn_current("missing DX value at %pB\n",
+					 (void *)state->ip);
 			goto err;
 		}
 		sp = state->regs->dx;
 		break;
 
 	default:
-		orc_warn("unknown SP base reg %d for ip %pB\n",
+		orc_warn("unknown SP base reg %d at %pB\n",
 			 orc->sp_reg, (void *)state->ip);
 		goto err;
 	}
@@ -509,8 +515,8 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
-			orc_warn("can't dereference registers at %p for ip %pB\n",
-				 (void *)sp, (void *)orig_ip);
+			orc_warn_current("can't access registers at %pB\n",
+					 (void *)orig_ip);
 			goto err;
 		}
 
@@ -521,8 +527,8 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS_IRET:
 		if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
-			orc_warn("can't dereference iret registers at %p for ip %pB\n",
-				 (void *)sp, (void *)orig_ip);
+			orc_warn_current("can't access iret registers at %pB\n",
+					 (void *)orig_ip);
 			goto err;
 		}
 
@@ -532,7 +538,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+		orc_warn("unknown .orc_unwind entry type %d at %pB\n",
 			 orc->type, (void *)orig_ip);
 		break;
 	}
@@ -564,8 +570,8 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->stack_info.type == prev_type &&
 	    on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
 	    state->sp <= prev_sp) {
-		orc_warn("stack going in the wrong direction? ip=%pB\n",
-			 (void *)orig_ip);
+		orc_warn_current("stack going in the wrong direction? at %pB\n",
+				 (void *)orig_ip);
 		goto err;
 	}
 
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 09/14] x86/unwind/orc: Don't skip the first frame for inactive tasks
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (7 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 08/14] x86/unwind: Prevent false warnings for non-current tasks Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 10/14] x86/unwind/orc: Prevent unwinding before ORC initialization Josh Poimboeuf
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

From: Miroslav Benes <mbenes@suse.cz>

When unwinding an inactive task, the ORC unwinder skips the first frame
by default.  If both the 'regs' and 'first_frame' parameters of
unwind_start() are NULL, 'state->sp' and 'first_frame' are later
initialized to the same value for an inactive task.  Given there is a
"less than or equal to" comparison used at the end of __unwind_start()
for skipping stack frames, the first frame is skipped.

Drop the equal part of the comparison and make the behavior equivalent
to the frame pointer unwinder.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/unwind_orc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 45166fd50be3..e9f5a20c69c6 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -657,7 +657,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	/* Otherwise, skip ahead to the user-specified starting frame: */
 	while (!unwind_done(state) &&
 	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
-			state->sp <= (unsigned long)first_frame))
+			state->sp < (unsigned long)first_frame))
 		unwind_next_frame(state);
 
 	return;
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 10/14] x86/unwind/orc: Prevent unwinding before ORC initialization
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (8 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 09/14] x86/unwind/orc: Don't skip the first frame for inactive tasks Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-13 12:24   ` Miroslav Benes
  2020-03-12 17:30 ` [PATCH 11/14] x86/unwind/orc: Fix error path for bad ORC entry type Josh Poimboeuf
                   ` (5 subsequent siblings)
  15 siblings, 1 reply; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

If the unwinder is called before the ORC data has been initialized,
orc_find() returns NULL, and it tries to fall back to using frame
pointers.  This can cause some unexpected warnings during boot.

Move the 'orc_init' check from orc_find() to __unwind_init(), so that it
doesn't even try to unwind from an uninitialized state.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/unwind_orc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9f5a20c69c6..cb11567361cc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -148,9 +148,6 @@ static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
 
-	if (!orc_init)
-		return NULL;
-
 	if (ip == 0)
 		return &null_orc_entry;
 
@@ -591,6 +588,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		    struct pt_regs *regs, unsigned long *first_frame)
 {
+	if (!orc_init)
+		goto done;
+
 	memset(state, 0, sizeof(*state));
 	state->task = task;
 
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 11/14] x86/unwind/orc: Fix error path for bad ORC entry type
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (9 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 10/14] x86/unwind/orc: Prevent unwinding before ORC initialization Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 12/14] x86/unwind/orc: Fix premature unwind stoppage due to IRET frames Josh Poimboeuf
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

If the ORC entry type is unknown, nothing else can be done other than
reporting an error.  Exit the function instead of breaking out of the
switch statement.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/unwind_orc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index cb11567361cc..33b80a7f998f 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -537,7 +537,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	default:
 		orc_warn("unknown .orc_unwind entry type %d at %pB\n",
 			 orc->type, (void *)orig_ip);
-		break;
+		goto err;
 	}
 
 	/* Find BP: */
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 12/14] x86/unwind/orc: Fix premature unwind stoppage due to IRET frames
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (10 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 11/14] x86/unwind/orc: Fix error path for bad ORC entry type Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 17:30 ` [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings Josh Poimboeuf
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

The following execution path is possible:

  fsnotify()
    [ realign the stack and store previous SP in R10 ]
    <IRQ>
      [ only IRET regs saved ]
      common_interrupt()
        interrupt_entry()
	  <NMI>
	    [ full pt_regs saved ]
	    ...
	    [ unwind stack ]

When the unwinder goes through the NMI and the IRQ on the stack, and
then sees fsnotify(), it doesn't have access to the value of R10,
because it only has the five IRET registers.  So the unwind stops
prematurely.

However, because the interrupt_entry() code is careful not to clobber
R10 before saving the full regs, the unwinder should be able to read R10
from the previously saved full pt_regs associated with the NMI.

Handle this case properly.  When encountering an IRET regs frame
immediately after a full pt_regs frame, use the pt_regs as a backup
which can be used to get the C register values.

Also, note that a call frame resets the 'prev_regs' value, because a
function is free to clobber the registers.  For this fix to work, the
IRET and full regs frames must be adjacent, with no FUNC frames in
between.  So replace the FUNC hint in interrupt_entry() with an
IRET_REGS hint.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/entry/entry_64.S     |  4 +--
 arch/x86/include/asm/unwind.h |  2 +-
 arch/x86/kernel/unwind_orc.c  | 51 +++++++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c1eaeab0e0d4..faa53fee0663 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -511,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
  * +----------------------------------------------------+
  */
 SYM_CODE_START(interrupt_entry)
-	UNWIND_HINT_FUNC
+	UNWIND_HINT_IRET_REGS offset=16
 	ASM_CLAC
 	cld
 
@@ -543,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
 	pushq	5*8(%rdi)		/* regs->eflags */
 	pushq	4*8(%rdi)		/* regs->cs */
 	pushq	3*8(%rdi)		/* regs->ip */
+	UNWIND_HINT_IRET_REGS
 	pushq	2*8(%rdi)		/* regs->orig_ax */
 	pushq	8(%rdi)			/* return address */
-	UNWIND_HINT_FUNC
 
 	movq	(%rdi), %rdi
 	jmp	2f
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 499578f7e6d7..70fc159ebe69 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -19,7 +19,7 @@ struct unwind_state {
 #if defined(CONFIG_UNWINDER_ORC)
 	bool signal, full_regs;
 	unsigned long sp, bp, ip;
-	struct pt_regs *regs;
+	struct pt_regs *regs, *prev_regs;
 #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
 	bool got_irq;
 	unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 33b80a7f998f..0ebc11a8bb45 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -384,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
 	return true;
 }
 
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs.  This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+		    unsigned long *val)
+{
+	unsigned int reg = reg_off/8;
+
+	if (!state->regs)
+		return false;
+
+	if (state->full_regs) {
+		*val = ((unsigned long *)state->regs)[reg];
+		return true;
+	}
+
+	if (state->prev_regs) {
+		*val = ((unsigned long *)state->prev_regs)[reg];
+		return true;
+	}
+
+	return false;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
-	unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+	unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
 	enum stack_type prev_type = state->stack_info.type;
 	struct orc_entry *orc;
 	bool indirect = false;
@@ -448,39 +477,35 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	case ORC_REG_R10:
-		if (!state->regs || !state->full_regs) {
+		if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
 			orc_warn_current("missing R10 value at %pB\n",
 					 (void *)state->ip);
 			goto err;
 		}
-		sp = state->regs->r10;
 		break;
 
 	case ORC_REG_R13:
-		if (!state->regs || !state->full_regs) {
+		if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
 			orc_warn_current("missing R13 value at %pB\n",
 					 (void *)state->ip);
 			goto err;
 		}
-		sp = state->regs->r13;
 		break;
 
 	case ORC_REG_DI:
-		if (!state->regs || !state->full_regs) {
+		if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
 			orc_warn_current("missing RDI value at %pB\n",
 					 (void *)state->ip);
 			goto err;
 		}
-		sp = state->regs->di;
 		break;
 
 	case ORC_REG_DX:
-		if (!state->regs || !state->full_regs) {
+		if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
 			orc_warn_current("missing DX value at %pB\n",
 					 (void *)state->ip);
 			goto err;
 		}
-		sp = state->regs->dx;
 		break;
 
 	default:
@@ -507,6 +532,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 		state->sp = sp;
 		state->regs = NULL;
+		state->prev_regs = NULL;
 		state->signal = false;
 		break;
 
@@ -518,6 +544,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		}
 
 		state->regs = (struct pt_regs *)sp;
+		state->prev_regs = NULL;
 		state->full_regs = true;
 		state->signal = true;
 		break;
@@ -529,6 +556,8 @@ bool unwind_next_frame(struct unwind_state *state)
 			goto err;
 		}
 
+		if (state->full_regs)
+			state->prev_regs = state->regs;
 		state->regs = (void *)sp - IRET_FRAME_OFFSET;
 		state->full_regs = false;
 		state->signal = true;
@@ -543,8 +572,8 @@ bool unwind_next_frame(struct unwind_state *state)
 	/* Find BP: */
 	switch (orc->bp_reg) {
 	case ORC_REG_UNDEFINED:
-		if (state->regs && state->full_regs)
-			state->bp = state->regs->bp;
+		if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+			state->bp = tmp;
 		break;
 
 	case ORC_REG_PREV_SP:
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (11 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 12/14] x86/unwind/orc: Fix premature unwind stoppage due to IRET frames Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 19:12   ` Jann Horn
  2020-03-12 17:30 ` [PATCH 14/14] x86/unwind/orc: Add 'unwind_debug' cmdline option Josh Poimboeuf
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

Make sure warnings are displayed for all error scenarios (except when
encountering an empty unwind hint).

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/unwind_orc.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 0ebc11a8bb45..4118013a574a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -450,8 +450,15 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	/* End-of-stack check for kernel threads: */
 	if (orc->sp_reg == ORC_REG_UNDEFINED) {
-		if (!orc->end)
+		if (!orc->end) {
+			/*
+			 * This is reported as an error for the caller, but
+			 * otherwise it isn't worth warning about.  In theory
+			 * it can only happen when hitting UNWIND_HINT_EMPTY in
+			 * entry code, close to a kernel exit point.
+			 */
 			goto err;
+		}
 
 		goto the_end;
 	}
@@ -515,8 +522,11 @@ bool unwind_next_frame(struct unwind_state *state)
 	}
 
 	if (indirect) {
-		if (!deref_stack_reg(state, sp, &sp))
+		if (!deref_stack_reg(state, sp, &sp)) {
+			orc_warn_current("can't access indirect SP at %pB\n",
+					 (void *)state->ip);
 			goto err;
+		}
 	}
 
 	/* Find IP, SP and possibly regs: */
@@ -524,8 +534,11 @@ bool unwind_next_frame(struct unwind_state *state)
 	case ORC_TYPE_CALL:
 		ip_p = sp - sizeof(long);
 
-		if (!deref_stack_reg(state, ip_p, &state->ip))
+		if (!deref_stack_reg(state, ip_p, &state->ip)) {
+			orc_warn_current("can't access call return IP (0x%lx) at %pB\n",
+					 ip_p, (void *)orig_ip);
 			goto err;
+		}
 
 		state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
 						  state->ip, (void *)ip_p);
@@ -577,13 +590,19 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	case ORC_REG_PREV_SP:
-		if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
+		if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp)) {
+			orc_warn_current("can't access BP (from SP) at %pB\n",
+					 (void *)orig_ip);
 			goto err;
+		}
 		break;
 
 	case ORC_REG_BP:
-		if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
+		if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp)) {
+			orc_warn_current("can't access BP at %pB\n",
+					 (void *)orig_ip);
 			goto err;
+		}
 		break;
 
 	default:
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 14/14] x86/unwind/orc: Add 'unwind_debug' cmdline option
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (12 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings Josh Poimboeuf
@ 2020-03-12 17:30 ` Josh Poimboeuf
  2020-03-12 19:15 ` [PATCH 00/14] x86/unwind/orc: ORC fixes Peter Zijlstra
  2020-03-13 14:00 ` Miroslav Benes
  15 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 17:30 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

Sometimes the one-line ORC unwinder warnings aren't very helpful.  Add a
new 'unwind_debug' cmdline option which will dump the full stack
contents of the current task when an error condition is encountered.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 .../admin-guide/kernel-parameters.txt         |  6 +++
 arch/x86/kernel/unwind_orc.c                  | 49 ++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1b7d3ad2a35..fd7d71b908b2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5098,6 +5098,12 @@
 	unknown_nmi_panic
 			[X86] Cause panic on unknown NMI.
 
+	unwind_debug	[X86-64]
+			Enable unwinder debug output.  This can be
+			useful for debugging certain unwinder error
+			conditions, including corrupt stacks and
+			bad/missing unwinder metadata.
+
 	usbcore.authorized_default=
 			[USB] Default USB device authorization:
 			(default -1 = authorized except for wireless USB,
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 4118013a574a..139b476f848a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -12,8 +12,13 @@
 
 #define orc_warn_current(args...)					\
 ({									\
-	if (state->task == current)					\
+	static bool dumped_before;					\
+	if (state->task == current) {					\
 		orc_warn(args);						\
+		if (unwind_debug && !dumped_before)			\
+			unwind_dump(state);				\
+		dumped_before = true;					\
+	}								\
 })
 
 extern int __start_orc_unwind_ip[];
@@ -22,11 +27,53 @@ extern struct orc_entry __start_orc_unwind[];
 extern struct orc_entry __stop_orc_unwind[];
 
 static bool orc_init __ro_after_init;
+static bool unwind_debug __ro_after_init;
 static unsigned int lookup_num_blocks __ro_after_init;
 
 static DEFINE_MUTEX(sort_mutex);
 static int *cur_orc_ip_table = __start_orc_unwind_ip;
 static struct orc_entry *cur_orc_table = __start_orc_unwind;
+static unsigned int lookup_num_blocks __ro_after_init;
+
+static int __init unwind_debug_cmdline(char *str)
+{
+	unwind_debug = true;
+
+	return 0;
+}
+early_param("unwind_debug", unwind_debug_cmdline);
+
+static void unwind_dump(struct unwind_state *state)
+{
+	static bool dumped_before;
+	unsigned long word, *sp;
+	struct stack_info stack_info = {0};
+	unsigned long visit_mask = 0;
+
+	if (dumped_before)
+		return;
+
+	dumped_before = true;
+
+	printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n",
+			state->stack_info.type, state->stack_info.next_sp,
+			state->stack_mask, state->graph_idx);
+
+	for (sp = __builtin_frame_address(0); sp;
+	     sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+		if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
+			break;
+
+		for (; sp < stack_info.end; sp++) {
+
+			word = READ_ONCE_NOCHECK(*sp);
+
+			printk_deferred("%0*lx: %0*lx (%pB)\n", BITS_PER_LONG/4,
+					(unsigned long)sp, BITS_PER_LONG/4,
+					word, (void *)word);
+		}
+	}
+}
 
 static inline unsigned long orc_ip(const int *ip)
 {
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings
  2020-03-12 17:30 ` [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings Josh Poimboeuf
@ 2020-03-12 19:12   ` Jann Horn
  2020-03-12 19:20     ` Josh Poimboeuf
  0 siblings, 1 reply; 25+ messages in thread
From: Jann Horn @ 2020-03-12 19:12 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: the arch/x86 maintainers, kernel list, Peter Zijlstra,
	Vince Weaver, Dave Jones, Miroslav Benes, Andy Lutomirski,
	Steven Rostedt, Thomas Gleixner

On Thu, Mar 12, 2020 at 6:31 PM Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> Make sure warnings are displayed for all error scenarios (except when
> encountering an empty unwind hint).
[...]
>         /* End-of-stack check for kernel threads: */
>         if (orc->sp_reg == ORC_REG_UNDEFINED) {
> -               if (!orc->end)
> +               if (!orc->end) {
> +                       /*
> +                        * This is reported as an error for the caller, but
> +                        * otherwise it isn't worth warning about.  In theory
> +                        * it can only happen when hitting UNWIND_HINT_EMPTY in
> +                        * entry code, close to a kernel exit point.
> +                        */
>                         goto err;

But UNWIND_HINT_EMPTY sets end=1, right? And this is the branch for
end==0. What am I missing?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 00/14] x86/unwind/orc: ORC fixes
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (13 preceding siblings ...)
  2020-03-12 17:30 ` [PATCH 14/14] x86/unwind/orc: Add 'unwind_debug' cmdline option Josh Poimboeuf
@ 2020-03-12 19:15 ` Peter Zijlstra
  2020-03-13 14:00 ` Miroslav Benes
  15 siblings, 0 replies; 25+ messages in thread
From: Peter Zijlstra @ 2020-03-12 19:15 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: x86, linux-kernel, Vince Weaver, Dave Jones, Jann Horn,
	Miroslav Benes, Andy Lutomirski, Steven Rostedt, Thomas Gleixner

On Thu, Mar 12, 2020 at 12:30:19PM -0500, Josh Poimboeuf wrote:
> Several ORC unwinder cleanups, fixes, and debug improvements.
> 
> Jann Horn (1):
>   x86/entry/64: Fix unwind hints in rewind_stack_do_exit()
> 
> Josh Poimboeuf (12):
>   x86/dumpstack: Add SHOW_REGS_IRET mode
>   objtool: Fix stack offset tracking for indirect CFAs
>   x86/entry/64: Fix unwind hints in register clearing code
>   x86/entry/64: Fix unwind hints in kernel exit path
>   x86/entry/64: Fix unwind hints in __switch_to_asm()
>   x86/unwind/orc: Convert global variables to static
>   x86/unwind: Prevent false warnings for non-current tasks
>   x86/unwind/orc: Prevent unwinding before ORC initialization
>   x86/unwind/orc: Fix error path for bad ORC entry type
>   x86/unwind/orc: Fix premature unwind stoppage due to IRET frames
>   x86/unwind/orc: Add more unwinder warnings
>   x86/unwind/orc: Add 'unwind_debug' cmdline option
> 
> Miroslav Benes (1):
>   x86/unwind/orc: Don't skip the first frame for inactive tasks

Looks like good stuff..

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings
  2020-03-12 19:12   ` Jann Horn
@ 2020-03-12 19:20     ` Josh Poimboeuf
  0 siblings, 0 replies; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 19:20 UTC (permalink / raw)
  To: Jann Horn
  Cc: the arch/x86 maintainers, kernel list, Peter Zijlstra,
	Vince Weaver, Dave Jones, Miroslav Benes, Andy Lutomirski,
	Steven Rostedt, Thomas Gleixner

On Thu, Mar 12, 2020 at 08:12:16PM +0100, Jann Horn wrote:
> On Thu, Mar 12, 2020 at 6:31 PM Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> > Make sure warnings are displayed for all error scenarios (except when
> > encountering an empty unwind hint).
> [...]
> >         /* End-of-stack check for kernel threads: */
> >         if (orc->sp_reg == ORC_REG_UNDEFINED) {
> > -               if (!orc->end)
> > +               if (!orc->end) {
> > +                       /*
> > +                        * This is reported as an error for the caller, but
> > +                        * otherwise it isn't worth warning about.  In theory
> > +                        * it can only happen when hitting UNWIND_HINT_EMPTY in
> > +                        * entry code, close to a kernel exit point.
> > +                        */
> >                         goto err;
> 
> But UNWIND_HINT_EMPTY sets end=1, right? And this is the branch for
> end==0. What am I missing?

You're right.  I need to revisit that comment...

-- 
Josh


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code
  2020-03-12 17:30 ` [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code Josh Poimboeuf
@ 2020-03-12 19:29   ` Andy Lutomirski
  2020-03-12 19:57     ` Josh Poimboeuf
  2020-03-12 21:24     ` Jann Horn
  0 siblings, 2 replies; 25+ messages in thread
From: Andy Lutomirski @ 2020-03-12 19:29 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: x86, linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner





> On Mar 12, 2020, at 10:31 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> 
> The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after
> pushing it.  If an NMI or exception hits after a register is cleared,
> but before the UNWIND_HINT_REGS annotation, the ORC unwinder will
> wrongly think the previous value of the register was zero.  This can
> confuse the unwinding process and cause it to exit early.
> 
> Because ORC is simpler than DWARF, there are a limited number of unwind
> annotation states, so it's not possible to add an individual unwind hint
> after each push/clear combination.  Instead, the register clearing
> instructions need to be consolidated and moved to after the
> UNWIND_HINT_REGS annotation.

I don’t suppose you know how bad t he performance hit is on a non-PTI machine?

> 
> Fixes: 3f01daecd545 ("x86/entry/64: Introduce the PUSH_AND_CLEAN_REGS macro")
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
> ---
> arch/x86/entry/calling.h | 40 +++++++++++++++++++++-------------------
> 1 file changed, 21 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
> index 0789e13ece90..1c7f13bb6728 100644
> --- a/arch/x86/entry/calling.h
> +++ b/arch/x86/entry/calling.h
> @@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
> #define SIZEOF_PTREGS    21*8
> 
> .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
> -    /*
> -     * Push registers and sanitize registers of values that a
> -     * speculation attack might otherwise want to exploit. The
> -     * lower registers are likely clobbered well before they
> -     * could be put to use in a speculative execution gadget.
> -     * Interleave XOR with PUSH for better uop scheduling:
> -     */
>    .if \save_ret
>    pushq    %rsi        /* pt_regs->si */
>    movq    8(%rsp), %rsi    /* temporarily store the return address in %rsi */
> @@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
>    pushq   %rsi        /* pt_regs->si */
>    .endif
>    pushq    \rdx        /* pt_regs->dx */
> -    xorl    %edx, %edx    /* nospec   dx */
>    pushq   %rcx        /* pt_regs->cx */
> -    xorl    %ecx, %ecx    /* nospec   cx */
>    pushq   \rax        /* pt_regs->ax */
>    pushq   %r8        /* pt_regs->r8 */
> -    xorl    %r8d, %r8d    /* nospec   r8 */
>    pushq   %r9        /* pt_regs->r9 */
> -    xorl    %r9d, %r9d    /* nospec   r9 */
>    pushq   %r10        /* pt_regs->r10 */
> -    xorl    %r10d, %r10d    /* nospec   r10 */
>    pushq   %r11        /* pt_regs->r11 */
> -    xorl    %r11d, %r11d    /* nospec   r11*/
>    pushq    %rbx        /* pt_regs->rbx */
> -    xorl    %ebx, %ebx    /* nospec   rbx*/
>    pushq    %rbp        /* pt_regs->rbp */
> -    xorl    %ebp, %ebp    /* nospec   rbp*/
>    pushq    %r12        /* pt_regs->r12 */
> -    xorl    %r12d, %r12d    /* nospec   r12*/
>    pushq    %r13        /* pt_regs->r13 */
> -    xorl    %r13d, %r13d    /* nospec   r13*/
>    pushq    %r14        /* pt_regs->r14 */
> -    xorl    %r14d, %r14d    /* nospec   r14*/
>    pushq    %r15        /* pt_regs->r15 */
> -    xorl    %r15d, %r15d    /* nospec   r15*/
>    UNWIND_HINT_REGS
> +
>    .if \save_ret
>    pushq    %rsi        /* return address on top of stack */
>    .endif
> +
> +    /*
> +     * Sanitize registers of values that a speculation attack might
> +     * otherwise want to exploit. The lower registers are likely clobbered
> +     * well before they could be put to use in a speculative execution
> +     * gadget.
> +     */
> +    xorl    %edx,  %edx    /* nospec dx  */
> +    xorl    %ecx,  %ecx    /* nospec cx  */
> +    xorl    %r8d,  %r8d    /* nospec r8  */
> +    xorl    %r9d,  %r9d    /* nospec r9  */
> +    xorl    %r10d, %r10d    /* nospec r10 */
> +    xorl    %r11d, %r11d    /* nospec r11 */
> +    xorl    %ebx,  %ebx    /* nospec rbx */
> +    xorl    %ebp,  %ebp    /* nospec rbp */
> +    xorl    %r12d, %r12d    /* nospec r12 */
> +    xorl    %r13d, %r13d    /* nospec r13 */
> +    xorl    %r14d, %r14d    /* nospec r14 */
> +    xorl    %r15d, %r15d    /* nospec r15 */
> +
> .endm
> 
> .macro POP_REGS pop_rdi=1 skip_r11rcx=0
> -- 
> 2.21.1
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code
  2020-03-12 19:29   ` Andy Lutomirski
@ 2020-03-12 19:57     ` Josh Poimboeuf
  2020-03-12 20:07       ` Peter Zijlstra
  2020-03-12 21:24     ` Jann Horn
  1 sibling, 1 reply; 25+ messages in thread
From: Josh Poimboeuf @ 2020-03-12 19:57 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: x86, linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

On Thu, Mar 12, 2020 at 12:29:29PM -0700, Andy Lutomirski wrote:
> > On Mar 12, 2020, at 10:31 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> > 
> > The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after
> > pushing it.  If an NMI or exception hits after a register is cleared,
> > but before the UNWIND_HINT_REGS annotation, the ORC unwinder will
> > wrongly think the previous value of the register was zero.  This can
> > confuse the unwinding process and cause it to exit early.
> > 
> > Because ORC is simpler than DWARF, there are a limited number of unwind
> > annotation states, so it's not possible to add an individual unwind hint
> > after each push/clear combination.  Instead, the register clearing
> > instructions need to be consolidated and moved to after the
> > UNWIND_HINT_REGS annotation.
> 
> I don’t suppose you know how bad t he performance hit is on a non-PTI machine?

Hm, what does it have to do with PTI?  Should I run a syscall
microbenchmark?

-- 
Josh


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code
  2020-03-12 19:57     ` Josh Poimboeuf
@ 2020-03-12 20:07       ` Peter Zijlstra
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Zijlstra @ 2020-03-12 20:07 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Andy Lutomirski, x86, linux-kernel, Vince Weaver, Dave Jones,
	Jann Horn, Miroslav Benes, Andy Lutomirski, Steven Rostedt,
	Thomas Gleixner

On Thu, Mar 12, 2020 at 02:57:14PM -0500, Josh Poimboeuf wrote:
> On Thu, Mar 12, 2020 at 12:29:29PM -0700, Andy Lutomirski wrote:
> > > On Mar 12, 2020, at 10:31 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> > > 
> > > The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after
> > > pushing it.  If an NMI or exception hits after a register is cleared,
> > > but before the UNWIND_HINT_REGS annotation, the ORC unwinder will
> > > wrongly think the previous value of the register was zero.  This can
> > > confuse the unwinding process and cause it to exit early.
> > > 
> > > Because ORC is simpler than DWARF, there are a limited number of unwind
> > > annotation states, so it's not possible to add an individual unwind hint
> > > after each push/clear combination.  Instead, the register clearing
> > > instructions need to be consolidated and moved to after the
> > > UNWIND_HINT_REGS annotation.
> > 
> > I don’t suppose you know how bad t he performance hit is on a non-PTI machine?
> 
> Hm, what does it have to do with PTI?  Should I run a syscall
> microbenchmark?

Mostly that performance with PTI on is abysmal so we don't care about a
few cycles.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code
  2020-03-12 19:29   ` Andy Lutomirski
  2020-03-12 19:57     ` Josh Poimboeuf
@ 2020-03-12 21:24     ` Jann Horn
  1 sibling, 0 replies; 25+ messages in thread
From: Jann Horn @ 2020-03-12 21:24 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Josh Poimboeuf, the arch/x86 maintainers, kernel list,
	Peter Zijlstra, Vince Weaver, Dave Jones, Miroslav Benes,
	Andy Lutomirski, Steven Rostedt, Thomas Gleixner

On Thu, Mar 12, 2020 at 8:29 PM Andy Lutomirski <luto@amacapital.net> wrote:
> > On Mar 12, 2020, at 10:31 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> > The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after
> > pushing it.  If an NMI or exception hits after a register is cleared,
> > but before the UNWIND_HINT_REGS annotation, the ORC unwinder will
> > wrongly think the previous value of the register was zero.  This can
> > confuse the unwinding process and cause it to exit early.
> >
> > Because ORC is simpler than DWARF, there are a limited number of unwind
> > annotation states, so it's not possible to add an individual unwind hint
> > after each push/clear combination.  Instead, the register clearing
> > instructions need to be consolidated and moved to after the
> > UNWIND_HINT_REGS annotation.
>
> I don’t suppose you know how bad t he performance hit is on a non-PTI machine?
[...]
> > +    xorl    %edx,  %edx    /* nospec dx  */
> > +    xorl    %ecx,  %ecx    /* nospec cx  */
> > +    xorl    %r8d,  %r8d    /* nospec r8  */
> > +    xorl    %r9d,  %r9d    /* nospec r9  */
> > +    xorl    %r10d, %r10d    /* nospec r10 */
> > +    xorl    %r11d, %r11d    /* nospec r11 */
> > +    xorl    %ebx,  %ebx    /* nospec rbx */
> > +    xorl    %ebp,  %ebp    /* nospec rbp */
> > +    xorl    %r12d, %r12d    /* nospec r12 */
> > +    xorl    %r13d, %r13d    /* nospec r13 */
> > +    xorl    %r14d, %r14d    /* nospec r14 */
> > +    xorl    %r15d, %r15d    /* nospec r15 */

I'm curious what the reason for a performance impact would be. Are you
worried about the performance of the instruction decoder or the
renamer? As long as those aren't slowed down, this will just end up
giving the store uops to the backend earlier, right? (Since the XOR
instructions shouldn't reach the backend at all AKAIU.)

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode
  2020-03-12 17:30 ` [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode Josh Poimboeuf
@ 2020-03-13 11:10   ` Miroslav Benes
  0 siblings, 0 replies; 25+ messages in thread
From: Miroslav Benes @ 2020-03-13 11:10 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: x86, linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Andy Lutomirski, Steven Rostedt, Thomas Gleixner

On Thu, 12 Mar 2020, Josh Poimboeuf wrote:

> Now that __show_regs() has the concept of "modes" to indicate which
> registers should be displayed, replace show_iret_regs() with a new
> SHOW_REGS_IRET mode.  This is only a cleanup and doesn't change any
> behavior.
> 
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
> ---
>  arch/x86/include/asm/kdebug.h |  1 +
>  arch/x86/kernel/dumpstack.c   | 27 ++++++++++-----------------
>  arch/x86/kernel/process_64.c  |  7 ++++++-
>  3 files changed, 17 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
> index 247ab14c6309..6112227368e7 100644
> --- a/arch/x86/include/asm/kdebug.h
> +++ b/arch/x86/include/asm/kdebug.h
> @@ -23,6 +23,7 @@ enum die_val {
>  };
>  
>  enum show_regs_mode {
> +	SHOW_REGS_IRET,
>  	SHOW_REGS_SHORT,
>  	/*
>  	 * For when userspace crashed, but we don't think it's our fault, and
> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
> index ae64ec7f752f..8a9ff25779ec 100644
> --- a/arch/x86/kernel/dumpstack.c
> +++ b/arch/x86/kernel/dumpstack.c
> @@ -126,15 +126,8 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
>  	show_opcodes(regs, loglvl);
>  }
>  
> -void show_iret_regs(struct pt_regs *regs)
> -{
> -	show_ip(regs, KERN_DEFAULT);
> -	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
> -		regs->sp, regs->flags);
> -}
> -

There is also declaration of show_iret_regs() in 
arch/x86/include/asm/kdebug.h which can be removed now.

Miroslav

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 10/14] x86/unwind/orc: Prevent unwinding before ORC initialization
  2020-03-12 17:30 ` [PATCH 10/14] x86/unwind/orc: Prevent unwinding before ORC initialization Josh Poimboeuf
@ 2020-03-13 12:24   ` Miroslav Benes
  0 siblings, 0 replies; 25+ messages in thread
From: Miroslav Benes @ 2020-03-13 12:24 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: x86, linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Andy Lutomirski, Steven Rostedt, Thomas Gleixner

On Thu, 12 Mar 2020, Josh Poimboeuf wrote:

> If the unwinder is called before the ORC data has been initialized,
> orc_find() returns NULL, and it tries to fall back to using frame
> pointers.  This can cause some unexpected warnings during boot.
> 
> Move the 'orc_init' check from orc_find() to __unwind_init(), so that it

s/__unwind_init()/__unwind_start()/

>  void __unwind_start(struct unwind_state *state, struct task_struct *task,
>  		    struct pt_regs *regs, unsigned long *first_frame)
>  {
> +	if (!orc_init)
> +		goto done;
> +
>  	memset(state, 0, sizeof(*state));
>  	state->task = task;

Miroslav

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 00/14] x86/unwind/orc: ORC fixes
  2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
                   ` (14 preceding siblings ...)
  2020-03-12 19:15 ` [PATCH 00/14] x86/unwind/orc: ORC fixes Peter Zijlstra
@ 2020-03-13 14:00 ` Miroslav Benes
  15 siblings, 0 replies; 25+ messages in thread
From: Miroslav Benes @ 2020-03-13 14:00 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: x86, linux-kernel, Peter Zijlstra, Vince Weaver, Dave Jones,
	Jann Horn, Andy Lutomirski, Steven Rostedt, Thomas Gleixner

On Thu, 12 Mar 2020, Josh Poimboeuf wrote:

> Several ORC unwinder cleanups, fixes, and debug improvements.
> 
> Jann Horn (1):
>   x86/entry/64: Fix unwind hints in rewind_stack_do_exit()
> 
> Josh Poimboeuf (12):
>   x86/dumpstack: Add SHOW_REGS_IRET mode
>   objtool: Fix stack offset tracking for indirect CFAs
>   x86/entry/64: Fix unwind hints in register clearing code
>   x86/entry/64: Fix unwind hints in kernel exit path
>   x86/entry/64: Fix unwind hints in __switch_to_asm()
>   x86/unwind/orc: Convert global variables to static
>   x86/unwind: Prevent false warnings for non-current tasks
>   x86/unwind/orc: Prevent unwinding before ORC initialization
>   x86/unwind/orc: Fix error path for bad ORC entry type
>   x86/unwind/orc: Fix premature unwind stoppage due to IRET frames
>   x86/unwind/orc: Add more unwinder warnings
>   x86/unwind/orc: Add 'unwind_debug' cmdline option
> 
> Miroslav Benes (1):
>   x86/unwind/orc: Don't skip the first frame for inactive tasks
> 
>  .../admin-guide/kernel-parameters.txt         |   6 +
>  arch/x86/entry/calling.h                      |  40 ++--
>  arch/x86/entry/entry_64.S                     |  14 +-
>  arch/x86/include/asm/kdebug.h                 |   1 +
>  arch/x86/include/asm/unwind.h                 |   2 +-
>  arch/x86/kernel/dumpstack.c                   |  27 +--
>  arch/x86/kernel/dumpstack_64.c                |   3 +-
>  arch/x86/kernel/process_64.c                  |   7 +-
>  arch/x86/kernel/unwind_frame.c                |   3 +
>  arch/x86/kernel/unwind_orc.c                  | 185 ++++++++++++++----
>  tools/objtool/check.c                         |   2 +-
>  11 files changed, 201 insertions(+), 89 deletions(-)

Apart from the two nits I mentioned and Jann's comment on comment, it 
looks good to me.

Reviewed-by: Miroslav Benes <mbenes@suse.cz>

M

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2020-03-13 14:00 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-12 17:30 [PATCH 00/14] x86/unwind/orc: ORC fixes Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 01/14] x86/dumpstack: Add SHOW_REGS_IRET mode Josh Poimboeuf
2020-03-13 11:10   ` Miroslav Benes
2020-03-12 17:30 ` [PATCH 02/14] objtool: Fix stack offset tracking for indirect CFAs Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 03/14] x86/entry/64: Fix unwind hints in register clearing code Josh Poimboeuf
2020-03-12 19:29   ` Andy Lutomirski
2020-03-12 19:57     ` Josh Poimboeuf
2020-03-12 20:07       ` Peter Zijlstra
2020-03-12 21:24     ` Jann Horn
2020-03-12 17:30 ` [PATCH 04/14] x86/entry/64: Fix unwind hints in kernel exit path Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 05/14] x86/entry/64: Fix unwind hints in __switch_to_asm() Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 06/14] x86/entry/64: Fix unwind hints in rewind_stack_do_exit() Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 07/14] x86/unwind/orc: Convert global variables to static Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 08/14] x86/unwind: Prevent false warnings for non-current tasks Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 09/14] x86/unwind/orc: Don't skip the first frame for inactive tasks Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 10/14] x86/unwind/orc: Prevent unwinding before ORC initialization Josh Poimboeuf
2020-03-13 12:24   ` Miroslav Benes
2020-03-12 17:30 ` [PATCH 11/14] x86/unwind/orc: Fix error path for bad ORC entry type Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 12/14] x86/unwind/orc: Fix premature unwind stoppage due to IRET frames Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 13/14] x86/unwind/orc: Add more unwinder warnings Josh Poimboeuf
2020-03-12 19:12   ` Jann Horn
2020-03-12 19:20     ` Josh Poimboeuf
2020-03-12 17:30 ` [PATCH 14/14] x86/unwind/orc: Add 'unwind_debug' cmdline option Josh Poimboeuf
2020-03-12 19:15 ` [PATCH 00/14] x86/unwind/orc: ORC fixes Peter Zijlstra
2020-03-13 14:00 ` Miroslav Benes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).