All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack
@ 2016-10-20 16:34 Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 1/6] x86/entry/unwind: create stack frames for saved interrupt registers Josh Poimboeuf
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

Here's the next round of dumpstack-related patches.  This adds a
mechanism to identify any pt_regs on the stack so they can be printed in
the stack dump.

Josh Poimboeuf (6):
  x86/entry/unwind: create stack frames for saved interrupt registers
  x86/unwind: create stack frames for saved syscall registers
  x86/dumpstack: print stack identifier on its own line
  x86/dumpstack: print any pt_regs found on the stack
  x86/dumpstack: fix duplicate RIP address display in __show_regs()
  x86/dumpstack: print orig_ax in __show_regs()

 arch/x86/entry/calling.h       |  20 ++++++++
 arch/x86/entry/entry_32.S      |  33 ++++++++++--
 arch/x86/entry/entry_64.S      |  10 ++--
 arch/x86/include/asm/unwind.h  |  16 +++++-
 arch/x86/kernel/dumpstack.c    |  24 +++++++--
 arch/x86/kernel/process_64.c   |  11 ++--
 arch/x86/kernel/unwind_frame.c | 111 ++++++++++++++++++++++++++++++++++++++---
 7 files changed, 201 insertions(+), 24 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/6] x86/entry/unwind: create stack frames for saved interrupt registers
  2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
@ 2016-10-20 16:34 ` Josh Poimboeuf
  2016-10-21  7:49   ` [tip:x86/asm] x86/entry/unwind: Create " tip-bot for Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 2/6] x86/unwind: create stack frames for saved syscall registers Josh Poimboeuf
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

With frame pointers, when a task is interrupted, its stack is no longer
completely reliable because the function could have been interrupted
before it had a chance to save the previous frame pointer on the stack.
So the caller of the interrupted function could get skipped by a stack
trace.

This is problematic for live patching, which needs to know whether a
stack trace of a sleeping task can be relied upon.  There's currently no
way to detect if a sleeping task was interrupted by a page fault
exception or preemption before it went to sleep.

Another issue is that when dumping the stack of an interrupted task, the
unwinder has no way of knowing where the saved pt_regs registers are, so
it can't print them.

This solves those issues by encoding the pt_regs pointer in the frame
pointer on entry from an interrupt or an exception.

This patch also updates the unwinder to be able to decode it, because
otherwise the unwinder would be broken by this change.

Note that this causes a change in the behavior of the unwinder: each
instance of a pt_regs on the stack is now considered a "frame".  So
callers of unwind_get_return_address() will now get an occasional
'regs->ip' address that would have previously been skipped over.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/entry/calling.h       | 20 +++++++++++
 arch/x86/entry/entry_32.S      | 33 +++++++++++++++---
 arch/x86/entry/entry_64.S      | 10 ++++--
 arch/x86/include/asm/unwind.h  | 16 ++++++++-
 arch/x86/kernel/unwind_frame.c | 76 +++++++++++++++++++++++++++++++++++++-----
 5 files changed, 139 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index b493c6a..c598d7e 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,6 +201,26 @@ For 32-bit we have the following conventions - kernel is built with
 	.byte 0xf1
 	.endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+	.if \ptregs_offset
+		leaq \ptregs_offset(%rsp), %rbp
+	.else
+		mov %rsp, %rbp
+	.endif
+	orq	$0x1, %rbp
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2225105..acc0c6f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,6 +176,22 @@
 	SET_KERNEL_GS %edx
 .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+	mov %esp, %ebp
+	orl $0x1, %ebp
+#endif
+.endm
+
 .macro RESTORE_INT_REGS
 	popl	%ebx
 	popl	%ecx
@@ -641,6 +657,7 @@ common_interrupt:
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	do_IRQ
@@ -652,6 +669,7 @@ ENTRY(name)				\
 	ASM_CLAC;			\
 	pushl	$~(nr);			\
 	SAVE_ALL;			\
+	ENCODE_FRAME_POINTER;		\
 	TRACE_IRQS_OFF			\
 	movl	%esp, %eax;		\
 	call	fn;			\
@@ -786,6 +804,7 @@ END(spurious_interrupt_bug)
 ENTRY(xen_hypervisor_callback)
 	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 
 	/*
@@ -840,6 +859,7 @@ ENTRY(xen_failsafe_callback)
 	jmp	iret_exc
 5:	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	jmp	ret_from_exception
 
 .section .fixup, "ax"
@@ -1067,6 +1087,7 @@ common_exception:
 	pushl	%edx
 	pushl	%ecx
 	pushl	%ebx
+	ENCODE_FRAME_POINTER
 	cld
 	movl	$(__KERNEL_PERCPU), %ecx
 	movl	%ecx, %fs
@@ -1099,6 +1120,7 @@ ENTRY(debug)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# error code 0
 	movl	%esp, %eax			# pt_regs pointer
 
@@ -1114,11 +1136,11 @@ ENTRY(debug)
 
 .Ldebug_from_sysenter_stack:
 	/* We're on the SYSENTER stack.  Switch off. */
-	movl	%esp, %ebp
+	movl	%esp, %ebx
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
 	TRACE_IRQS_OFF
 	call	do_debug
-	movl	%ebp, %esp
+	movl	%ebx, %esp
 	jmp	ret_from_exception
 END(debug)
 
@@ -1141,6 +1163,7 @@ ENTRY(nmi)
 
 	pushl	%eax				# pt_regs->orig_ax
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
 
@@ -1159,10 +1182,10 @@ ENTRY(nmi)
 	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
 	 * is using the thread stack right now, so it's safe for us to use it.
 	 */
-	movl	%esp, %ebp
+	movl	%esp, %ebx
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
 	call	do_nmi
-	movl	%ebp, %esp
+	movl	%ebx, %esp
 	jmp	.Lrestore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
@@ -1179,6 +1202,7 @@ ENTRY(nmi)
 	.endr
 	pushl	%eax
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	FIXUP_ESPFIX_STACK			# %eax == %esp
 	xorl	%edx, %edx			# zero error code
 	call	do_nmi
@@ -1192,6 +1216,7 @@ ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ef766a3..65fad8a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -469,6 +469,7 @@ END(irq_entries_start)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 
 	testb	$3, CS(%rsp)
 	jz	1f
@@ -985,6 +986,7 @@ ENTRY(xen_failsafe_callback)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 	jmp	error_exit
 END(xen_failsafe_callback)
 
@@ -1028,6 +1030,7 @@ ENTRY(paranoid_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -1075,6 +1078,7 @@ ENTRY(error_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	xorl	%ebx, %ebx
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
@@ -1257,6 +1261,7 @@ ENTRY(nmi)
 	pushq	%r13		/* pt_regs->r13 */
 	pushq	%r14		/* pt_regs->r14 */
 	pushq	%r15		/* pt_regs->r15 */
+	ENCODE_FRAME_POINTER
 
 	/*
 	 * At this point we no longer need to worry about stack damage
@@ -1270,11 +1275,10 @@ ENTRY(nmi)
 
 	/*
 	 * Return back to user mode.  We must *not* do the normal exit
-	 * work, because we don't want to enable interrupts.  Fortunately,
-	 * do_nmi doesn't modify pt_regs.
+	 * work, because we don't want to enable interrupts.
 	 */
 	SWAPGS
-	jmp	restore_c_regs_and_iret
+	jmp	restore_regs_and_iret
 
 .Lnmi_from_kernel:
 	/*
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 46de9ac..c5a7f3a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,6 +13,7 @@ struct unwind_state {
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
 	unsigned long *bp;
+	struct pt_regs *regs;
 #else
 	unsigned long *sp;
 #endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	if (unwind_done(state))
 		return NULL;
 
-	return state->bp + 1;
+	return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	return state->regs;
 }
 
 #else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	return NULL;
 }
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_FRAME_POINTER */
 
 #endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a2456d4..2221ab1 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -14,6 +14,9 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (unwind_done(state))
 		return 0;
 
+	if (state->regs && user_mode(state->regs))
+		return 0;
+
 	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 				     addr_p);
 
@@ -21,6 +24,20 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+	unsigned long regs = (unsigned long)bp;
+
+	if (!(regs & 0x1))
+		return NULL;
+
+	return (struct pt_regs *)(regs & ~0x1);
+}
+
 static bool update_stack_state(struct unwind_state *state, void *addr,
 			       size_t len)
 {
@@ -43,26 +60,59 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 
 bool unwind_next_frame(struct unwind_state *state)
 {
-	unsigned long *next_bp;
+	struct pt_regs *regs;
+	unsigned long *next_bp, *next_frame;
+	size_t next_len;
 
 	if (unwind_done(state))
 		return false;
 
-	next_bp = (unsigned long *)*state->bp;
+	/* have we reached the end? */
+	if (state->regs && user_mode(state->regs))
+		goto the_end;
+
+	/* get the next frame pointer */
+	if (state->regs)
+		next_bp = (unsigned long *)state->regs->bp;
+	else
+		next_bp = (unsigned long *)*state->bp;
+
+	/* is the next frame pointer an encoded pointer to pt_regs? */
+	regs = decode_frame_pointer(next_bp);
+	if (regs) {
+		next_frame = (unsigned long *)regs;
+		next_len = sizeof(*regs);
+	} else {
+		next_frame = next_bp;
+		next_len = FRAME_HEADER_SIZE;
+	}
 
 	/* make sure the next frame's data is accessible */
-	if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
+	if (!update_stack_state(state, next_frame, next_len))
 		return false;
-
 	/* move to the next frame */
-	state->bp = next_bp;
+	if (regs) {
+		state->regs = regs;
+		state->bp = NULL;
+	} else {
+		state->bp = next_bp;
+		state->regs = NULL;
+	}
+
 	return true;
+
+the_end:
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		    struct pt_regs *regs, unsigned long *first_frame)
 {
+	unsigned long *bp, *frame;
+	size_t len;
+
 	memset(state, 0, sizeof(*state));
 	state->task = task;
 
@@ -73,12 +123,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	}
 
 	/* set up the starting stack frame */
-	state->bp = get_frame_pointer(task, regs);
+	bp = get_frame_pointer(task, regs);
+	regs = decode_frame_pointer(bp);
+	if (regs) {
+		state->regs = regs;
+		frame = (unsigned long *)regs;
+		len = sizeof(*regs);
+	} else {
+		state->bp = bp;
+		frame = bp;
+		len = FRAME_HEADER_SIZE;
+	}
 
 	/* initialize stack info and make sure the frame data is accessible */
-	get_stack_info(state->bp, state->task, &state->stack_info,
+	get_stack_info(frame, state->task, &state->stack_info,
 		       &state->stack_mask);
-	update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+	update_stack_state(state, frame, len);
 
 	/*
 	 * The caller can provide the address of the first frame directly
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/6] x86/unwind: create stack frames for saved syscall registers
  2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 1/6] x86/entry/unwind: create stack frames for saved interrupt registers Josh Poimboeuf
@ 2016-10-20 16:34 ` Josh Poimboeuf
  2016-10-21  7:49   ` [tip:x86/asm] x86/unwind: Create " tip-bot for Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 3/6] x86/dumpstack: print stack identifier on its own line Josh Poimboeuf
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

The entry code doesn't encode the pt_regs pointer for syscalls.  But the
pt_regs are always at the same location, so we can add a manual check
for them.

A later patch prints them as part of the oops stack dump.  They could be
useful, for example, to determine the arguments to a system call.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/unwind_frame.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 2221ab1..5795427 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -24,6 +24,14 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+static bool is_last_task_frame(struct unwind_state *state)
+{
+	unsigned long bp = (unsigned long)state->bp;
+	unsigned long regs = (unsigned long)task_pt_regs(state->task);
+
+	return bp == regs - FRAME_HEADER_SIZE;
+}
+
 /*
  * This determines if the frame pointer actually contains an encoded pointer to
  * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
@@ -71,6 +79,33 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->regs && user_mode(state->regs))
 		goto the_end;
 
+	if (is_last_task_frame(state)) {
+		regs = task_pt_regs(state->task);
+
+		/*
+		 * kthreads (other than the boot CPU's idle thread) have some
+		 * partial regs at the end of their stack which were placed
+		 * there by copy_thread_tls().  But the regs don't have any
+		 * useful information, so we can skip them.
+		 *
+		 * This user_mode() check is slightly broader than a PF_KTHREAD
+		 * check because it also catches the awkward situation where a
+		 * newly forked kthread transitions into a user task by calling
+		 * do_execve(), which eventually clears PF_KTHREAD.
+		 */
+		if (!user_mode(regs))
+			goto the_end;
+
+		/*
+		 * We're almost at the end, but not quite: there's still the
+		 * syscall regs frame.  Entry code doesn't encode the regs
+		 * pointer for syscalls, so we have to set it manually.
+		 */
+		state->regs = regs;
+		state->bp = NULL;
+		return true;
+	}
+
 	/* get the next frame pointer */
 	if (state->regs)
 		next_bp = (unsigned long *)state->regs->bp;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/6] x86/dumpstack: print stack identifier on its own line
  2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 1/6] x86/entry/unwind: create stack frames for saved interrupt registers Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 2/6] x86/unwind: create stack frames for saved syscall registers Josh Poimboeuf
@ 2016-10-20 16:34 ` Josh Poimboeuf
  2016-10-21  7:50   ` [tip:x86/asm] x86/dumpstack: Print " tip-bot for Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 4/6] x86/dumpstack: print any pt_regs found on the stack Josh Poimboeuf
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

show_trace_log_lvl() prints the stack id (e.g. "<IRQ>") without a
newline so that any stack address printed after it will appear on the
same line.  That causes the first stack address to be vertically
misaligned with the rest, making it visually cluttered and slightly
confusing:

  Call Trace:
   <IRQ> [<ffffffff814431c3>] dump_stack+0x86/0xc3
   [<ffffffff8100828b>] perf_callchain_kernel+0x14b/0x160
   [<ffffffff811e915f>] get_perf_callchain+0x15f/0x2b0
   ...
   <EOI> [<ffffffff8189c6c3>] ? _raw_spin_unlock_irq+0x33/0x60
   [<ffffffff810e1c84>] finish_task_switch+0xb4/0x250
   [<ffffffff8106f7dc>] do_async_page_fault+0x2c/0xa0

It will look worse once we start printing pt_regs registers found in the
middle of the stack:

  <IRQ> RIP: 0010:[<ffffffff8189c6c3>]  [<ffffffff8189c6c3>] _raw_spin_unlock_irq+0x33/0x60
  RSP: 0018:ffff88007876f720  EFLAGS: 00000206
  RAX: ffff8800786caa40 RBX: ffff88007d5da140 RCX: 0000000000000007
  ...

Improve readability by adding a newline to the stack name:

  Call Trace:
   <IRQ>
   [<ffffffff814431c3>] dump_stack+0x86/0xc3
   [<ffffffff8100828b>] perf_callchain_kernel+0x14b/0x160
   [<ffffffff811e915f>] get_perf_callchain+0x15f/0x2b0
   ...
   <EOI>
   [<ffffffff8189c6c3>] ? _raw_spin_unlock_irq+0x33/0x60
   [<ffffffff810e1c84>] finish_task_switch+0xb4/0x250
   [<ffffffff8106f7dc>] do_async_page_fault+0x2c/0xa0

Now that "continued" lines are no longer needed, we can also remove the
hack of using the empty string (aka KERN_CONT) and replace it with
KERN_DEFAULT.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/dumpstack.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9b7cf5c..3251177 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -97,7 +97,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 		stack_type_str(stack_info.type, &str_begin, &str_end);
 		if (str_begin)
-			printk("%s <%s> ", log_lvl, str_begin);
+			printk("%s <%s>\n", log_lvl, str_begin);
 
 		/*
 		 * Scan the stack, printing any text addresses we find.  At the
@@ -149,7 +149,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		}
 
 		if (str_end)
-			printk("%s <%s> ", log_lvl, str_end);
+			printk("%s <%s>\n", log_lvl, str_end);
 	}
 }
 
@@ -164,12 +164,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 	if (!sp && task == current)
 		sp = get_stack_pointer(current, NULL);
 
-	show_stack_log_lvl(task, NULL, sp, "");
+	show_stack_log_lvl(task, NULL, sp, KERN_DEFAULT);
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-	show_stack_log_lvl(current, regs, NULL, "");
+	show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/6] x86/dumpstack: print any pt_regs found on the stack
  2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
                   ` (2 preceding siblings ...)
  2016-10-20 16:34 ` [PATCH 3/6] x86/dumpstack: print stack identifier on its own line Josh Poimboeuf
@ 2016-10-20 16:34 ` Josh Poimboeuf
  2016-10-21  7:50   ` [tip:x86/asm] x86/dumpstack: Print " tip-bot for Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 5/6] x86/dumpstack: fix duplicate RIP address display in __show_regs() Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 6/6] x86/dumpstack: print orig_ax " Josh Poimboeuf
  5 siblings, 1 reply; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

Now that we can find pt_regs registers on the stack, print them.  Here's
an example of what it looks like:

  Call Trace:
   <IRQ>
   [<ffffffff8144b793>] dump_stack+0x86/0xc3
   [<ffffffff81142c73>] hrtimer_interrupt+0xb3/0x1c0
   [<ffffffff8105eb86>] local_apic_timer_interrupt+0x36/0x60
   [<ffffffff818b27cd>] smp_apic_timer_interrupt+0x3d/0x50
   [<ffffffff818b06ee>] apic_timer_interrupt+0x9e/0xb0
  RIP: 0010:[<ffffffff818aef43>]  [<ffffffff818aef43>] _raw_spin_unlock_irq+0x33/0x60
  RSP: 0018:ffff880079c4f760  EFLAGS: 00000202
  RAX: ffff880078738000 RBX: ffff88007d3da0c0 RCX: 0000000000000007
  RDX: 0000000000006d78 RSI: ffff8800787388f0 RDI: ffff880078738000
  RBP: ffff880079c4f768 R08: 0000002199088f38 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff81e0d540
  R13: ffff8800369fb700 R14: 0000000000000000 R15: ffff880078738000
   <EOI>
   [<ffffffff810e1f14>] finish_task_switch+0xb4/0x250
   [<ffffffff810e1ed6>] ? finish_task_switch+0x76/0x250
   [<ffffffff818a7b61>] __schedule+0x3e1/0xb20
   ...
   [<ffffffff810759c8>] trace_do_page_fault+0x58/0x2c0
   [<ffffffff8106f7dc>] do_async_page_fault+0x2c/0xa0
   [<ffffffff818b1dd8>] async_page_fault+0x28/0x30
  RIP: 0010:[<ffffffff8145b062>]  [<ffffffff8145b062>] __clear_user+0x42/0x70
  RSP: 0018:ffff880079c4fd38  EFLAGS: 00010202
  RAX: 0000000000000000 RBX: 0000000000000138 RCX: 0000000000000138
  RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000000000061b640
  RBP: ffff880079c4fd48 R08: 0000002198feefd7 R09: ffffffff82a40928
  R10: 0000000000000001 R11: 0000000000000000 R12: 000000000061b640
  R13: 0000000000000000 R14: ffff880079c50000 R15: ffff8800791d7400
   [<ffffffff8145b043>] ? __clear_user+0x23/0x70
   [<ffffffff8145b0fb>] clear_user+0x2b/0x40
   [<ffffffff812fbda2>] load_elf_binary+0x1472/0x1750
   [<ffffffff8129a591>] search_binary_handler+0xa1/0x200
   [<ffffffff8129b69b>] do_execveat_common.isra.36+0x6cb/0x9f0
   [<ffffffff8129b5f3>] ? do_execveat_common.isra.36+0x623/0x9f0
   [<ffffffff8129bcaa>] SyS_execve+0x3a/0x50
   [<ffffffff81003f5c>] do_syscall_64+0x6c/0x1e0
   [<ffffffff818afa3f>] entry_SYSCALL64_slow_path+0x25/0x25
  RIP: 0033:[<00007fd2e2f2e537>]  [<00007fd2e2f2e537>] 0x7fd2e2f2e537
  RSP: 002b:00007ffc449c5fc8  EFLAGS: 00000246
  RAX: ffffffffffffffda RBX: 00007ffc449c8860 RCX: 00007fd2e2f2e537
  RDX: 000000000127cc40 RSI: 00007ffc449c8860 RDI: 00007ffc449c6029
  RBP: 00007ffc449c60b0 R08: 65726f632d667265 R09: 00007ffc449c5e20
  R10: 00000000000005a7 R11: 0000000000000246 R12: 000000000127cc40
  R13: 000000000127ce05 R14: 00007ffc449c6029 R15: 000000000127ce01

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/dumpstack.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 3251177..64281a1 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -82,7 +82,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	 * - softirq stack
 	 * - hardirq stack
 	 */
-	for (; stack; stack = stack_info.next_sp) {
+	for (regs = NULL; stack; stack = stack_info.next_sp) {
 		const char *str_begin, *str_end;
 
 		/*
@@ -119,6 +119,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			if (!__kernel_text_address(addr))
 				continue;
 
+			/*
+			 * Don't print regs->ip again if it was already printed
+			 * by __show_regs() below.
+			 */
+			if (regs && stack == &regs->ip) {
+				unwind_next_frame(&state);
+				continue;
+			}
+
 			if (stack == ret_addr_p)
 				reliable = 1;
 
@@ -146,6 +155,11 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			 * of the addresses will just be printed as unreliable.
 			 */
 			unwind_next_frame(&state);
+
+			/* if the frame has entry regs, print them */
+			regs = unwind_get_entry_regs(&state);
+			if (regs)
+				__show_regs(regs, 0);
 		}
 
 		if (str_end)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 5/6] x86/dumpstack: fix duplicate RIP address display in __show_regs()
  2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
                   ` (3 preceding siblings ...)
  2016-10-20 16:34 ` [PATCH 4/6] x86/dumpstack: print any pt_regs found on the stack Josh Poimboeuf
@ 2016-10-20 16:34 ` Josh Poimboeuf
  2016-10-21  7:51   ` [tip:x86/asm] x86/dumpstack: Fix " tip-bot for Josh Poimboeuf
  2016-10-20 16:34 ` [PATCH 6/6] x86/dumpstack: print orig_ax " Josh Poimboeuf
  5 siblings, 1 reply; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

The RIP address is shown twice in __show_regs().  Before:

  RIP: 0010:[<ffffffff81070446>]  [<ffffffff81070446>] native_write_msr+0x6/0x30

After:

  RIP: 0010:[<ffffffff81070446>] native_write_msr+0x6/0x30

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/process_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c3a7b0..92d3f70 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,8 +61,8 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
 
-	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-	printk_address(regs->ip);
+	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] %pS\n", regs->cs & 0xffff,
+			regs->ip, (void *)regs->ip);
 	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
 			regs->sp, regs->flags);
 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 6/6] x86/dumpstack: print orig_ax in __show_regs()
  2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
                   ` (4 preceding siblings ...)
  2016-10-20 16:34 ` [PATCH 5/6] x86/dumpstack: fix duplicate RIP address display in __show_regs() Josh Poimboeuf
@ 2016-10-20 16:34 ` Josh Poimboeuf
  2016-10-21  7:51   ` [tip:x86/asm] x86/dumpstack: Print " tip-bot for Josh Poimboeuf
  5 siblings, 1 reply; 13+ messages in thread
From: Josh Poimboeuf @ 2016-10-20 16:34 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin
  Cc: x86, linux-kernel, Andy Lutomirski, Linus Torvalds, Brian Gerst,
	Peter Zijlstra

The value of regs->orig_ax contains potentially useful debugging data:
For syscalls it contains the syscall number.  For interrupts it contains
the (negated) vector number.  To reduce noise, print it only if it has a
useful value (i.e., something other than -1).

Here's what it looks like for a write syscall:

  RIP: 0033:[<00007f53ad7b1940>] 0x7f53ad7b1940
  RSP: 002b:00007fff8de66558 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
  RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007f53ad7b1940
  RDX: 0000000000000002 RSI: 00007f53ae0ca000 RDI: 0000000000000001
  ...

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 arch/x86/kernel/process_64.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 92d3f70..4481e72 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -63,8 +63,13 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] %pS\n", regs->cs & 0xffff,
 			regs->ip, (void *)regs->ip);
-	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
 			regs->sp, regs->flags);
+	if (regs->orig_ax != -1)
+		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+	else
+		pr_cont("\n");
+
 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
 	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/asm] x86/entry/unwind: Create stack frames for saved interrupt registers
  2016-10-20 16:34 ` [PATCH 1/6] x86/entry/unwind: create stack frames for saved interrupt registers Josh Poimboeuf
@ 2016-10-21  7:49   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-10-21  7:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, dvlasenk, mingo, luto, brgerst, linux-kernel, tglx, bp,
	jpoimboe, luto, torvalds, peterz

Commit-ID:  946c191161cef10c667b5ee3179db1714fa5b7c0
Gitweb:     http://git.kernel.org/tip/946c191161cef10c667b5ee3179db1714fa5b7c0
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Thu, 20 Oct 2016 11:34:40 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Oct 2016 09:26:03 +0200

x86/entry/unwind: Create stack frames for saved interrupt registers

With frame pointers, when a task is interrupted, its stack is no longer
completely reliable because the function could have been interrupted
before it had a chance to save the previous frame pointer on the stack.
So the caller of the interrupted function could get skipped by a stack
trace.

This is problematic for live patching, which needs to know whether a
stack trace of a sleeping task can be relied upon.  There's currently no
way to detect if a sleeping task was interrupted by a page fault
exception or preemption before it went to sleep.

Another issue is that when dumping the stack of an interrupted task, the
unwinder has no way of knowing where the saved pt_regs registers are, so
it can't print them.

This solves those issues by encoding the pt_regs pointer in the frame
pointer on entry from an interrupt or an exception.

This patch also updates the unwinder to be able to decode it, because
otherwise the unwinder would be broken by this change.

Note that this causes a change in the behavior of the unwinder: each
instance of a pt_regs on the stack is now considered a "frame".  So
callers of unwind_get_return_address() will now get an occasional
'regs->ip' address that would have previously been skipped over.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/8b9f84a21e39d249049e0547b559ff8da0df0988.1476973742.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/calling.h       | 20 +++++++++++
 arch/x86/entry/entry_32.S      | 33 +++++++++++++++---
 arch/x86/entry/entry_64.S      | 10 ++++--
 arch/x86/include/asm/unwind.h  | 16 ++++++++-
 arch/x86/kernel/unwind_frame.c | 76 +++++++++++++++++++++++++++++++++++++-----
 5 files changed, 139 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 38dcdfa..05ed3d3 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -192,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
 	.byte 0xf1
 	.endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+	.if \ptregs_offset
+		leaq \ptregs_offset(%rsp), %rbp
+	.else
+		mov %rsp, %rbp
+	.endif
+	orq	$0x1, %rbp
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2225105..acc0c6f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,6 +176,22 @@
 	SET_KERNEL_GS %edx
 .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+	mov %esp, %ebp
+	orl $0x1, %ebp
+#endif
+.endm
+
 .macro RESTORE_INT_REGS
 	popl	%ebx
 	popl	%ecx
@@ -641,6 +657,7 @@ common_interrupt:
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	do_IRQ
@@ -652,6 +669,7 @@ ENTRY(name)				\
 	ASM_CLAC;			\
 	pushl	$~(nr);			\
 	SAVE_ALL;			\
+	ENCODE_FRAME_POINTER;		\
 	TRACE_IRQS_OFF			\
 	movl	%esp, %eax;		\
 	call	fn;			\
@@ -786,6 +804,7 @@ END(spurious_interrupt_bug)
 ENTRY(xen_hypervisor_callback)
 	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 
 	/*
@@ -840,6 +859,7 @@ ENTRY(xen_failsafe_callback)
 	jmp	iret_exc
 5:	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	jmp	ret_from_exception
 
 .section .fixup, "ax"
@@ -1067,6 +1087,7 @@ common_exception:
 	pushl	%edx
 	pushl	%ecx
 	pushl	%ebx
+	ENCODE_FRAME_POINTER
 	cld
 	movl	$(__KERNEL_PERCPU), %ecx
 	movl	%ecx, %fs
@@ -1099,6 +1120,7 @@ ENTRY(debug)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# error code 0
 	movl	%esp, %eax			# pt_regs pointer
 
@@ -1114,11 +1136,11 @@ ENTRY(debug)
 
 .Ldebug_from_sysenter_stack:
 	/* We're on the SYSENTER stack.  Switch off. */
-	movl	%esp, %ebp
+	movl	%esp, %ebx
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
 	TRACE_IRQS_OFF
 	call	do_debug
-	movl	%ebp, %esp
+	movl	%ebx, %esp
 	jmp	ret_from_exception
 END(debug)
 
@@ -1141,6 +1163,7 @@ ENTRY(nmi)
 
 	pushl	%eax				# pt_regs->orig_ax
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
 
@@ -1159,10 +1182,10 @@ ENTRY(nmi)
 	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
 	 * is using the thread stack right now, so it's safe for us to use it.
 	 */
-	movl	%esp, %ebp
+	movl	%esp, %ebx
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
 	call	do_nmi
-	movl	%ebp, %esp
+	movl	%ebx, %esp
 	jmp	.Lrestore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
@@ -1179,6 +1202,7 @@ ENTRY(nmi)
 	.endr
 	pushl	%eax
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	FIXUP_ESPFIX_STACK			# %eax == %esp
 	xorl	%edx, %edx			# zero error code
 	call	do_nmi
@@ -1192,6 +1216,7 @@ ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ef766a3..65fad8a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -469,6 +469,7 @@ END(irq_entries_start)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 
 	testb	$3, CS(%rsp)
 	jz	1f
@@ -985,6 +986,7 @@ ENTRY(xen_failsafe_callback)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 	jmp	error_exit
 END(xen_failsafe_callback)
 
@@ -1028,6 +1030,7 @@ ENTRY(paranoid_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -1075,6 +1078,7 @@ ENTRY(error_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	xorl	%ebx, %ebx
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
@@ -1257,6 +1261,7 @@ ENTRY(nmi)
 	pushq	%r13		/* pt_regs->r13 */
 	pushq	%r14		/* pt_regs->r14 */
 	pushq	%r15		/* pt_regs->r15 */
+	ENCODE_FRAME_POINTER
 
 	/*
 	 * At this point we no longer need to worry about stack damage
@@ -1270,11 +1275,10 @@ ENTRY(nmi)
 
 	/*
 	 * Return back to user mode.  We must *not* do the normal exit
-	 * work, because we don't want to enable interrupts.  Fortunately,
-	 * do_nmi doesn't modify pt_regs.
+	 * work, because we don't want to enable interrupts.
 	 */
 	SWAPGS
-	jmp	restore_c_regs_and_iret
+	jmp	restore_regs_and_iret
 
 .Lnmi_from_kernel:
 	/*
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 46de9ac..c5a7f3a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,6 +13,7 @@ struct unwind_state {
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
 	unsigned long *bp;
+	struct pt_regs *regs;
 #else
 	unsigned long *sp;
 #endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	if (unwind_done(state))
 		return NULL;
 
-	return state->bp + 1;
+	return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	return state->regs;
 }
 
 #else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	return NULL;
 }
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_FRAME_POINTER */
 
 #endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a2456d4..2221ab1 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -14,6 +14,9 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (unwind_done(state))
 		return 0;
 
+	if (state->regs && user_mode(state->regs))
+		return 0;
+
 	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 				     addr_p);
 
@@ -21,6 +24,20 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+	unsigned long regs = (unsigned long)bp;
+
+	if (!(regs & 0x1))
+		return NULL;
+
+	return (struct pt_regs *)(regs & ~0x1);
+}
+
 static bool update_stack_state(struct unwind_state *state, void *addr,
 			       size_t len)
 {
@@ -43,26 +60,59 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 
 bool unwind_next_frame(struct unwind_state *state)
 {
-	unsigned long *next_bp;
+	struct pt_regs *regs;
+	unsigned long *next_bp, *next_frame;
+	size_t next_len;
 
 	if (unwind_done(state))
 		return false;
 
-	next_bp = (unsigned long *)*state->bp;
+	/* have we reached the end? */
+	if (state->regs && user_mode(state->regs))
+		goto the_end;
+
+	/* get the next frame pointer */
+	if (state->regs)
+		next_bp = (unsigned long *)state->regs->bp;
+	else
+		next_bp = (unsigned long *)*state->bp;
+
+	/* is the next frame pointer an encoded pointer to pt_regs? */
+	regs = decode_frame_pointer(next_bp);
+	if (regs) {
+		next_frame = (unsigned long *)regs;
+		next_len = sizeof(*regs);
+	} else {
+		next_frame = next_bp;
+		next_len = FRAME_HEADER_SIZE;
+	}
 
 	/* make sure the next frame's data is accessible */
-	if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
+	if (!update_stack_state(state, next_frame, next_len))
 		return false;
-
 	/* move to the next frame */
-	state->bp = next_bp;
+	if (regs) {
+		state->regs = regs;
+		state->bp = NULL;
+	} else {
+		state->bp = next_bp;
+		state->regs = NULL;
+	}
+
 	return true;
+
+the_end:
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		    struct pt_regs *regs, unsigned long *first_frame)
 {
+	unsigned long *bp, *frame;
+	size_t len;
+
 	memset(state, 0, sizeof(*state));
 	state->task = task;
 
@@ -73,12 +123,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	}
 
 	/* set up the starting stack frame */
-	state->bp = get_frame_pointer(task, regs);
+	bp = get_frame_pointer(task, regs);
+	regs = decode_frame_pointer(bp);
+	if (regs) {
+		state->regs = regs;
+		frame = (unsigned long *)regs;
+		len = sizeof(*regs);
+	} else {
+		state->bp = bp;
+		frame = bp;
+		len = FRAME_HEADER_SIZE;
+	}
 
 	/* initialize stack info and make sure the frame data is accessible */
-	get_stack_info(state->bp, state->task, &state->stack_info,
+	get_stack_info(frame, state->task, &state->stack_info,
 		       &state->stack_mask);
-	update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+	update_stack_state(state, frame, len);
 
 	/*
 	 * The caller can provide the address of the first frame directly

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/asm] x86/unwind: Create stack frames for saved syscall registers
  2016-10-20 16:34 ` [PATCH 2/6] x86/unwind: create stack frames for saved syscall registers Josh Poimboeuf
@ 2016-10-21  7:49   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-10-21  7:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, brgerst, hpa, luto, mingo, peterz, linux-kernel, jpoimboe,
	torvalds, dvlasenk, bp

Commit-ID:  acb4608ad1865a42af8e0a2db332a7c3a381e1f5
Gitweb:     http://git.kernel.org/tip/acb4608ad1865a42af8e0a2db332a7c3a381e1f5
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Thu, 20 Oct 2016 11:34:41 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Oct 2016 09:26:04 +0200

x86/unwind: Create stack frames for saved syscall registers

The entry code doesn't encode the pt_regs pointer for syscalls.  But the
pt_regs are always at the same location, so we can add a manual check
for them.

A later patch prints them as part of the oops stack dump.  They could be
useful, for example, to determine the arguments to a system call.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e176aa9272930cd3f51fda0b94e2eae356677da4.1476973742.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/unwind_frame.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 2221ab1..5795427 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -24,6 +24,14 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+static bool is_last_task_frame(struct unwind_state *state)
+{
+	unsigned long bp = (unsigned long)state->bp;
+	unsigned long regs = (unsigned long)task_pt_regs(state->task);
+
+	return bp == regs - FRAME_HEADER_SIZE;
+}
+
 /*
  * This determines if the frame pointer actually contains an encoded pointer to
  * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
@@ -71,6 +79,33 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->regs && user_mode(state->regs))
 		goto the_end;
 
+	if (is_last_task_frame(state)) {
+		regs = task_pt_regs(state->task);
+
+		/*
+		 * kthreads (other than the boot CPU's idle thread) have some
+		 * partial regs at the end of their stack which were placed
+		 * there by copy_thread_tls().  But the regs don't have any
+		 * useful information, so we can skip them.
+		 *
+		 * This user_mode() check is slightly broader than a PF_KTHREAD
+		 * check because it also catches the awkward situation where a
+		 * newly forked kthread transitions into a user task by calling
+		 * do_execve(), which eventually clears PF_KTHREAD.
+		 */
+		if (!user_mode(regs))
+			goto the_end;
+
+		/*
+		 * We're almost at the end, but not quite: there's still the
+		 * syscall regs frame.  Entry code doesn't encode the regs
+		 * pointer for syscalls, so we have to set it manually.
+		 */
+		state->regs = regs;
+		state->bp = NULL;
+		return true;
+	}
+
 	/* get the next frame pointer */
 	if (state->regs)
 		next_bp = (unsigned long *)state->regs->bp;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/asm] x86/dumpstack: Print stack identifier on its own line
  2016-10-20 16:34 ` [PATCH 3/6] x86/dumpstack: print stack identifier on its own line Josh Poimboeuf
@ 2016-10-21  7:50   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-10-21  7:50 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, peterz, torvalds, mingo, hpa, luto, linux-kernel, jpoimboe,
	dvlasenk, tglx, brgerst

Commit-ID:  79439d8e15b51fa359a0f5d0c8f856c1f5b4bd56
Gitweb:     http://git.kernel.org/tip/79439d8e15b51fa359a0f5d0c8f856c1f5b4bd56
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Thu, 20 Oct 2016 11:34:42 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Oct 2016 09:26:04 +0200

x86/dumpstack: Print stack identifier on its own line

show_trace_log_lvl() prints the stack id (e.g. "<IRQ>") without a
newline so that any stack address printed after it will appear on the
same line.  That causes the first stack address to be vertically
misaligned with the rest, making it visually cluttered and slightly
confusing:

  Call Trace:
   <IRQ> [<ffffffff814431c3>] dump_stack+0x86/0xc3
   [<ffffffff8100828b>] perf_callchain_kernel+0x14b/0x160
   [<ffffffff811e915f>] get_perf_callchain+0x15f/0x2b0
   ...
   <EOI> [<ffffffff8189c6c3>] ? _raw_spin_unlock_irq+0x33/0x60
   [<ffffffff810e1c84>] finish_task_switch+0xb4/0x250
   [<ffffffff8106f7dc>] do_async_page_fault+0x2c/0xa0

It will look worse once we start printing pt_regs registers found in the
middle of the stack:

  <IRQ> RIP: 0010:[<ffffffff8189c6c3>]  [<ffffffff8189c6c3>] _raw_spin_unlock_irq+0x33/0x60
  RSP: 0018:ffff88007876f720  EFLAGS: 00000206
  RAX: ffff8800786caa40 RBX: ffff88007d5da140 RCX: 0000000000000007
  ...

Improve readability by adding a newline to the stack name:

  Call Trace:
   <IRQ>
   [<ffffffff814431c3>] dump_stack+0x86/0xc3
   [<ffffffff8100828b>] perf_callchain_kernel+0x14b/0x160
   [<ffffffff811e915f>] get_perf_callchain+0x15f/0x2b0
   ...
   <EOI>
   [<ffffffff8189c6c3>] ? _raw_spin_unlock_irq+0x33/0x60
   [<ffffffff810e1c84>] finish_task_switch+0xb4/0x250
   [<ffffffff8106f7dc>] do_async_page_fault+0x2c/0xa0

Now that "continued" lines are no longer needed, we can also remove the
hack of using the empty string (aka KERN_CONT) and replace it with
KERN_DEFAULT.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/9bdd6dee2c74555d45500939fcc155997dc7889e.1476973742.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/dumpstack.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9b7cf5c..3251177 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -97,7 +97,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 		stack_type_str(stack_info.type, &str_begin, &str_end);
 		if (str_begin)
-			printk("%s <%s> ", log_lvl, str_begin);
+			printk("%s <%s>\n", log_lvl, str_begin);
 
 		/*
 		 * Scan the stack, printing any text addresses we find.  At the
@@ -149,7 +149,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		}
 
 		if (str_end)
-			printk("%s <%s> ", log_lvl, str_end);
+			printk("%s <%s>\n", log_lvl, str_end);
 	}
 }
 
@@ -164,12 +164,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 	if (!sp && task == current)
 		sp = get_stack_pointer(current, NULL);
 
-	show_stack_log_lvl(task, NULL, sp, "");
+	show_stack_log_lvl(task, NULL, sp, KERN_DEFAULT);
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-	show_stack_log_lvl(current, regs, NULL, "");
+	show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/asm] x86/dumpstack: Print any pt_regs found on the stack
  2016-10-20 16:34 ` [PATCH 4/6] x86/dumpstack: print any pt_regs found on the stack Josh Poimboeuf
@ 2016-10-21  7:50   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-10-21  7:50 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, brgerst, luto, bp, dvlasenk, peterz, jpoimboe, mingo,
	torvalds, linux-kernel, hpa

Commit-ID:  3b3fa11bc7000bb86c9fd30703da3689a9a9758d
Gitweb:     http://git.kernel.org/tip/3b3fa11bc7000bb86c9fd30703da3689a9a9758d
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Thu, 20 Oct 2016 11:34:43 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Oct 2016 09:26:04 +0200

x86/dumpstack: Print any pt_regs found on the stack

Now that we can find pt_regs registers on the stack, print them.  Here's
an example of what it looks like:

  Call Trace:
   <IRQ>
   [<ffffffff8144b793>] dump_stack+0x86/0xc3
   [<ffffffff81142c73>] hrtimer_interrupt+0xb3/0x1c0
   [<ffffffff8105eb86>] local_apic_timer_interrupt+0x36/0x60
   [<ffffffff818b27cd>] smp_apic_timer_interrupt+0x3d/0x50
   [<ffffffff818b06ee>] apic_timer_interrupt+0x9e/0xb0
  RIP: 0010:[<ffffffff818aef43>]  [<ffffffff818aef43>] _raw_spin_unlock_irq+0x33/0x60
  RSP: 0018:ffff880079c4f760  EFLAGS: 00000202
  RAX: ffff880078738000 RBX: ffff88007d3da0c0 RCX: 0000000000000007
  RDX: 0000000000006d78 RSI: ffff8800787388f0 RDI: ffff880078738000
  RBP: ffff880079c4f768 R08: 0000002199088f38 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff81e0d540
  R13: ffff8800369fb700 R14: 0000000000000000 R15: ffff880078738000
   <EOI>
   [<ffffffff810e1f14>] finish_task_switch+0xb4/0x250
   [<ffffffff810e1ed6>] ? finish_task_switch+0x76/0x250
   [<ffffffff818a7b61>] __schedule+0x3e1/0xb20
   ...
   [<ffffffff810759c8>] trace_do_page_fault+0x58/0x2c0
   [<ffffffff8106f7dc>] do_async_page_fault+0x2c/0xa0
   [<ffffffff818b1dd8>] async_page_fault+0x28/0x30
  RIP: 0010:[<ffffffff8145b062>]  [<ffffffff8145b062>] __clear_user+0x42/0x70
  RSP: 0018:ffff880079c4fd38  EFLAGS: 00010202
  RAX: 0000000000000000 RBX: 0000000000000138 RCX: 0000000000000138
  RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000000000061b640
  RBP: ffff880079c4fd48 R08: 0000002198feefd7 R09: ffffffff82a40928
  R10: 0000000000000001 R11: 0000000000000000 R12: 000000000061b640
  R13: 0000000000000000 R14: ffff880079c50000 R15: ffff8800791d7400
   [<ffffffff8145b043>] ? __clear_user+0x23/0x70
   [<ffffffff8145b0fb>] clear_user+0x2b/0x40
   [<ffffffff812fbda2>] load_elf_binary+0x1472/0x1750
   [<ffffffff8129a591>] search_binary_handler+0xa1/0x200
   [<ffffffff8129b69b>] do_execveat_common.isra.36+0x6cb/0x9f0
   [<ffffffff8129b5f3>] ? do_execveat_common.isra.36+0x623/0x9f0
   [<ffffffff8129bcaa>] SyS_execve+0x3a/0x50
   [<ffffffff81003f5c>] do_syscall_64+0x6c/0x1e0
   [<ffffffff818afa3f>] entry_SYSCALL64_slow_path+0x25/0x25
  RIP: 0033:[<00007fd2e2f2e537>]  [<00007fd2e2f2e537>] 0x7fd2e2f2e537
  RSP: 002b:00007ffc449c5fc8  EFLAGS: 00000246
  RAX: ffffffffffffffda RBX: 00007ffc449c8860 RCX: 00007fd2e2f2e537
  RDX: 000000000127cc40 RSI: 00007ffc449c8860 RDI: 00007ffc449c6029
  RBP: 00007ffc449c60b0 R08: 65726f632d667265 R09: 00007ffc449c5e20
  R10: 00000000000005a7 R11: 0000000000000246 R12: 000000000127cc40
  R13: 000000000127ce05 R14: 00007ffc449c6029 R15: 000000000127ce01

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/5cc2c512ec82cfba00dd22467644d4ed751a48c0.1476973742.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/dumpstack.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 3251177..64281a1 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -82,7 +82,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	 * - softirq stack
 	 * - hardirq stack
 	 */
-	for (; stack; stack = stack_info.next_sp) {
+	for (regs = NULL; stack; stack = stack_info.next_sp) {
 		const char *str_begin, *str_end;
 
 		/*
@@ -119,6 +119,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			if (!__kernel_text_address(addr))
 				continue;
 
+			/*
+			 * Don't print regs->ip again if it was already printed
+			 * by __show_regs() below.
+			 */
+			if (regs && stack == &regs->ip) {
+				unwind_next_frame(&state);
+				continue;
+			}
+
 			if (stack == ret_addr_p)
 				reliable = 1;
 
@@ -146,6 +155,11 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			 * of the addresses will just be printed as unreliable.
 			 */
 			unwind_next_frame(&state);
+
+			/* if the frame has entry regs, print them */
+			regs = unwind_get_entry_regs(&state);
+			if (regs)
+				__show_regs(regs, 0);
 		}
 
 		if (str_end)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/asm] x86/dumpstack: Fix duplicate RIP address display in __show_regs()
  2016-10-20 16:34 ` [PATCH 5/6] x86/dumpstack: fix duplicate RIP address display in __show_regs() Josh Poimboeuf
@ 2016-10-21  7:51   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-10-21  7:51 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, linux-kernel, luto, brgerst, torvalds, mingo, jpoimboe, hpa,
	tglx, dvlasenk, peterz

Commit-ID:  1141c3e39c64e4aba2d98cb3dcca95369c9dafbe
Gitweb:     http://git.kernel.org/tip/1141c3e39c64e4aba2d98cb3dcca95369c9dafbe
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Thu, 20 Oct 2016 11:34:44 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Oct 2016 09:26:04 +0200

x86/dumpstack: Fix duplicate RIP address display in __show_regs()

The RIP address is shown twice in __show_regs().  Before:

  RIP: 0010:[<ffffffff81070446>]  [<ffffffff81070446>] native_write_msr+0x6/0x30

After:

  RIP: 0010:[<ffffffff81070446>] native_write_msr+0x6/0x30

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/b3fda66f36761759b000883b059cdd9a7649dcc1.1476973742.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/process_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3..b3b50ac 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,8 +61,8 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
 
-	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-	printk_address(regs->ip);
+	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] %pS\n", regs->cs & 0xffff,
+			regs->ip, (void *)regs->ip);
 	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
 			regs->sp, regs->flags);
 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/asm] x86/dumpstack: Print orig_ax in __show_regs()
  2016-10-20 16:34 ` [PATCH 6/6] x86/dumpstack: print orig_ax " Josh Poimboeuf
@ 2016-10-21  7:51   ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-10-21  7:51 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, hpa, torvalds, bp, jpoimboe, luto, linux-kernel,
	dvlasenk, tglx, mingo, luto, brgerst

Commit-ID:  6fa81a12b3f1834a22167aa2d7b24dcc4bf884e3
Gitweb:     http://git.kernel.org/tip/6fa81a12b3f1834a22167aa2d7b24dcc4bf884e3
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Thu, 20 Oct 2016 11:34:45 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 21 Oct 2016 09:26:05 +0200

x86/dumpstack: Print orig_ax in __show_regs()

The value of regs->orig_ax contains potentially useful debugging data:
For syscalls it contains the syscall number.  For interrupts it contains
the (negated) vector number.  To reduce noise, print it only if it has a
useful value (i.e., something other than -1).

Here's what it looks like for a write syscall:

  RIP: 0033:[<00007f53ad7b1940>] 0x7f53ad7b1940
  RSP: 002b:00007fff8de66558 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
  RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007f53ad7b1940
  RDX: 0000000000000002 RSI: 00007f53ae0ca000 RDI: 0000000000000001
  ...

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/93f0fe0307a4af884d3fca00edabcc8cff236002.1476973742.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/process_64.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3b50ac..f1c36da 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -63,8 +63,13 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] %pS\n", regs->cs & 0xffff,
 			regs->ip, (void *)regs->ip);
-	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
 			regs->sp, regs->flags);
+	if (regs->orig_ax != -1)
+		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+	else
+		pr_cont("\n");
+
 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
 	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-10-21  7:52 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-20 16:34 [PATCH 0/6] x86/dumpstack: print entry regs when dumping the stack Josh Poimboeuf
2016-10-20 16:34 ` [PATCH 1/6] x86/entry/unwind: create stack frames for saved interrupt registers Josh Poimboeuf
2016-10-21  7:49   ` [tip:x86/asm] x86/entry/unwind: Create " tip-bot for Josh Poimboeuf
2016-10-20 16:34 ` [PATCH 2/6] x86/unwind: create stack frames for saved syscall registers Josh Poimboeuf
2016-10-21  7:49   ` [tip:x86/asm] x86/unwind: Create " tip-bot for Josh Poimboeuf
2016-10-20 16:34 ` [PATCH 3/6] x86/dumpstack: print stack identifier on its own line Josh Poimboeuf
2016-10-21  7:50   ` [tip:x86/asm] x86/dumpstack: Print " tip-bot for Josh Poimboeuf
2016-10-20 16:34 ` [PATCH 4/6] x86/dumpstack: print any pt_regs found on the stack Josh Poimboeuf
2016-10-21  7:50   ` [tip:x86/asm] x86/dumpstack: Print " tip-bot for Josh Poimboeuf
2016-10-20 16:34 ` [PATCH 5/6] x86/dumpstack: fix duplicate RIP address display in __show_regs() Josh Poimboeuf
2016-10-21  7:51   ` [tip:x86/asm] x86/dumpstack: Fix " tip-bot for Josh Poimboeuf
2016-10-20 16:34 ` [PATCH 6/6] x86/dumpstack: print orig_ax " Josh Poimboeuf
2016-10-21  7:51   ` [tip:x86/asm] x86/dumpstack: Print " tip-bot for Josh Poimboeuf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.