linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/6] Tracing vs CR2
@ 2019-07-11 11:40 Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 1/6] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
                   ` (6 more replies)
  0 siblings, 7 replies; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:40 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

Hi,

Here's the latest (and hopefully final) set of tracing vs CR2 patches.

They are basically the same as v2, with only minor edits and tags collected
from the last review.

Please consider.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 1/6] x86/paravirt: Make read_cr2() CALLEE_SAVE
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
@ 2019-07-11 11:40 ` Peter Zijlstra
  2019-07-17 21:22   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 2/6] x86/entry/32: Simplify common_exception Peter Zijlstra
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:40 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

The one paravirt read_cr2() implementation (Xen) is actually quite
trivial and doesn't need to clobber anything other than the return
register. By making read_cr2() CALLEE_SAVE we avoid all the PUSH/POP
nonsense and allow more convenient use from assembly.

Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/calling.h              |    6 ++++++
 arch/x86/include/asm/paravirt.h       |   22 +++++++++++++---------
 arch/x86/include/asm/paravirt_types.h |    2 +-
 arch/x86/kernel/asm-offsets.c         |    1 +
 arch/x86/kernel/head_64.S             |    4 +---
 arch/x86/kernel/paravirt.c            |    2 +-
 arch/x86/xen/enlighten_pv.c           |    3 ++-
 arch/x86/xen/mmu_pv.c                 |   12 +-----------
 arch/x86/xen/xen-asm.S                |   17 +++++++++++++++++
 arch/x86/xen/xen-ops.h                |    3 +++
 10 files changed, 46 insertions(+), 26 deletions(-)

--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -383,3 +383,9 @@ For 32-bit we have the following convent
 .Lafter_call_\@:
 #endif
 .endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned lo
 
 static inline unsigned long read_cr2(void)
 {
-	return PVOP_CALL0(unsigned long, mmu.read_cr2);
+	return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
 }
 
 static inline void write_cr2(unsigned long x)
@@ -909,13 +909,7 @@ extern void default_banner(void);
 		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_ops+PV_CPU_swapgs);		\
 		 )
-#endif
-
-#define GET_CR2_INTO_RAX				\
-	ANNOTATE_RETPOLINE_SAFE;				\
-	call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64),			\
 		  ANNOTATE_RETPOLINE_SAFE;				\
@@ -929,9 +923,19 @@ extern void default_banner(void);
 		  call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);	    \
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif	/* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX							\
+	PARA_SITE(PARA_PATCH(PV_MMU_read_cr2),				\
+		  ANNOTATE_RETPOLINE_SAFE;				\
+		  call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);		\
+		 )
+
+#endif /* CONFIG_PARAVIRT_XXL */
 
-#endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
 #else  /* CONFIG_PARAVIRT */
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
 	void (*exit_mmap)(struct mm_struct *mm);
 
 #ifdef CONFIG_PARAVIRT_XXL
-	unsigned long (*read_cr2)(void);
+	struct paravirt_callee_save read_cr2;
 	void (*write_cr2)(unsigned long);
 
 	unsigned long (*read_cr3)(void);
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ static void __used common(void)
 	BLANK();
 	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
 	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+	OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
 #endif
 
 	BLANK();
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
 #ifdef CONFIG_PARAVIRT_XXL
 #include <asm/asm-offsets.h>
 #include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
 #else
-#define GET_CR2_INTO(reg) movq %cr2, reg
 #define INTERRUPT_RETURN iretq
 #endif
 
@@ -323,7 +321,7 @@ END(early_idt_handler_array)
 
 	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
-	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
+	GET_CR2_INTO(%rdi)	/* can clobber %rax if pv */
 	call early_make_pgtable
 	andl %eax,%eax
 	jz 20f			/* All good */
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops =
 	.mmu.exit_mmap		= paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
-	.mmu.read_cr2		= native_read_cr2,
+	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(native_read_cr2),
 	.mmu.write_cr2		= native_write_cr2,
 	.mmu.read_cr3		= __native_read_cr3,
 	.mmu.write_cr3		= native_write_cr3,
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placemen
 			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
 		pv_ops.irq.irq_enable =
 			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
-		pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+		pv_ops.mmu.read_cr2 =
+			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
 	}
 }
 
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
-static unsigned long xen_read_cr2(void)
-{
-	return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
-	return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
 static noinline void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
 }
 
 static const struct pv_mmu_ops xen_mmu_ops __initconst = {
-	.read_cr2 = xen_read_cr2,
+	.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
 	.write_cr2 = xen_write_cr2,
 
 	.read_cr3 = xen_read_cr3,
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
 #include <asm/percpu.h>
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
+#include <asm/asm.h>
 
 #include <linux/linkage.h>
 
@@ -135,3 +136,19 @@ ENTRY(check_events)
 	FRAME_END
 	ret
 ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2_direct);
+
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(vo
 __visible unsigned long xen_save_fl_direct(void);
 __visible void xen_restore_fl_direct(unsigned long);
 
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 2/6] x86/entry/32: Simplify common_exception
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 1/6] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
@ 2019-07-11 11:40 ` Peter Zijlstra
  2019-07-17 21:23   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 3/6] x86/entry/64: Simplify idtentry a little Peter Zijlstra
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:40 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

By adding one more option to SAVE_ALL we can make use of it in
common_exception and simplify things. This saves duplication later
where page_fault will no longer use common_exception.

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_32.S |   36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,9 +294,11 @@
 .Lfinished_frame_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
 	cld
+.if \skip_gs == 0
 	PUSH_GS
+.endif
 	FIXUP_FRAME
 	pushl	%fs
 	pushl	%es
@@ -313,13 +315,13 @@
 	movl	%edx, %es
 	movl	$(__KERNEL_PERCPU), %edx
 	movl	%edx, %fs
+.if \skip_gs == 0
 	SET_KERNEL_GS %edx
-
+.endif
 	/* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
 	SWITCH_TO_KERNEL_STACK
 .endif
-
 .endm
 
 .macro SAVE_ALL_NMI cr3_reg:req
@@ -1448,32 +1450,20 @@ END(page_fault)
 
 common_exception:
 	/* the function address is in %gs's slot on the stack */
-	FIXUP_FRAME
-	pushl	%fs
-	pushl	%es
-	pushl	%ds
-	pushl	%eax
-	movl	$(__USER_DS), %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	$(__KERNEL_PERCPU), %eax
-	movl	%eax, %fs
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%edx
-	pushl	%ecx
-	pushl	%ebx
-	SWITCH_TO_KERNEL_STACK
+	SAVE_ALL switch_stacks=1 skip_gs=1
 	ENCODE_FRAME_POINTER
-	cld
 	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
 	GS_TO_REG %ecx
 	movl	PT_GS(%esp), %edi		# get the function address
-	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
-	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 3/6] x86/entry/64: Simplify idtentry a little
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 1/6] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 2/6] x86/entry/32: Simplify common_exception Peter Zijlstra
@ 2019-07-11 11:40 ` Peter Zijlstra
  2019-07-17 21:24   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 4/6] x86/entry/64: Update comments and sanity tests for create_gap Peter Zijlstra
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:40 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

There's a bunch of duplication in idtentry, namely the
.Lfrom_usermode_switch_stack is a paranoid=0 copy of the normal flow.

Make this explicit by creating an idtentry_part helper macro.

Acked-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_64.S |  102 +++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 54 deletions(-)

--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,6 +864,52 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
+.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+	.if \paranoid
+	call	paranoid_entry
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+	.else
+	call	error_entry
+	.endif
+	UNWIND_HINT_REGS
+
+	.if \paranoid
+	.if \shift_ist != -1
+	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
+	.else
+	TRACE_IRQS_OFF
+	.endif
+	.endif
+
+	movq	%rsp, %rdi			/* pt_regs pointer */
+
+	.if \has_error_code
+	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
+	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
+	.else
+	xorl	%esi, %esi			/* no error code */
+	.endif
+
+	.if \shift_ist != -1
+	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	call	\do_sym
+
+	.if \shift_ist != -1
+	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	.if \paranoid
+	/* this procedure expect "no swapgs" flag in ebx */
+	jmp	paranoid_exit
+	.else
+	jmp	error_exit
+	.endif
+
+.endm
+
 /**
  * idtentry - Generate an IDT entry stub
  * @sym:		Name of the generated entry point
@@ -934,47 +980,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	.if \paranoid
-	call	paranoid_entry
-	.else
-	call	error_entry
-	.endif
-	UNWIND_HINT_REGS
-	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
-	.if \paranoid
-	.if \shift_ist != -1
-	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
-	.else
-	TRACE_IRQS_OFF
-	.endif
-	.endif
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl	%esi, %esi			/* no error code */
-	.endif
-
-	.if \shift_ist != -1
-	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	call	\do_sym
-
-	.if \shift_ist != -1
-	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	/* these procedures expect "no swapgs" flag in ebx */
-	.if \paranoid
-	jmp	paranoid_exit
-	.else
-	jmp	error_exit
-	.endif
+	idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -983,21 +989,9 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	call	error_entry
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl	%esi, %esi			/* no error code */
+	idtentry_part \do_sym, \has_error_code, paranoid=0
 	.endif
 
-	call	\do_sym
-
-	jmp	error_exit
-	.endif
 _ASM_NOKPROBE(\sym)
 END(\sym)
 .endm



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 4/6] x86/entry/64: Update comments and sanity tests for create_gap
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
                   ` (2 preceding siblings ...)
  2019-07-11 11:40 ` [PATCH v3 3/6] x86/entry/64: Simplify idtentry a little Peter Zijlstra
@ 2019-07-11 11:40 ` Peter Zijlstra
  2019-07-17 21:25   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
  2019-07-11 11:40 ` [PATCH v3 5/6] x86/mm, tracing: Fix CR2 corruption Peter Zijlstra
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:40 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

Commit 2700fefdb2d9 ("x86_64: Add gap to int3 to allow for call
emulation") forgot to update the comment, do so now.

Acked-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_64.S |   17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -913,15 +913,16 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
 /**
  * idtentry - Generate an IDT entry stub
  * @sym:		Name of the generated entry point
- * @do_sym: 		C function to be called
- * @has_error_code: 	True if this IDT vector has an error code on the stack
- * @paranoid: 		non-zero means that this vector may be invoked from
+ * @do_sym:		C function to be called
+ * @has_error_code:	True if this IDT vector has an error code on the stack
+ * @paranoid:		non-zero means that this vector may be invoked from
  *			kernel mode with user GSBASE and/or user CR3.
  *			2 is special -- see below.
  * @shift_ist:		Set to an IST index if entries from kernel mode should
- *             		decrement the IST stack so that nested entries get a
+ *			decrement the IST stack so that nested entries get a
  *			fresh stack.  (This is for #DB, which has a nasty habit
- *             		of recursing.)
+ *			of recursing.)
+ * @create_gap:		create a 6-word stack gap when coming from kernel mode.
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym.  The stub has the following
@@ -951,10 +952,14 @@ ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
-	.if \shift_ist != -1 && \paranoid == 0
+	.if \shift_ist != -1 && \paranoid != 1
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
+	.if \create_gap && \paranoid
+	.error "using create_gap requires paranoid=0"
+	.endif
+
 	ASM_CLAC
 
 	.if \has_error_code == 0



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 5/6] x86/mm, tracing: Fix CR2 corruption
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
                   ` (3 preceding siblings ...)
  2019-07-11 11:40 ` [PATCH v3 4/6] x86/entry/64: Update comments and sanity tests for create_gap Peter Zijlstra
@ 2019-07-11 11:40 ` Peter Zijlstra
  2019-07-17 21:25   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
  2019-07-11 11:41 ` [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Peter Zijlstra
  2019-07-16 19:33 ` [PATCH v3 0/6] Tracing vs CR2 Vegard Nossum
  6 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:40 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

Despite the current efforts to read CR2 before tracing happens there
still exist a number of possible holes:

  idtentry page_fault             do_page_fault           has_error_code=1
    call error_entry
      TRACE_IRQS_OFF
        call trace_hardirqs_off*
          #PF // modifies CR2

      CALL_enter_from_user_mode
        __context_tracking_exit()
          trace_user_exit(0)
            #PF // modifies CR2

    call do_page_fault
      address = read_cr2(); /* whoopsie */

And similar for i386.

Fix it by pulling the CR2 read into the entry code, before any of that
stuff gets a chance to run and ruin things.

Reported-by: He Zhe <zhe.he@windriver.com>
Reported-by: Eiichi Tsukata <devel@etsukata.com>
Debugged-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_32.S       |   25 ++++++++++++++++++++++---
 arch/x86/entry/entry_64.S       |   35 ++++++++++++++++++-----------------
 arch/x86/include/asm/kvm_para.h |    2 +-
 arch/x86/include/asm/traps.h    |    4 ++--
 arch/x86/kernel/kvm.c           |    8 ++++----
 arch/x86/kernel/traps.c         |    6 +-----
 arch/x86/mm/fault.c             |   30 +++++++++++-------------------
 7 files changed, 59 insertions(+), 51 deletions(-)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1443,9 +1443,28 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vec
 
 ENTRY(page_fault)
 	ASM_CLAC
-	pushl	$do_page_fault
-	ALIGN
-	jmp common_exception
+	pushl	$0; /* %gs's slot on the stack */
+
+	SAVE_ALL switch_stacks=1 skip_gs=1
+
+	ENCODE_FRAME_POINTER
+	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
+	GS_TO_REG %ecx
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
+
+	GET_CR2_INTO(%ecx)			# might clobber %eax
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
+	TRACE_IRQS_OFF
+	movl	%esp, %eax			# pt_regs pointer
+	call	do_page_fault
+	jmp	ret_from_exception
 END(page_fault)
 
 common_exception:
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,7 +864,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
-.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
 
 	.if \paranoid
 	call	paranoid_entry
@@ -874,12 +874,21 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
 	.endif
 	UNWIND_HINT_REGS
 
-	.if \paranoid
+	.if \read_cr2
+	GET_CR2_INTO(%rdx);			/* can clobber %rax */
+	.endif
+
 	.if \shift_ist != -1
 	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
 	.else
 	TRACE_IRQS_OFF
 	.endif
+
+	.if \paranoid == 0
+	testb	$3, CS(%rsp)
+	jz	.Lfrom_kernel_no_context_tracking_\@
+	CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
 	.endif
 
 	movq	%rsp, %rdi			/* pt_regs pointer */
@@ -923,6 +932,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  *			fresh stack.  (This is for #DB, which has a nasty habit
  *			of recursing.)
  * @create_gap:		create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2:		load CR2 into the 3rd argument; done before calling any C code
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym.  The stub has the following
@@ -947,7 +957,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
@@ -985,7 +995,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset
+	idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -994,7 +1004,7 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_part \do_sym, \has_error_code, paranoid=0
+	idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
 	.endif
 
 _ASM_NOKPROBE(\sym)
@@ -1006,7 +1016,7 @@ idtentry overflow			do_overflow			has_er
 idtentry bounds				do_bounds			has_error_code=0
 idtentry invalid_op			do_invalid_op			has_error_code=0
 idtentry device_not_available		do_device_not_available		has_error_code=0
-idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2
+idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2 read_cr2=1
 idtentry coprocessor_segment_overrun	do_coprocessor_segment_overrun	has_error_code=0
 idtentry invalid_TSS			do_invalid_TSS			has_error_code=1
 idtentry segment_not_present		do_segment_not_present		has_error_code=1
@@ -1179,10 +1189,10 @@ idtentry xenint3		do_int3			has_error_co
 #endif
 
 idtentry general_protection	do_general_protection	has_error_code=1
-idtentry page_fault		do_page_fault		has_error_code=1
+idtentry page_fault		do_page_fault		has_error_code=1	read_cr2=1
 
 #ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault	do_async_page_fault	has_error_code=1
+idtentry async_page_fault	do_async_page_fault	has_error_code=1	read_cr2=1
 #endif
 
 #ifdef CONFIG_X86_MCE
@@ -1281,18 +1291,9 @@ ENTRY(error_entry)
 	movq	%rax, %rsp			/* switch stack */
 	ENCODE_FRAME_POINTER
 	pushq	%r12
-
-	/*
-	 * We need to tell lockdep that IRQs are off.  We can't do this until
-	 * we fix gsbase, and we should do it before enter_from_user_mode
-	 * (which can take locks).
-	 */
-	TRACE_IRQS_OFF
-	CALL_enter_from_user_mode
 	ret
 
 .Lerror_entry_done:
-	TRACE_IRQS_OFF
 	ret
 
 	/*
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, i
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init kvm_spinlock_init(void);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct
 dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
 #ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
 void __init trap_init(void);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
 dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_
 NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
 dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
 	enum ctx_state prev_state;
 
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
-		do_page_fault(regs, error_code);
+		do_page_fault(regs, error_code, address);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
 		prev_state = exception_enter();
-		kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+		kvm_async_pf_task_wait((u32)address, !user_mode(regs));
 		exception_exit(prev_state);
 		break;
 	case KVM_PV_REASON_PAGE_READY:
 		rcu_irq_enter();
-		kvm_async_pf_task_wake((u32)read_cr2());
+		kvm_async_pf_task_wake((u32)address);
 		rcu_irq_exit();
 		break;
 	}
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_o
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
-	unsigned long cr2;
-#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struc
 	 * stack even if the actual trigger for the double fault was
 	 * something else.
 	 */
-	cr2 = read_cr2();
 	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
 		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
 #endif
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1509,9 +1509,8 @@ void do_user_addr_fault(struct pt_regs *
 NOKPROBE_SYMBOL(do_user_addr_fault);
 
 /*
- * This routine handles page faults.  It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
  */
 static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1530,33 +1529,26 @@ __do_page_fault(struct pt_regs *regs, un
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-			 unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+			 unsigned long address)
 {
+	if (!trace_pagefault_enabled())
+		return;
+
 	if (user_mode(regs))
 		trace_page_fault_user(address, regs, error_code);
 	else
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	if (trace_pagefault_enabled())
-		trace_page_fault_entries(address, regs, error_code);
-
+	trace_page_fault_entries(regs, error_code, address);
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
                   ` (4 preceding siblings ...)
  2019-07-11 11:40 ` [PATCH v3 5/6] x86/mm, tracing: Fix CR2 corruption Peter Zijlstra
@ 2019-07-11 11:41 ` Peter Zijlstra
  2019-07-11 14:45   ` Andy Lutomirski
  2019-07-16 19:33 ` [PATCH v3 0/6] Tracing vs CR2 Vegard Nossum
  6 siblings, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 11:41 UTC (permalink / raw)
  To: tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel, peterz

Since INT3/#BP no longer runs on an IST, this workaround is no longer
required.

Tested by running lockdep+ftrace as described in the initial commit:

  5963e317b1e9 ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_64.S |   46 ++--------------------------------------------
 1 file changed, 2 insertions(+), 44 deletions(-)

--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -67,44 +67,6 @@ END(native_usergs_sysret64)
 .endm
 
 /*
- * When dynamic function tracer is enabled it will add a breakpoint
- * to all locations that it is about to modify, sync CPUs, update
- * all the code, sync CPUs, then remove the breakpoints. In this time
- * if lockdep is enabled, it might jump back into the debug handler
- * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
- *
- * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
- * make sure the stack pointer does not get reset back to the top
- * of the debug stack, and instead just reuses the current stack.
- */
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
-
-.macro TRACE_IRQS_OFF_DEBUG
-	call	debug_stack_set_zero
-	TRACE_IRQS_OFF
-	call	debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_ON_DEBUG
-	call	debug_stack_set_zero
-	TRACE_IRQS_ON
-	call	debug_stack_reset
-.endm
-
-.macro TRACE_IRQS_IRETQ_DEBUG
-	btl	$9, EFLAGS(%rsp)		/* interrupts off? */
-	jnc	1f
-	TRACE_IRQS_ON_DEBUG
-1:
-.endm
-
-#else
-# define TRACE_IRQS_OFF_DEBUG			TRACE_IRQS_OFF
-# define TRACE_IRQS_ON_DEBUG			TRACE_IRQS_ON
-# define TRACE_IRQS_IRETQ_DEBUG			TRACE_IRQS_IRETQ
-#endif
-
-/*
  * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
  * This is the only entry point used for 64-bit system calls.  The
@@ -878,11 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
 	GET_CR2_INTO(%rdx);			/* can clobber %rax */
 	.endif
 
-	.if \shift_ist != -1
-	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
-	.else
 	TRACE_IRQS_OFF
-	.endif
 
 	.if \paranoid == 0
 	testb	$3, CS(%rsp)
@@ -1248,7 +1206,7 @@ END(paranoid_entry)
 ENTRY(paranoid_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF_DEBUG
+	TRACE_IRQS_OFF
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
@@ -1257,7 +1215,7 @@ ENTRY(paranoid_exit)
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
-	TRACE_IRQS_IRETQ_DEBUG
+	TRACE_IRQS_IRETQ
 	/* Always restore stashed CR3 value (see paranoid_entry) */
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG
  2019-07-11 11:41 ` [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Peter Zijlstra
@ 2019-07-11 14:45   ` Andy Lutomirski
  2019-07-11 18:28     ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Andy Lutomirski @ 2019-07-11 14:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Borislav Petkov, Ingo Molnar, Steven Rostedt,
	Andrew Lutomirski, Linus Torvalds, H. Peter Anvin, Dave Hansen,
	Juergen Gross, LKML, He Zhe, Joel Fernandes, devel

On Thu, Jul 11, 2019 at 4:51 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Since INT3/#BP no longer runs on an IST, this workaround is no longer
> required.
>
> Tested by running lockdep+ftrace as described in the initial commit:
>
>   5963e317b1e9 ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")
>
> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

I would definitely like to see this happen, but there are all kinds of
possible nasties here.  Ideally we'd like to get rid of IST for #DB, but
we can't due to the MOV SS mess.  There are a few relevant cases we
care about:

#DB from user mode -> anything that hits in C code: irrelevant --
we've exited the IST stack already.

#DB from user mode -> NMI/MCE in the asm -> #DB: The NMI code tries to
get this right.  The MCE code does not.

#DB from kernel mode -> NMI/MCE -> #DB: same as above.

MOV SS -> #DB from entry -> #DB again: ugh.  We get some protection
from shift_ist.

IMO we would ideally just clear DR7 in sensitive contexts.  Or extend
the debug_stack_set_zero(), etc hack.

All that being said, the actual _DEBUG macros shouldn't matter here, I
think.  But I'd like to sleep on it.   So not-yet-acked-by me.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG
  2019-07-11 14:45   ` Andy Lutomirski
@ 2019-07-11 18:28     ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-11 18:28 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Borislav Petkov, Ingo Molnar, Steven Rostedt,
	Linus Torvalds, H. Peter Anvin, Dave Hansen, Juergen Gross, LKML,
	He Zhe, Joel Fernandes, devel

On Thu, Jul 11, 2019 at 07:45:56AM -0700, Andy Lutomirski wrote:
> On Thu, Jul 11, 2019 at 4:51 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > Since INT3/#BP no longer runs on an IST, this workaround is no longer
> > required.
> >
> > Tested by running lockdep+ftrace as described in the initial commit:
> >
> >   5963e317b1e9 ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")
> >
> > Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> 
> I would definitely like to see this happen, but there are all kinds of
> possible nasties here.  Ideally we'd like to get rid of IST for #DB, but
> we can't due to the MOV SS mess.  There are a few relevant cases we
> care about:
> 
> #DB from user mode -> anything that hits in C code: irrelevant --
> we've exited the IST stack already.
> 
> #DB from user mode -> NMI/MCE in the asm -> #DB: The NMI code tries to
> get this right.  The MCE code does not.
> 
> #DB from kernel mode -> NMI/MCE -> #DB: same as above.
> 
> MOV SS -> #DB from entry -> #DB again: ugh.  We get some protection
> from shift_ist.
> 
> IMO we would ideally just clear DR7 in sensitive contexts.  Or extend
> the debug_stack_set_zero(), etc hack.
> 
> All that being said, the actual _DEBUG macros shouldn't matter here, I
> think.  But I'd like to sleep on it.   So not-yet-acked-by me.

How about something lovely like:

#DB from kernel space; in, say, lockdep.
The #DB entry calls back into lockdep through trace_irq,
which then hits the same #DB,

and we get a recursive #DB.

Now, I don't think we can actually make that happen, because most/all
of the relevant functions have NOKPROBE_SYMBOL() on them. Even the
idtentry generates _ASM_NOKPROBE().

Still, it might make sense to have #DB itself clear/restore DR7 if it
doesn't already.

Also, the comment on do_debug() seems wrong; we can set watchpoints on
kernel text just fine these days.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
                   ` (5 preceding siblings ...)
  2019-07-11 11:41 ` [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Peter Zijlstra
@ 2019-07-16 19:33 ` Vegard Nossum
  2019-07-16 21:51   ` Vegard Nossum
  2019-07-17  8:07   ` Peter Zijlstra
  6 siblings, 2 replies; 29+ messages in thread
From: Vegard Nossum @ 2019-07-16 19:33 UTC (permalink / raw)
  To: Peter Zijlstra, tglx, bp, mingo, rostedt, luto, torvalds
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel


On 7/11/19 1:40 PM, Peter Zijlstra wrote:
> Hi,
> 
> Here's the latest (and hopefully final) set of tracing vs CR2 patches.
> 
> They are basically the same as v2, with only minor edits and tags collected
> from the last review.
> 
> Please consider.
> 

Hi,

I ran my own battery of tests on your patch set on top of 
5ad18b2e60b75c7297a998dea702451d33a052ed and ran into this:

------------[ cut here ]------------
General protection fault in user access. Non-canonical address?
WARNING: CPU: 0 PID: 5039 at arch/x86/mm/extable.c:126 
ex_handler_uaccess+0x5d/0x70
CPU: 0 PID: 5039 Comm: init Not tainted 5.2.0+ #124
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
RIP: 0010:ex_handler_uaccess+0x5d/0x70
Code: 5d 41 5c c3 e8 c4 8e 0e 00 80 3d e5 74 1e 01 00 75 d3 e8 b6 8e 0e 
00 48 c7 c7 10 a7 fb 81 c6 05 d0 74 1e 01 01 e8 d1 43 01 00 <0f> 0b eb 
b7 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 55 48 89
RSP: 0000:fffffe000000fc48 EFLAGS: 00010086
RAX: 0000000000000000 RBX: ffffffff81c07dac RCX: ffffffff811a887c
RDX: 0000000000000000 RSI: ffffffff8289f05f RDI: 0000000000000093
RBP: fffffe000000fcb8 R08: 00000036fe0f15d3 R09: 000000000000003f
R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000d
R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
FS:  00005555563ab8c0(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000001ff7 CR3: 000000003c804002 CR4: 00000000003606f0
DR0: 0000000040209100 DR1: 00000000402091a1 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff1 DR7: 00000000000b062a
Call Trace:
  <#DB>
  fixup_exception+0x50/0x6a
  do_general_protection+0x40/0x160
  general_protection+0x2d/0x40
RIP: 0010:arch_stack_walk_user+0x71/0x100
Code: 00 48 83 e8 10 49 39 c4 77 45 4c 8b 04 24 4c 89 e3 4d 89 fd 4c 89 
fd 41 83 87 98 0a 00 00 01 0f 01 cb 0f ae e8 31 c0 4c 89 e2 <4c> 8b 33 
4d 89 f4 85 c0 75 7a 48 8b 73 08 0f 01 ca 85 c0 74 1f 65
RSP: 0000:fffffe000000fd68 EFLAGS: 00050046
RAX: 0000000000000000 RBX: 854163717acc2789 RCX: ffffffff811ca27b
RDX: 854163717acc2789 RSI: 0000000040209102 RDI: fffffe000000fdb8
RBP: ffff88803d55d040 R08: ffffc9000520bf58 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 854163717acc2789
R13: ffff88803d55d040 R14: 0000000000000093 R15: ffff88803d55d040
  ? stack_trace_consume_entry+0x4b/0x80
  ? arch_stack_walk_user+0x34/0x100
  ? profile_setup.cold+0xc1/0xc1
  stack_trace_save_user+0x71/0x9c
  trace_buffer_unlock_commit_regs+0x1ae/0x270
  trace_event_buffer_commit+0x90/0x240
  trace_event_raw_event_preemptirq_template+0x9a/0x100
  ? debug+0x16/0x70
  ? perf_trace_preemptirq_template+0x120/0x120
  ? trace_hardirqs_off_thunk+0x1a/0x1c
  trace_hardirqs_off_caller+0xf4/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? debug+0x11/0x70
  debug+0x16/0x70
RIP: 0010:copy_user_generic_unrolled+0xa0/0xc0
Code: 7f 40 ff c9 75 b6 89 d1 83 e2 07 c1 e9 03 74 12 4c 8b 06 4c 89 07 
48 8d 76 08 48 8d 7f 08 ff c9 75 ee 21 d2 74 10 89 d1 8a 06 <88> 07 48 
ff c6 48 ff c7 ff c9 75 f2 31 c0 0f 01 ca c3 0f 1f 40 00
RSP: 0000:ffffc9000520be38 EFLAGS: 00040202
RAX: ffff88803d55d09c RBX: ffff88803d55d040 RCX: 0000000000000001
RDX: 0000000000000001 RSI: 0000000040209102 RDI: ffffc9000520be76
RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 00007ffffffff000
R13: 0000000040209102 R14: ffffc9000520be76 R15: 0000000000000000
  </#DB>
  __probe_kernel_read+0x57/0x90
  is_prefetch.isra.0+0xb5/0x210
  ? tracer_hardirqs_on+0x53/0x1a0
  __bad_area_nosemaphore+0x9e/0x220
  __do_page_fault+0x483/0x630
  ? async_page_fault+0x8/0x40
  async_page_fault+0x36/0x40
RIP: 0033:0x40209102
Code: 00 00 49 bc 00 20 23 40 00 00 00 00 49 bd 00 00 d0 40 00 00 00 00 
49 be ff ff ff ff ff ff ff ff 49 bf 00 50 80 40 00 00 00 00 <9c> 48 81 
0c 24 00 04 00 00 48 81 0c 24 00 00 04 00 9d ff 2c 25 00
RSP: 002b:0000000000001fff EFLAGS: 00010217
RAX: 0000000000000000 RBX: 00000000402090b0 RCX: 0000000000000001
RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000041ebb000
RBP: 854163717acc2789 R08: 0000000000000001 R09: b1f39cc399a61ebb
R10: 00007ffeab175000 R11: 0000000000000360 R12: 0000000040232000
R13: 0000000040d00000 R14: ffffffffffffffff R15: 0000000040805000
---[ end trace e5e49800ff5aa5ed ]---
PANIC: double fault, error_code: 0x0
CPU: 0 PID: 5039 Comm: init Tainted: G        W         5.2.0+ #124
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f008 EFLAGS: 00010093
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f088 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
FS:  00005555563ab8c0(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffffe000000eff8 CR3: 000000003c804002 CR4: 00000000003606f0
DR0: 0000000040209100 DR1: 00000000402091a1 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 00000000000b062a
Call Trace:
  <#DB>
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f148 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f1c8 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f288 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f308 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f3c8 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f448 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f508 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f588 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f648 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f6c8 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f788 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f808 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f8c8 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f948 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000fa08 EFLAGS: 00010083 ORIG_RAX: 0000000000000000
RAX: 0000000000006004 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000006004 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000fa88 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  async_page_fault+0x16/0x40
RIP: 0010:fixup_bad_iret+0x6/0x50
Code: 2d a8 00 00 00 48 39 f8 74 0b b9 15 00 00 00 48 89 c7 f3 48 a5 c3 
0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 41 54 55 48 89 fd <65> 48 8b 
3d a6 31 f2 7e 48 8b b5 a0 00 00 00 4c 8d a7 50 ff ff ff
RSP: 0000:fffffe000000fb48 EFLAGS: 00010082 ORIG_RAX: 0000000000000000
RAX: 800000003c804002 RBX: 0000000000000000 RCX: ffffffff81a00b97
RDX: 0000000000000000 RSI: ffffffff81a013a8 RDI: fffffe000000fb60
RBP: fffffe000000fb60 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? native_iret+0x7/0x7
  ? general_protection+0x8/0x40
  error_entry+0xe5/0xf0
RIP: 0010:native_irq_return_iret+0x0/0x2
Code: 5b 41 5b 41 5a 41 59 41 58 58 59 5a 5e 5f 48 83 c4 08 e9 0c 00 00 
00 90 90 66 2e 0f 1f 84 00 00 00 00 00 f6 44 24 20 04 75 02 <48> cf 57 
0f 01 f8 0f 1f 00 66 90 0f 20 df 48 0f ba ef 3f 48 81 e7
RSP: 0000:fffffe000000fc18 EFLAGS: 00010046 ORIG_RAX: 0000000000000000
RAX: fffffe000000fe08 RBX: ffffffff81c07dac RCX: ffff88803c824000
RDX: ffffffff8126a228 RSI: 0000000040209100 RDI: 0000000000000000
RBP: fffffe000000fcb8 R08: fffffe000000fec0 R09: ffffffff8125b177
R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000d
R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
  ? general_protection+0x8/0x40
  ? perf_exclude_event+0x67/0x90
  ? perf_bp_event+0x98/0xe0
RIP: 0000:0x2
Code: Bad RIP value.
RSP: 0000:0000000005080021 EFLAGS: 00000000
  ? ex_handler_uaccess+0x5d/0x70
  ? fixup_exception+0x50/0x6a
  ? do_general_protection+0x40/0x160
  ? general_protection+0x2d/0x40
  ? stack_trace_consume_entry+0x4b/0x80
  ? arch_stack_walk_user+0x71/0x100
  ? arch_stack_walk_user+0x34/0x100
  ? profile_setup.cold+0xc1/0xc1
  ? stack_trace_save_user+0x71/0x9c
  ? __this_cpu_preempt_check+0xc/0xc6
  ? hw_breakpoint_exceptions_notify+0x120/0x1c0
  ? notifier_call_chain+0x8e/0xb0
  ? atomic_notifier_call_chain+0x37/0x40
  ? notify_die+0x5c/0x80
  ? trace_hardirqs_off_caller+0x20/0x150
  ? trace_hardirqs_off_thunk+0x1a/0x1c
  ? debug_smp_processor_id+0x28/0xd0
  ? paranoid_exit+0xb/0xb0
  ? copy_user_enhanced_fast_string+0xe/0x20
  </#DB>
WARNING: stack recursion on stack type 9
Kernel panic - not syncing: Machine halted.
CPU: 0 PID: 5039 Comm: init Tainted: G        W         5.2.0+ #124
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
Call Trace:
  <#DF>
  dump_stack+0xe1/0x133
  panic+0x159/0x3d8
  ? get_cpu_entry_area+0x8/0x30
  df_debug+0x24/0x2d
  do_double_fault+0x94/0xf0
  double_fault+0x2c/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f008 EFLAGS: 00010093
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f088 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  </#DF>
  <#DB>
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f148 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f1c8 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f288 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f308 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f3c8 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f448 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f508 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f588 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f648 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f6c8 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f788 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f808 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000f8c8 EFLAGS: 00010093 ORIG_RAX: 0000000000000000
RAX: 0000000000016cc0 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000016cc0 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000f948 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  async_page_fault+0x16/0x40
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50
Code: 82 e8 74 2d f8 ff 48 89 9d 10 01 00 00 48 89 ee 5b 4c 89 e7 5d 41 
5c e9 8e 5d 12 00 5b b8 f4 ff ff ff 5d 41 5c c3 0f 1f 40 00 <65> 48 8b 
04 25 c0 6c 01 00 65 8b 15 78 ba df 7e 81 e2 00 01 1f 00
RSP: 0000:fffffe000000fa08 EFLAGS: 00010083 ORIG_RAX: 0000000000000000
RAX: 0000000000006004 RBX: ffffffff81a01436 RCX: ffffffff81a00b97
RDX: 0000000000006004 RSI: ffffffff81a01428 RDI: ffffffff81a01436
RBP: fffffe000000fa88 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? async_page_fault+0x16/0x40
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  ? async_page_fault+0x16/0x40
  trace_hardirqs_off_caller+0x10/0x150
  trace_hardirqs_off_thunk+0x1a/0x1c
  ? native_iret+0x7/0x7
  ? async_page_fault+0x8/0x40
  async_page_fault+0x16/0x40
RIP: 0010:fixup_bad_iret+0x6/0x50
Code: 2d a8 00 00 00 48 39 f8 74 0b b9 15 00 00 00 48 89 c7 f3 48 a5 c3 
0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 41 54 55 48 89 fd <65> 48 8b 
3d a6 31 f2 7e 48 8b b5 a0 00 00 00 4c 8d a7 50 ff ff ff
RSP: 0000:fffffe000000fb48 EFLAGS: 00010082 ORIG_RAX: 0000000000000000
RAX: 800000003c804002 RBX: 0000000000000000 RCX: ffffffff81a00b97
RDX: 0000000000000000 RSI: ffffffff81a013a8 RDI: fffffe000000fb60
RBP: fffffe000000fb60 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  ? native_iret+0x7/0x7
  ? general_protection+0x8/0x40
  error_entry+0xe5/0xf0
RIP: 0010:native_irq_return_iret+0x0/0x2
Code: 5b 41 5b 41 5a 41 59 41 58 58 59 5a 5e 5f 48 83 c4 08 e9 0c 00 00 
00 90 90 66 2e 0f 1f 84 00 00 00 00 00 f6 44 24 20 04 75 02 <48> cf 57 
0f 01 f8 0f 1f 00 66 90 0f 20 df 48 0f ba ef 3f 48 81 e7
RSP: 0000:fffffe000000fc18 EFLAGS: 00010046 ORIG_RAX: 0000000000000000
RAX: fffffe000000fe08 RBX: ffffffff81c07dac RCX: ffff88803c824000
RDX: ffffffff8126a228 RSI: 0000000040209100 RDI: 0000000000000000
RBP: fffffe000000fcb8 R08: fffffe000000fec0 R09: ffffffff8125b177
R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000d
R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
  ? general_protection+0x8/0x40
  ? perf_exclude_event+0x67/0x90
  ? perf_bp_event+0x98/0xe0
RIP: 0000:0x2
Code: Bad RIP value.
RSP: 0000:0000000005080021 EFLAGS: 00000000
  ? ex_handler_uaccess+0x5d/0x70
  ? fixup_exception+0x50/0x6a
  ? do_general_protection+0x40/0x160
  ? general_protection+0x2d/0x40
  ? stack_trace_consume_entry+0x4b/0x80
  ? arch_stack_walk_user+0x71/0x100
  ? arch_stack_walk_user+0x34/0x100
  ? profile_setup.cold+0xc1/0xc1
  ? stack_trace_save_user+0x71/0x9c
  ? __this_cpu_preempt_check+0xc/0xc6
  ? hw_breakpoint_exceptions_notify+0x120/0x1c0
  ? notifier_call_chain+0x8e/0xb0
  ? atomic_notifier_call_chain+0x37/0x40
  ? notify_die+0x5c/0x80
  ? trace_hardirqs_off_caller+0x20/0x150
  ? trace_hardirqs_off_thunk+0x1a/0x1c
  ? debug_smp_processor_id+0x28/0xd0
  ? paranoid_exit+0xb/0xb0
  ? copy_user_enhanced_fast_string+0xe/0x20
  </#DB>
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Machine halted. ]---

There's quite a bit to unpack there... I haven't looked into it AT ALL
yet, but at least you have the report. Will try to see if I can get a
reproducible test case.


Vegard

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-16 19:33 ` [PATCH v3 0/6] Tracing vs CR2 Vegard Nossum
@ 2019-07-16 21:51   ` Vegard Nossum
  2019-07-17  1:02     ` Andy Lutomirski
  2019-07-17  9:37     ` Eiichi Tsukata
  2019-07-17  8:07   ` Peter Zijlstra
  1 sibling, 2 replies; 29+ messages in thread
From: Vegard Nossum @ 2019-07-16 21:51 UTC (permalink / raw)
  To: Peter Zijlstra, tglx, bp, mingo, rostedt, luto, torvalds, linux_lkml_grp
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel, devel


On 7/16/19 9:33 PM, Vegard Nossum wrote:
> 
> On 7/11/19 1:40 PM, Peter Zijlstra wrote:
>> Hi,
>>
>> Here's the latest (and hopefully final) set of tracing vs CR2 patches.
>>
>> They are basically the same as v2, with only minor edits and tags 
>> collected
>> from the last review.
>>
>> Please consider.
>>
> 
> Hi,
> 
> I ran my own battery of tests on your patch set on top of 
> 5ad18b2e60b75c7297a998dea702451d33a052ed and ran into this:
> 
> ------------[ cut here ]------------
> General protection fault in user access. Non-canonical address?
> WARNING: CPU: 0 PID: 5039 at arch/x86/mm/extable.c:126 
> ex_handler_uaccess+0x5d/0x70

Got a different one:

WARNING: CPU: 0 PID: 2150 at arch/x86/kernel/traps.c:791 do_debug+0xfe/0x240
CPU: 0 PID: 2150 Comm: init Not tainted 5.2.0+ #124
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
RIP: 0010:do_debug+0xfe/0x240
Code: 05 07 3d f3 7e f6 85 91 00 00 00 02 0f 85 d8 00 00 00 49 8b 84 24 
18 0b 00 00 f6 44 24 01 40 74 2f f6 85 88 00 00 00 03 75 26 <0f> 0b 80 
e4 bf 49 89 84 24 18 0b 00 00 f0 41 80 0c 24 10 48 81 a5
RSP: 0000:fffffe000000ff20 EFLAGS: 00010046
RAX: 0000000000004002 RBX: 0000000000000000 RCX: ffffffff810e2f72
RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffffffff8201f090
RBP: fffffe000000ff58 R08: 0000000000000000 R09: 0000000000000005
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88803e0df040
R13: 0000000000000000 R14: 000000003d376001 R15: 0000000000000000
FS:  0000555556dbc8c0(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000041f38010 CR3: 000000003d376001 CR4: 00000000003606f0
DR0: 0000000000000001 DR1: 0000000041a4f070 DR2: 00007fff959ff000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000003b3062a
Call Trace:
  <#DB>
  debug+0x2d/0x70
RIP: 0010:arch_stack_walk_user+0x74/0x100
Code: e8 10 49 39 c4 77 45 4c 8b 04 24 4c 89 e3 4d 89 fd 4c 89 fd 41 83 
87 98 0a 00 00 01 0f 01 cb 0f ae e8 31 c0 4c 89 e2 4c 8b 33 <4d> 89 f4 
85 c0 75 7a 48 8b 73 08 0f 01 ca 85 c0 74 1f 65 48 8b 04
RSP: 0000:ffffc900030dbd68 EFLAGS: 00040046
RAX: 0000000000000000 RBX: 0000000041a4f073 RCX: ffffffff811ca27b
RDX: 0000000041a4f073 RSI: 0000000041a4f0dd RDI: ffffc900030dbdb8
RBP: ffff88803e0df040 R08: ffffc900030dbf58 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000041a4f073
R13: ffff88803e0df040 R14: 0041281000bf4800 R15: ffff88803e0df040
  ? stack_trace_consume_entry+0x4b/0x80
  </#DB>
  ? profile_setup.cold+0xc1/0xc1
  stack_trace_save_user+0x71/0x9c
  trace_buffer_unlock_commit_regs+0x1ae/0x270
  trace_event_buffer_commit+0x90/0x240
  trace_event_raw_event_preemptirq_template+0x9a/0x100
  ? debug+0x49/0x70
  ? perf_trace_preemptirq_template+0x120/0x120
  ? trace_hardirqs_off_thunk+0x1a/0x1c
  trace_hardirqs_off_caller+0xf4/0x150
  ? debug+0x44/0x70
  trace_hardirqs_off_thunk+0x1a/0x1c
  debug+0x49/0x70
RIP: 0033:0x41a4f0dd
Code: 47 11 b7 d2 36 45 6c 49 be 00 f0 9f 95 ff 7f 00 00 49 bf de a7 b3 
e8 d7 21 3c 15 9c 48 81 0c 24 00 01 00 00 9d b8 62 00 00 00 <8e> c0 0f 
05 66 8c c8 9c 48 81 24 24 ff fe ff ff 9d 48 89 04 25 40
RSP: 002b:0000000040901ea0 EFLAGS: 00000317
RAX: 0000000000000062 RBX: 0000000041281000 RCX: ffffffffffffffff
RDX: 00000000401c0000 RSI: 0000000041892000 RDI: 0000000041281000
RBP: 0000000041a4f073 R08: 0000000000000001 R09: 0000000000000001
R10: ffffffff917d7748 R11: 1000000000000000 R12: fdffffffffffffff
R13: 6c4536d2b71147a5 R14: 00007fff959ff000 R15: 153c21d7e8b3a7de
---[ end trace 0cd51ba690f12b47 ]---

The warning is this:

         if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
                 /*
                  * Historical junk that used to handle SYSENTER single-stepping.
                  * This should be unreachable now.  If we survive for a while
                  * without anyone hitting this warning, we'll turn this into
                  * an oops.
                  */
                 tsk->thread.debugreg6 &= ~DR_STEP;
                 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
                 regs->flags &= ~X86_EFLAGS_TF;
         }

Unfortunately DR6 from the register dump has already been cleared at the
top of do_debug() and the local variable dr6 is on the stack and not
loaded into any of the registers AFAICT.

 From the userspace Code: line you can clearly see it setting EFLAGS_TF,
then it seems to be trapping on the next instruction:

   1b:   9c                      pushfq
   1c:   48 81 0c 24 00 01 00    orq    $0x100,(%rsp)
   23:   00
   24:   9d                      popfq
   25:   b8 62 00 00 00          mov    $0x62,%eax
   2a:*  8e c0                   mov    %eax,%es         <-- trapping instruction

You can see that DR1 points to 41a4f070, which is close to userspace RBP
(41a4f073), which is perhaps being accessed by stack_trace_save_user()
and causing the debug exception on a data breakpoint?

The Code: line from stack_trace_save_user() is:

   27:   4c 8b 33                mov    (%rbx),%r14
   2a:*  4d 89 f4                mov    %r14,%r12                <-- trapping instruction

with RBX == 41a4f073 so that seems to fit the theory, except I'd have
expected the "trapping instruction" to point at the memory dereference.
(But maybe it's one of those "points to return address" kind of things?)

DR7 is 03b3062a, which is..
  - DR0, DR1, DR2 global breakpoints
  - DR0 reads + writes
  - DR1 reads + writes
  - DR2 reads + writes

A second instance of the same warning:

WARNING: CPU: 0 PID: 601 at arch/x86/kernel/traps.c:791 do_debug+0xfe/0x240
CPU: 0 PID: 601 Comm: init Not tainted 5.2.0+ #124
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
RIP: 0010:do_debug+0xfe/0x240
Code: 05 07 3d f3 7e f6 85 91 00 00 00 02 0f 85 d8 00 00 00 49 8b 84 24 
18 0b 00 00 f6 44 24 01 40 74 2f f6 85 88 00 00 00 03 75 26 <0f> 0b 80 
41 80 0c 24 10 48 81 a5
RSP: 0000:fffffe000000ff20 EFLAGS: 00010046
RAX: 0000000000004002 RBX: 0000000000000000 RCX: ffffffff810e2f72
RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffffffff8201f090
RBP: fffffe000000ff58 R08: 0000000000000000 R09: 0000000000000005
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88803e0bd040
R13: 0000000000000000 R14: 000000003d3c6001 R15: 0000000000000000
FS:  0000555556efb8c0(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000041686010 CR3: 000000003d3c6001 CR4: 00000000003606f0
DR0: 0000000000000001 DR1: 00000000400be070 DR2: 00007ffd20c67000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000003b3062a
Call Trace:
  <#DB>
  debug+0x2d/0x70
RIP: 0010:arch_stack_walk_user+0x74/0x100
Code: e8 10 49 39 c4 77 45 4c 8b 04 24 4c 89 e3 4d 89 fd 4c 89 fd 41 83 
87 98 0a 00 00 01 0f 01 cb 0f ae e8 31 c0 4c 89 e2 4c 8b 33 <4d> 89 f4 
85 c0 74 1f 65 48 8b 04
RSP: 0000:ffffc900024f3d68 EFLAGS: 00040046
RAX: 0000000000000000 RBX: 00000000400be073 RCX: ffffffff811ca27b
RDX: 00000000400be073 RSI: 00000000400be0dd RDI: ffffc900024f3db8
RBP: ffff88803e0bd040 R08: ffffc900024f3f58 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000400be073
R13: ffff88803e0bd040 R14: 00404e4000bf4800 R15: ffff88803e0bd040
  ? stack_trace_consume_entry+0x4b/0x80
  </#DB>
  ? profile_setup.cold+0xc1/0xc1
  stack_trace_save_user+0x71/0x9c
  trace_buffer_unlock_commit_regs+0x1ae/0x270
  trace_event_buffer_commit+0x90/0x240
  trace_event_raw_event_preemptirq_template+0x9a/0x100
  ? debug+0x49/0x70
  ? perf_trace_preemptirq_template+0x120/0x120
  ? trace_hardirqs_off_thunk+0x1a/0x1c
  trace_hardirqs_off_caller+0xf4/0x150
  ? debug+0x44/0x70
  trace_hardirqs_off_thunk+0x1a/0x1c
  debug+0x49/0x70
RIP: 0033:0x400be0dd
Code: 3a 51 3e 59 a9 b2 e3 49 be 00 70 c6 20 fd 7f 00 00 49 bf de a7 b3 
e8 d7 21 3c 15 9c 48 81 0c 24 00 01 00 00 9d b8 62 00 00 00 <8e> c0 0f 
ff ff 9d 48 89 04 25 40
RSP: 002b:00000000417d0ea0 EFLAGS: 00000317
RAX: 0000000000000062 RBX: 00000000404e4000 RCX: ffffffffffffffff
RDX: 0000000041da4000 RSI: 0000000040bb0000 RDI: 00000000404e4000
RBP: 00000000400be073 R08: 0000000000000001 R09: 0000000000000001
R10: 9c7fa8aa10386cdb R11: 1000000000000000 R12: fdffffffffffffff
R13: e3b2a9593e513a6b R14: 00007ffd20c67000 R15: 153c21d7e8b3a7de
---[ end trace beb9776710443227 ]---


Vegard

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-16 21:51   ` Vegard Nossum
@ 2019-07-17  1:02     ` Andy Lutomirski
  2019-07-17  7:46       ` Vegard Nossum
  2019-07-17  7:47       ` Peter Zijlstra
  2019-07-17  9:37     ` Eiichi Tsukata
  1 sibling, 2 replies; 29+ messages in thread
From: Andy Lutomirski @ 2019-07-17  1:02 UTC (permalink / raw)
  To: Vegard Nossum
  Cc: Peter Zijlstra, Thomas Gleixner, Borislav Petkov, Ingo Molnar,
	Steven Rostedt, Andrew Lutomirski, Linus Torvalds,
	linux_lkml_grp, H. Peter Anvin, Dave Hansen, Juergen Gross, LKML,
	He Zhe, Joel Fernandes, devel

On Tue, Jul 16, 2019 at 2:53 PM Vegard Nossum <vegard.nossum@oracle.com> wrote:
>
>
> On 7/16/19 9:33 PM, Vegard Nossum wrote:
> >
> > On 7/11/19 1:40 PM, Peter Zijlstra wrote:
> >> Hi,
> >>
> >> Here's the latest (and hopefully final) set of tracing vs CR2 patches.
> >>
> >> They are basically the same as v2, with only minor edits and tags
> >> collected
> >> from the last review.
> >>
> >> Please consider.
> >>
> >
> > Hi,
> >
> > I ran my own battery of tests on your patch set on top of
> > 5ad18b2e60b75c7297a998dea702451d33a052ed and ran into this:
> >

On a different thread, Peter and I decided that the last patch in this
series (the one that removes the _DEBUG stuff) is wrong.  Can you see
if these are reproducible with that patch removed?

--Andy

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-17  1:02     ` Andy Lutomirski
@ 2019-07-17  7:46       ` Vegard Nossum
  2019-07-17  7:47       ` Peter Zijlstra
  1 sibling, 0 replies; 29+ messages in thread
From: Vegard Nossum @ 2019-07-17  7:46 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Peter Zijlstra, Thomas Gleixner, Borislav Petkov, Ingo Molnar,
	Steven Rostedt, Linus Torvalds, linux_lkml_grp, H. Peter Anvin,
	Dave Hansen, Juergen Gross, LKML, He Zhe, Joel Fernandes, devel


On 7/17/19 3:02 AM, Andy Lutomirski wrote:
> On Tue, Jul 16, 2019 at 2:53 PM Vegard Nossum <vegard.nossum@oracle.com> wrote:
>>
>>
>> On 7/16/19 9:33 PM, Vegard Nossum wrote:
>>>
>>> On 7/11/19 1:40 PM, Peter Zijlstra wrote:
>>>> Hi,
>>>>
>>>> Here's the latest (and hopefully final) set of tracing vs CR2 patches.
>>>>
>>>> They are basically the same as v2, with only minor edits and tags
>>>> collected
>>>> from the last review.
>>>>
>>>> Please consider.
>>>>
>>>
>>> Hi,
>>>
>>> I ran my own battery of tests on your patch set on top of
>>> 5ad18b2e60b75c7297a998dea702451d33a052ed and ran into this:
>>>
> 
> On a different thread, Peter and I decided that the last patch in this
> series (the one that removes the _DEBUG stuff) is wrong.  Can you see
> if these are reproducible with that patch removed?

Yes, without the last patch I still get this:

Run /init as init process
init[711]: segfault at 40000000 ip 000000004000000a sp 0000000040000ff8 
error 7
------------[ cut here ]------------
General protection fault in user access. Non-canonical address?
WARNING: CPU: 0 PID: 711 at arch/x86/mm/extable.c:126 
ex_handler_uaccess+0x5d/0x70
CPU: 0 PID: 711 Comm: init Not tainted 5.2.0+ #125
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
init[716]: segfault at 40000000 ip 000000004000000a sp 0000000040000ff8 
error 7
RIP: 0010:ex_handler_uaccess+0x5d/0x70
Code: 5d 41 5c c3 e8 c4 8e 0e 00 80 3d e5 74 1e 01 00 75 d3 e8 b6 8e 0e 
00 48 c7 c7 10 a7 fb 81 c6 05 d0 74 1e 01 01 e8 d1 43 01 00 <0f> 0b eb 
b7 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 55 48 89
RSP: 0000:ffffc9000065fa18 EFLAGS: 00010086
RAX: 0000000000000000 RBX: ffffffff81c07dac RCX: ffffffff811a887c
init[714]: segfault at 40000000 ip 000000004000000a sp 0000000040000ff8 
error 7
RDX: 0000000000000000 RSI: ffffffff8289f05f RDI: 0000000000000093
RBP: ffffc9000065fa88 R08: 000000002e80b265 R09: 000000000000003f
init[718]: segfault at 40000000 ip 000000004000000a sp 0000000040000ff8 
error 7
R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000d
R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
FS:  00000000006ce880(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000003fffffe0 CR3: 000000003d2f6004 CR4: 00000000003606f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
Code: Bad RIP value.
  fixup_exception+0x50/0x6a
  do_general_protection+0x40/0x160
  general_protection+0x2d/0x40
RIP: 0010:arch_stack_walk_user+0x71/0x100
Code: 00 48 83 e8 10 49 39 c4 77 45 4c 8b 04 24 4c 89 e3 4d 89 fd 4c 89 
fd 41 83 87 98 0a 00 00 01 0f 01 cb 0f ae e8 31 c0 4c 89 e2 <4c> 8b 33 
4d 89 f4 85 c0 75 7a 48 8b 73 08 0f 01 ca 85 c0 74 1f 65
[...]

This is my reproducer (as init):

#include <fcntl.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/user.h>
#include <unistd.h>
#include <wait.h>

/* Argument passed to each cloned child: entry point of the generated code page. */
struct child_data {
   void (*code)(void);
};

/*
 * Child entry point for clone(): make the generated code page executable
 * and jump into it.
 *
 * arg: pointer to a struct child_data.
 * Returns 0 if the generated code ever returns (clone() requires an
 * int-returning function).
 */
static int child_fn(void *arg)
{
   struct child_data *data = arg;

   /* The page was mapped PROT_WRITE only; switch it to PROT_EXEC only. */
   mprotect((void *)data->code, PAGE_SIZE, PROT_EXEC);
   data->code();
   return 0;
}

/*
 * Reproducer, intended to run as init: enables user stack traces on the
 * preemptirq:irq_disable trace event, then forks children forever that
 * execute a tiny hand-assembled code page.
 */
int main(void)
{
   /* Mount sysfs and tracefs so the tracing control files are reachable. */
   mkdir("/sys", 7);
   mount("nodev", "/sys", "sysfs", 0, "");
   mount("nodev", "/sys/kernel/tracing", "tracefs", 0, "");

   /* Record a user-space stack trace with every trace event. */
   int tracing_options_userstacktrace =
      open("/sys/kernel/tracing/options/userstacktrace", O_RDWR);
   write(tracing_options_userstacktrace, "1\n", 2);

   /* Emit a trace event each time interrupts are disabled. */
   int tracing_events_preemptirq_irq_disable =
      open("/sys/kernel/tracing/events/preemptirq/irq_disable/enable", O_RDWR);
   write(tracing_events_preemptirq_irq_disable, "1\n", 2);

   /* fd is -1 as mmap(2) recommends for MAP_ANONYMOUS mappings. */
   void *code = mmap(0, PAGE_SIZE, PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
   {
     unsigned char *output = code;

     /*
      * 0x48 0xbd <imm64> encodes "movabs $imm64, %rbp"; the immediate is
      * the byte sequence 00 01 .. 07, i.e. 0x0706050403020100, which is a
      * non-canonical address -- presumably what makes the kernel's
      * frame-pointer based user stack walk fault.
      */
     *output++ = 72;
     *output++ = 189;
     for (int i = 0; i < 8; ++i)
       *output++ = i;
   }

   void *child_stack = mmap(0, PAGE_SIZE, PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);

   while (1) {
     struct child_data data = { (void (*)(void))code };

     /*
      * NOTE(review): clone() is handed the base of the stack mapping rather
      * than its top. Left exactly as reported, since the reproducer is
      * described as sensitive to small changes.
      */
     clone(child_fn, child_stack, SIGCHLD, &data);
   }
}

Compiled with -static and booted with "norandmaps" (for some reason that
makes a difference), this is 100% reproducible for me, although the
reproducer is somewhat sensitive to small changes that I don't quite
understand.


Vegard

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-17  1:02     ` Andy Lutomirski
  2019-07-17  7:46       ` Vegard Nossum
@ 2019-07-17  7:47       ` Peter Zijlstra
  1 sibling, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-17  7:47 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Vegard Nossum, Thomas Gleixner, Borislav Petkov, Ingo Molnar,
	Steven Rostedt, Linus Torvalds, linux_lkml_grp, H. Peter Anvin,
	Dave Hansen, Juergen Gross, LKML, He Zhe, Joel Fernandes, devel

On Tue, Jul 16, 2019 at 06:02:33PM -0700, Andy Lutomirski wrote:

> On a different thread, Peter and I decided that the last patch in this
> series (the one that removes the _DEBUG stuff) is wrong.  Can you see
> if these are reproducible with that patch removed?

Wrong is maybe the wrong word :-), premature maybe, we definitely want to
get there, but the #DB crud needs a wee bit of work first.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-16 19:33 ` [PATCH v3 0/6] Tracing vs CR2 Vegard Nossum
  2019-07-16 21:51   ` Vegard Nossum
@ 2019-07-17  8:07   ` Peter Zijlstra
  2019-07-17  8:09     ` Vegard Nossum
  1 sibling, 1 reply; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-17  8:07 UTC (permalink / raw)
  To: Vegard Nossum
  Cc: tglx, bp, mingo, rostedt, luto, torvalds, hpa, dave.hansen,
	jgross, linux-kernel, zhe.he, joel, devel

On Tue, Jul 16, 2019 at 09:33:50PM +0200, Vegard Nossum wrote:
> ------------[ cut here ]------------
> General protection fault in user access. Non-canonical address?
> WARNING: CPU: 0 PID: 5039 at arch/x86/mm/extable.c:126
> ex_handler_uaccess+0x5d/0x70
> CPU: 0 PID: 5039 Comm: init Not tainted 5.2.0+ #124
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> Ubuntu-1.8.2-1ubuntu1 04/01/2014
> RIP: 0010:ex_handler_uaccess+0x5d/0x70
> Code: 5d 41 5c c3 e8 c4 8e 0e 00 80 3d e5 74 1e 01 00 75 d3 e8 b6 8e 0e 00
> 48 c7 c7 10 a7 fb 81 c6 05 d0 74 1e 01 01 e8 d1 43 01 00 <0f> 0b eb b7 0f 1f
> 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 55 48 89
> RSP: 0000:fffffe000000fc48 EFLAGS: 00010086
> RAX: 0000000000000000 RBX: ffffffff81c07dac RCX: ffffffff811a887c
> RDX: 0000000000000000 RSI: ffffffff8289f05f RDI: 0000000000000093
> RBP: fffffe000000fcb8 R08: 00000036fe0f15d3 R09: 000000000000003f
> R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000d
> R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
> FS:  00005555563ab8c0(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000001ff7 CR3: 000000003c804002 CR4: 00000000003606f0
> DR0: 0000000040209100 DR1: 00000000402091a1 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff1 DR7: 00000000000b062a
> Call Trace:
>  <#DB>
>  fixup_exception+0x50/0x6a
>  do_general_protection+0x40/0x160
>  general_protection+0x2d/0x40
> RIP: 0010:arch_stack_walk_user+0x71/0x100
> Code: 00 48 83 e8 10 49 39 c4 77 45 4c 8b 04 24 4c 89 e3 4d 89 fd 4c 89 fd
> 41 83 87 98 0a 00 00 01 0f 01 cb 0f ae e8 31 c0 4c 89 e2 <4c> 8b 33 4d 89 f4
> 85 c0 75 7a 48 8b 73 08 0f 01 ca 85 c0 74 1f 65
> RSP: 0000:fffffe000000fd68 EFLAGS: 00050046
> RAX: 0000000000000000 RBX: 854163717acc2789 RCX: ffffffff811ca27b
> RDX: 854163717acc2789 RSI: 0000000040209102 RDI: fffffe000000fdb8
> RBP: ffff88803d55d040 R08: ffffc9000520bf58 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000000 R12: 854163717acc2789
> R13: ffff88803d55d040 R14: 0000000000000093 R15: ffff88803d55d040
>  ? stack_trace_consume_entry+0x4b/0x80
>  ? arch_stack_walk_user+0x34/0x100
>  ? profile_setup.cold+0xc1/0xc1
>  stack_trace_save_user+0x71/0x9c
>  trace_buffer_unlock_commit_regs+0x1ae/0x270
>  trace_event_buffer_commit+0x90/0x240
>  trace_event_raw_event_preemptirq_template+0x9a/0x100
>  ? debug+0x16/0x70
>  ? perf_trace_preemptirq_template+0x120/0x120
>  ? trace_hardirqs_off_thunk+0x1a/0x1c
>  trace_hardirqs_off_caller+0xf4/0x150
>  trace_hardirqs_off_thunk+0x1a/0x1c
>  ? debug+0x11/0x70
>  debug+0x16/0x70
> RIP: 0010:copy_user_generic_unrolled+0xa0/0xc0
> Code: 7f 40 ff c9 75 b6 89 d1 83 e2 07 c1 e9 03 74 12 4c 8b 06 4c 89 07 48
> 8d 76 08 48 8d 7f 08 ff c9 75 ee 21 d2 74 10 89 d1 8a 06 <88> 07 48 ff c6 48
> ff c7 ff c9 75 f2 31 c0 0f 01 ca c3 0f 1f 40 00
> RSP: 0000:ffffc9000520be38 EFLAGS: 00040202
> RAX: ffff88803d55d09c RBX: ffff88803d55d040 RCX: 0000000000000001
> RDX: 0000000000000001 RSI: 0000000040209102 RDI: ffffc9000520be76
> RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000000 R12: 00007ffffffff000
> R13: 0000000040209102 R14: ffffc9000520be76 R15: 0000000000000000
>  </#DB>
>  __probe_kernel_read+0x57/0x90
>  is_prefetch.isra.0+0xb5/0x210
>  ? tracer_hardirqs_on+0x53/0x1a0
>  __bad_area_nosemaphore+0x9e/0x220
>  __do_page_fault+0x483/0x630
>  ? async_page_fault+0x8/0x40
>  async_page_fault+0x36/0x40
> RIP: 0033:0x40209102
> Code: 00 00 49 bc 00 20 23 40 00 00 00 00 49 bd 00 00 d0 40 00 00 00 00 49
> be ff ff ff ff ff ff ff ff 49 bf 00 50 80 40 00 00 00 00 <9c> 48 81 0c 24 00
> 04 00 00 48 81 0c 24 00 00 04 00 9d ff 2c 25 00
> RSP: 002b:0000000000001fff EFLAGS: 00010217
> RAX: 0000000000000000 RBX: 00000000402090b0 RCX: 0000000000000001
> RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000041ebb000
> RBP: 854163717acc2789 R08: 0000000000000001 R09: b1f39cc399a61ebb
> R10: 00007ffeab175000 R11: 0000000000000360 R12: 0000000040232000
> R13: 0000000040d00000 R14: ffffffffffffffff R15: 0000000040805000
> ---[ end trace e5e49800ff5aa5ed ]---


  https://lkml.kernel.org/r/57754f11-2c65-a2c8-2f6d-bfab0d2f8b53@etsukata.com

Does something like the below help?

diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index c8d0f05721a1..80ad4ccb7025 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -226,12 +226,16 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
 		.store	= store,
 		.size	= size,
 	};
+	mm_segment_t fs;
 
 	/* Trace user stack if not a kernel thread */
 	if (current->flags & PF_KTHREAD)
 		return 0;
 
+	fs = get_fs();
+	set_fs(USER_DS);
 	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
+	set_fs(fs);
 	return c.len;
 }
 #endif

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-17  8:07   ` Peter Zijlstra
@ 2019-07-17  8:09     ` Vegard Nossum
  2019-07-18  8:57       ` [PATCH] stacktrace: Force USER_DS for stack_trace_save_user() Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Vegard Nossum @ 2019-07-17  8:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: tglx, bp, mingo, rostedt, luto, torvalds, hpa, dave.hansen,
	jgross, linux-kernel, zhe.he, joel, devel

On 7/17/19 10:07 AM, Peter Zijlstra wrote:
> On Tue, Jul 16, 2019 at 09:33:50PM +0200, Vegard Nossum wrote:
>> ------------[ cut here ]------------
>> General protection fault in user access. Non-canonical address?
>> WARNING: CPU: 0 PID: 5039 at arch/x86/mm/extable.c:126
>> ex_handler_uaccess+0x5d/0x70
[...]
> 
> 
>    https://lkml.kernel.org/r/57754f11-2c65-a2c8-2f6d-bfab0d2f8b53@etsukata.com
> 
> Does something like the below help?
> 
> diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
> index c8d0f05721a1..80ad4ccb7025 100644
> --- a/kernel/stacktrace.c
> +++ b/kernel/stacktrace.c
> @@ -226,12 +226,16 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
>   		.store	= store,
>   		.size	= size,
>   	};
> +	mm_segment_t fs;
>   
>   	/* Trace user stack if not a kernel thread */
>   	if (current->flags & PF_KTHREAD)
>   		return 0;
>   
> +	fs = get_fs();
> +	set_fs(USER_DS);
>   	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
> +	set_fs(fs);
>   	return c.len;
>   }
>   #endif
> 

Yes.


Vegard

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-16 21:51   ` Vegard Nossum
  2019-07-17  1:02     ` Andy Lutomirski
@ 2019-07-17  9:37     ` Eiichi Tsukata
  2019-07-18 20:27       ` Andy Lutomirski
  1 sibling, 1 reply; 29+ messages in thread
From: Eiichi Tsukata @ 2019-07-17  9:37 UTC (permalink / raw)
  To: Vegard Nossum, Peter Zijlstra, tglx, bp, mingo, rostedt, luto,
	torvalds, linux_lkml_grp
  Cc: hpa, dave.hansen, jgross, linux-kernel, zhe.he, joel



On 2019/07/17 6:51, Vegard Nossum wrote:
> 
...
> 
> Got a different one:
> 
> WARNING: CPU: 0 PID: 2150 at arch/x86/kernel/traps.c:791 do_debug+0xfe/0x240
> CPU: 0 PID: 2150 Comm: init Not tainted 5.2.0+ #124
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
> RIP: 0010:do_debug+0xfe/0x240
...


Hello Vegard

I found a way to reproduce #DB WARNING by setting hardware watchpoint to
the address arch_stack_walk_user() will touch.


[Steps to Reproduce #DB WARNING]

poc.s:

```
        .global _start

        .text
_start:
        # exit(0)
        mov $60, %rax
        xor %rdi, %rdi
        syscall
```

build:

  # gcc -g -c poc.s; ld -o poc poc.o

setup ftrace:

  # echo 1 > options/userstacktrace
  # echo 1 > events/preemptirq/irq_disable/enable

exec gdb:(set hardware watch point to $rbp)

  [18:28:48 root@vm loops]# gdb ./poc
  GNU gdb (GDB) Fedora 8.3-6.fc30
  Copyright (C) 2019 Free Software Foundation, Inc.
  License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
  This is free software: you are free to change and redistribute it.
  There is NO WARRANTY, to the extent permitted by law.
  Type "show copying" and "show warranty" for details.
  This GDB was configured as "x86_64-redhat-linux-gnu".
  Type "show configuration" for configuration details.
  For bug reporting instructions, please see:
  <http://www.gnu.org/software/gdb/bugs/>.
  Find the GDB manual and other documentation resources online at:
      <http://www.gnu.org/software/gdb/documentation/>.

  For help, type "help".
  Type "apropos word" to search for commands related to "word"...
  Reading symbols from ./poc...
  (gdb) l
  1               .global _start
  2
  3               .text
  4       _start:
  5               # exit(0)
  6               mov $60, %rax
  7               xor %rdi, %rdi
  8               syscall
  (gdb) b 6
  Breakpoint 1 at 0x401000: file poc.s, line 6.
  (gdb) start
  Function "main" not defined.
  Make breakpoint pending on future shared library load? (y or [n]) n
  Starting program: /root/tmp/loops/poc

  Breakpoint 1, _start () at poc.s:6
  6               mov $60, %rax
  (gdb) set $rbp = $rsp
  (gdb) p $rbp
  $1 = (void *) 0x7fffffffe4b0
  (gdb) rwatch *0x7fffffffe4b0
  Hardware read watchpoint 2: *0x7fffffffe4b0
  (gdb) c
  Continuing.
  [Inferior 1 (process 2744) exited normally]

dmesg:

[  564.646159][ T2744] WARNING: CPU: 0 PID: 2744 at arch/x86/kernel/traps.c:791 do_debug+0x220/0x490
[  564.648581][ T2744] Modules linked in:
[  564.649530][ T2744] CPU: 0 PID: 2744 Comm: poc Tainted: G        W         5.2.0+ #77
[  564.651121][ T2744] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014
[  564.653569][ T2744] RIP: 0010:do_debug+0x220/0x490
[  564.654847][ T2744] Code: 00 48 8b 95 60 ff ff ff 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 03 02 00 00 41 f6 87 88 00 00 00 03 75 60 <0f> 0b 4c 89 f2 49 81 e5 ff bf ff ff 48 b8 00 00 00 00 00 fc ff df
[  564.659905][ T2744] RSP: 0000:fffffe0000014e98 EFLAGS: 00010046
[  564.661500][ T2744] RAX: dffffc0000000000 RBX: 1fffffc0000029d8 RCX: 1ffff1100f81c2d3
[  564.663531][ T2744] RDX: 1fffffc0000029fc RSI: 0000000000000000 RDI: ffffffff85c19f00
[  564.665553][ T2744] RBP: fffffe0000014f48 R08: fffffe0000014fe8 R09: ffff88807c0e08a0
[  564.667637][ T2744] R10: 0000000000000001 R11: 1ffff1100d1042ba R12: ffff88807c0e0000
[  564.669700][ T2744] R13: 0000000000004001 R14: ffff88807c0e1698 R15: fffffe0000014f58
[  564.671768][ T2744] FS:  0000000000000000(0000) GS:ffff888068800000(0000) knlGS:0000000000000000
[  564.674032][ T2744] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  564.675752][ T2744] CR2: 0000000000000001 CR3: 000000005fe08002 CR4: 0000000000160ef0
[  564.677570][ T2744] DR0: 00007fffffffe4b0 DR1: 0000000000000000 DR2: 0000000000000000
[  564.679686][ T2744] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 00000000000f0602
[  564.681788][ T2744] Call Trace:
[  564.682700][ T2744]  <#DB>
[  564.683492][ T2744]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  564.684954][ T2744]  ? do_int3+0x1f0/0x1f0
[  564.686074][ T2744]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  564.687512][ T2744]  debug+0x2d/0x70
[  564.688456][ T2744] RIP: 0010:arch_stack_walk_user+0x7d/0xf2
[  564.689899][ T2744] Code: 00 0f 85 8d 00 00 00 49 8b 87 d8 16 00 00 48 83 e8 10 49 39 c6 77 32 41 83 87 e8 15 00 00 01 0f 1f 00 0f ae e8 31 c0 49 8b 0e <85> c0 75 6d 49 8b 76 08 0f 1f 00 85 c0 74 1f 65 48 8b 04 25 00 ef
[  564.694763][ T2744] RSP: 0000:ffff888061fb7c48 EFLAGS: 00000046
[  564.696316][ T2744] RAX: 0000000000000000 RBX: ffff88807c0e0000 RCX: 0000000000000001
[  564.698342][ T2744] RDX: 1ffff1100ba08e93 RSI: 0000000000401009 RDI: ffff888061fb7cbc
[  564.700323][ T2744] RBP: ffff888061fb7c80 R08: 1ffff1100ba08e93 R09: ffff88805d04749c
[  564.702337][ T2744] R10: ffffed100ba08e9b R11: ffff88805d0474db R12: ffff888061fb7cb0
[  564.704359][ T2744] R13: ffff888061fb7f58 R14: 00007fffffffe4b0 R15: ffff88807c0e0000
[  564.706413][ T2744]  </#DB>
[  564.707182][ T2744]  ? stack_trace_save+0xc0/0xc0
[  564.708447][ T2744]  stack_trace_save_user+0x138/0x160
[  564.709752][ T2744]  ? stack_trace_save_tsk_reliable+0x210/0x210
[  564.711235][ T2744]  ? kasan_check_read+0x11/0x20
[  564.712358][ T2744]  trace_buffer_unlock_commit_regs+0x208/0x360
[  564.713871][ T2744]  trace_event_buffer_commit+0x1a0/0x790
[  564.715278][ T2744]  ? trace_event_buffer_reserve+0x163/0x240
[  564.716742][ T2744]  trace_event_raw_event_preemptirq_template+0x156/0x200
[  564.718431][ T2744]  ? perf_trace_preemptirq_template+0x490/0x490
[  564.719761][ T2744]  ? rcu_irq_enter_irqson+0x23/0x30
[  564.721064][ T2744]  ? trace_hardirqs_off+0x28/0x180
[  564.722337][ T2744]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  564.723729][ T2744]  ? debug+0x49/0x70
[  564.724703][ T2744]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  564.726113][ T2744]  ? perf_trace_preemptirq_template+0x490/0x490
[  564.727721][ T2744]  trace_hardirqs_off_caller+0x106/0x170
[  564.729154][ T2744]  ? debug+0x44/0x70
[  564.730108][ T2744]  trace_hardirqs_off_thunk+0x1a/0x1c
[  564.731456][ T2744]  debug+0x49/0x70
[  564.732278][ T2744] RIP: 0033:0x401009
[  564.733155][ T2744] Code: Bad RIP value.
[  564.734019][ T2744] RSP: 002b:00007fffffffe4b0 EFLAGS: 00000302
[  564.735366][ T2744] RAX: 000000000000003c RBX: 0000000000000000 RCX: 0000000000000000
[  564.737290][ T2744] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[  564.739314][ T2744] RBP: 00007fffffffe4b0 R08: 0000000000000000 R09: 0000000000000000
[  564.741338][ T2744] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[  564.743311][ T2744] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[  564.745463][ T2744] irq event stamp: 3340
[  564.746554][ T2744] hardirqs last  enabled at (3339): [<ffffffff82a04c12>] trace_hardirqs_on_thunk+0x1a/0x1c
[  564.748558][ T2744] hardirqs last disabled at (3340): [<ffffffff82d28b32>] rcu_irq_enter_irqson+0x12/0x30
[  564.750479][ T2744] softirqs last  enabled at (3330): [<ffffffff85a00634>] __do_softirq+0x634/0x9f1
[  564.752737][ T2744] softirqs last disabled at (3319): [<ffffffff82b77920>] irq_exit+0x150/0x180
[  564.754933][ T2744] ---[ end trace 67c6e66ff6ba5cd0 ]---



^ permalink raw reply	[flat|nested] 29+ messages in thread

* [tip:x86/urgent] x86/paravirt: Make read_cr2() CALLEE_SAVE
  2019-07-11 11:40 ` [PATCH v3 1/6] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
@ 2019-07-17 21:22   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2019-07-17 21:22 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: mingo, tglx, peterz, jgross, hpa, linux-kernel

Commit-ID:  55aedddb6149ab71bec9f050846855113977b033
Gitweb:     https://git.kernel.org/tip/55aedddb6149ab71bec9f050846855113977b033
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 11 Jul 2019 13:40:55 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 17 Jul 2019 23:17:37 +0200

x86/paravirt: Make read_cr2() CALLEE_SAVE

The one paravirt read_cr2() implementation (Xen) is actually quite trivial
and doesn't need to clobber anything other than the return register.

Making read_cr2() CALLEE_SAVE avoids all the PUSH/POP nonsense and allows
more convenient use from assembly.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Cc: bp@alien8.de
Cc: rostedt@goodmis.org
Cc: luto@kernel.org
Cc: torvalds@linux-foundation.org
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: zhe.he@windriver.com
Cc: joel@joelfernandes.org
Cc: devel@etsukata.com
Link: https://lkml.kernel.org/r/20190711114335.887392493@infradead.org

---
 arch/x86/entry/calling.h              |  6 ++++++
 arch/x86/include/asm/paravirt.h       | 22 +++++++++++++---------
 arch/x86/include/asm/paravirt_types.h |  2 +-
 arch/x86/kernel/asm-offsets.c         |  1 +
 arch/x86/kernel/head_64.S             |  4 +---
 arch/x86/kernel/paravirt.c            |  2 +-
 arch/x86/xen/enlighten_pv.c           |  3 ++-
 arch/x86/xen/mmu_pv.c                 | 12 +-----------
 arch/x86/xen/xen-asm.S                | 16 ++++++++++++++++
 arch/x86/xen/xen-ops.h                |  3 +++
 10 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9f1f9e3b8230..830bd984182b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with
 .Lafter_call_\@:
 #endif
 .endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25c38a05c1c..5135282683d4 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-	return PVOP_CALL0(unsigned long, mmu.read_cr2);
+	return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
 }
 
 static inline void write_cr2(unsigned long x)
@@ -909,13 +909,7 @@ extern void default_banner(void);
 		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_ops+PV_CPU_swapgs);		\
 		 )
-#endif
-
-#define GET_CR2_INTO_RAX				\
-	ANNOTATE_RETPOLINE_SAFE;				\
-	call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64),			\
 		  ANNOTATE_RETPOLINE_SAFE;				\
@@ -929,9 +923,19 @@ extern void default_banner(void);
 		  call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);	    \
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif	/* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX							\
+	PARA_SITE(PARA_PATCH(PV_MMU_read_cr2),				\
+		  ANNOTATE_RETPOLINE_SAFE;				\
+		  call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);		\
+		 )
+
+#endif /* CONFIG_PARAVIRT_XXL */
 
-#endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
 #else  /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 946f8f1f1efc..639b2df445ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
 	void (*exit_mmap)(struct mm_struct *mm);
 
 #ifdef CONFIG_PARAVIRT_XXL
-	unsigned long (*read_cr2)(void);
+	struct paravirt_callee_save read_cr2;
 	void (*write_cr2)(unsigned long);
 
 	unsigned long (*read_cr3)(void);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index da64452584b0..5c7ee3df4d0b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ static void __used common(void)
 	BLANK();
 	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
 	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+	OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
 #endif
 
 	BLANK();
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index bcd206c8ac90..0e2d72929a8c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
 #ifdef CONFIG_PARAVIRT_XXL
 #include <asm/asm-offsets.h>
 #include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
 #else
-#define GET_CR2_INTO(reg) movq %cr2, reg
 #define INTERRUPT_RETURN iretq
 #endif
 
@@ -323,7 +321,7 @@ early_idt_handler_common:
 
 	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
-	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
+	GET_CR2_INTO(%rdi)	/* can clobber %rax if pv */
 	call early_make_pgtable
 	andl %eax,%eax
 	jz 20f			/* All good */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 98039d7fb998..0aa6256eedd8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
 	.mmu.exit_mmap		= paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
-	.mmu.read_cr2		= native_read_cr2,
+	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(native_read_cr2),
 	.mmu.write_cr2		= native_write_cr2,
 	.mmu.read_cr3		= __native_read_cr3,
 	.mmu.write_cr3		= native_write_cr3,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4722ba2966ac..26b63d051bda 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void)
 			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
 		pv_ops.irq.irq_enable =
 			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
-		pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+		pv_ops.mmu.read_cr2 =
+			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
 	}
 }
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index f6e5eeecfc69..26e8b326966d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2)
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
-static unsigned long xen_read_cr2(void)
-{
-	return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
-	return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
 static noinline void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
 }
 
 static const struct pv_mmu_ops xen_mmu_ops __initconst = {
-	.read_cr2 = xen_read_cr2,
+	.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
 	.write_cr2 = xen_write_cr2,
 
 	.read_cr3 = xen_read_cr3,
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..be104eef80be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
 #include <asm/percpu.h>
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
+#include <asm/asm.h>
 
 #include <linux/linkage.h>
 
@@ -135,3 +136,18 @@ ENTRY(check_events)
 	FRAME_END
 	ret
 ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..45a441c33d6d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void);
 __visible unsigned long xen_save_fl_direct(void);
 __visible void xen_restore_fl_direct(unsigned long);
 
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [tip:x86/urgent] x86/entry/32: Simplify common_exception
  2019-07-11 11:40 ` [PATCH v3 2/6] x86/entry/32: Simplify common_exception Peter Zijlstra
@ 2019-07-17 21:23   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2019-07-17 21:23 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: mingo, luto, rostedt, peterz, tglx, linux-kernel, hpa

Commit-ID:  e67f1c11e5ea7fa47449a16325ecc997dbbf9bdf
Gitweb:     https://git.kernel.org/tip/e67f1c11e5ea7fa47449a16325ecc997dbbf9bdf
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 11 Jul 2019 13:40:56 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 17 Jul 2019 23:17:37 +0200

x86/entry/32: Simplify common_exception

Add one more option to SAVE_ALL so that it can be used in common_exception,
which simplifies things. This also saves duplication later, when page_fault
will no longer use common_exception.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: bp@alien8.de
Cc: torvalds@linux-foundation.org
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: jgross@suse.com
Cc: zhe.he@windriver.com
Cc: joel@joelfernandes.org
Cc: devel@etsukata.com
Link: https://lkml.kernel.org/r/20190711114335.945136187@infradead.org

---
 arch/x86/entry/entry_32.S | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90b473297299..4d4b6100f0e8 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,9 +294,11 @@
 .Lfinished_frame_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
 	cld
+.if \skip_gs == 0
 	PUSH_GS
+.endif
 	FIXUP_FRAME
 	pushl	%fs
 	pushl	%es
@@ -313,13 +315,13 @@
 	movl	%edx, %es
 	movl	$(__KERNEL_PERCPU), %edx
 	movl	%edx, %fs
+.if \skip_gs == 0
 	SET_KERNEL_GS %edx
-
+.endif
 	/* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
 	SWITCH_TO_KERNEL_STACK
 .endif
-
 .endm
 
 .macro SAVE_ALL_NMI cr3_reg:req
@@ -1448,32 +1450,20 @@ END(page_fault)
 
 common_exception:
 	/* the function address is in %gs's slot on the stack */
-	FIXUP_FRAME
-	pushl	%fs
-	pushl	%es
-	pushl	%ds
-	pushl	%eax
-	movl	$(__USER_DS), %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	$(__KERNEL_PERCPU), %eax
-	movl	%eax, %fs
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%edx
-	pushl	%ecx
-	pushl	%ebx
-	SWITCH_TO_KERNEL_STACK
+	SAVE_ALL switch_stacks=1 skip_gs=1
 	ENCODE_FRAME_POINTER
-	cld
 	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
 	GS_TO_REG %ecx
 	movl	PT_GS(%esp), %edi		# get the function address
-	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
-	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [tip:x86/urgent] x86/entry/64: Simplify idtentry a little
  2019-07-11 11:40 ` [PATCH v3 3/6] x86/entry/64: Simplify idtentry a little Peter Zijlstra
@ 2019-07-17 21:24   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2019-07-17 21:24 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: tglx, rostedt, luto, hpa, mingo, linux-kernel, peterz

Commit-ID:  2fd37912cfb019228bf246215938e6f7619516a2
Gitweb:     https://git.kernel.org/tip/2fd37912cfb019228bf246215938e6f7619516a2
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 11 Jul 2019 13:40:57 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 17 Jul 2019 23:17:37 +0200

x86/entry/64: Simplify idtentry a little

There's a bunch of duplication in idtentry, namely the
.Lfrom_usermode_switch_stack is a paranoid=0 copy of the normal flow.

Make this explicit by creating an idtentry_part helper macro.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: bp@alien8.de
Cc: torvalds@linux-foundation.org
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: jgross@suse.com
Cc: zhe.he@windriver.com
Cc: joel@joelfernandes.org
Cc: devel@etsukata.com
Link: https://lkml.kernel.org/r/20190711114336.002429503@infradead.org

---
 arch/x86/entry/entry_64.S | 102 ++++++++++++++++++++++------------------------
 1 file changed, 48 insertions(+), 54 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0ea4831a72a4..3db5fede743b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,6 +864,52 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
+.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+	.if \paranoid
+	call	paranoid_entry
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+	.else
+	call	error_entry
+	.endif
+	UNWIND_HINT_REGS
+
+	.if \paranoid
+	.if \shift_ist != -1
+	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
+	.else
+	TRACE_IRQS_OFF
+	.endif
+	.endif
+
+	movq	%rsp, %rdi			/* pt_regs pointer */
+
+	.if \has_error_code
+	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
+	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
+	.else
+	xorl	%esi, %esi			/* no error code */
+	.endif
+
+	.if \shift_ist != -1
+	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	call	\do_sym
+
+	.if \shift_ist != -1
+	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	.if \paranoid
+	/* this procedure expect "no swapgs" flag in ebx */
+	jmp	paranoid_exit
+	.else
+	jmp	error_exit
+	.endif
+
+.endm
+
 /**
  * idtentry - Generate an IDT entry stub
  * @sym:		Name of the generated entry point
@@ -934,47 +980,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	.if \paranoid
-	call	paranoid_entry
-	.else
-	call	error_entry
-	.endif
-	UNWIND_HINT_REGS
-	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
-	.if \paranoid
-	.if \shift_ist != -1
-	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
-	.else
-	TRACE_IRQS_OFF
-	.endif
-	.endif
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl	%esi, %esi			/* no error code */
-	.endif
-
-	.if \shift_ist != -1
-	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	call	\do_sym
-
-	.if \shift_ist != -1
-	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	/* these procedures expect "no swapgs" flag in ebx */
-	.if \paranoid
-	jmp	paranoid_exit
-	.else
-	jmp	error_exit
-	.endif
+	idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -983,21 +989,9 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	call	error_entry
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl	%esi, %esi			/* no error code */
+	idtentry_part \do_sym, \has_error_code, paranoid=0
 	.endif
 
-	call	\do_sym
-
-	jmp	error_exit
-	.endif
 _ASM_NOKPROBE(\sym)
 END(\sym)
 .endm

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [tip:x86/urgent] x86/entry/64: Update comments and sanity tests for create_gap
  2019-07-11 11:40 ` [PATCH v3 4/6] x86/entry/64: Update comments and sanity tests for create_gap Peter Zijlstra
@ 2019-07-17 21:25   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2019-07-17 21:25 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: peterz, linux-kernel, hpa, mingo, rostedt, tglx, luto

Commit-ID:  4234653e882740cbf6625eeee294e388b3176583
Gitweb:     https://git.kernel.org/tip/4234653e882740cbf6625eeee294e388b3176583
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 11 Jul 2019 13:40:58 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 17 Jul 2019 23:17:38 +0200

x86/entry/64: Update comments and sanity tests for create_gap

Commit 2700fefdb2d9 ("x86_64: Add gap to int3 to allow for call
emulation") forgot to update the comment; do so now.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: bp@alien8.de
Cc: torvalds@linux-foundation.org
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: jgross@suse.com
Cc: zhe.he@windriver.com
Cc: joel@joelfernandes.org
Cc: devel@etsukata.com
Link: https://lkml.kernel.org/r/20190711114336.059780563@infradead.org

---
 arch/x86/entry/entry_64.S | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3db5fede743b..95ae05f0edf2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -913,15 +913,16 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 /**
  * idtentry - Generate an IDT entry stub
  * @sym:		Name of the generated entry point
- * @do_sym: 		C function to be called
- * @has_error_code: 	True if this IDT vector has an error code on the stack
- * @paranoid: 		non-zero means that this vector may be invoked from
+ * @do_sym:		C function to be called
+ * @has_error_code:	True if this IDT vector has an error code on the stack
+ * @paranoid:		non-zero means that this vector may be invoked from
  *			kernel mode with user GSBASE and/or user CR3.
  *			2 is special -- see below.
  * @shift_ist:		Set to an IST index if entries from kernel mode should
- *             		decrement the IST stack so that nested entries get a
+ *			decrement the IST stack so that nested entries get a
  *			fresh stack.  (This is for #DB, which has a nasty habit
- *             		of recursing.)
+ *			of recursing.)
+ * @create_gap:		create a 6-word stack gap when coming from kernel mode.
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym.  The stub has the following
@@ -951,10 +952,14 @@ ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
-	.if \shift_ist != -1 && \paranoid == 0
+	.if \shift_ist != -1 && \paranoid != 1
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
+	.if \create_gap && \paranoid
+	.error "using create_gap requires paranoid=0"
+	.endif
+
 	ASM_CLAC
 
 	.if \has_error_code == 0

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [tip:x86/urgent] x86/mm, tracing: Fix CR2 corruption
  2019-07-11 11:40 ` [PATCH v3 5/6] x86/mm, tracing: Fix CR2 corruption Peter Zijlstra
@ 2019-07-17 21:25   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2019-07-17 21:25 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, peterz, linux-kernel, zhe.he, rostedt, devel, hpa, luto, tglx

Commit-ID:  a0d14b8909de55139b8702fe0c7e80b69763dcfb
Gitweb:     https://git.kernel.org/tip/a0d14b8909de55139b8702fe0c7e80b69763dcfb
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 11 Jul 2019 13:40:59 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 17 Jul 2019 23:17:38 +0200

x86/mm, tracing: Fix CR2 corruption

Despite the current efforts to read CR2 before tracing happens there still
exist a number of possible holes:

  idtentry page_fault             do_page_fault           has_error_code=1
    call error_entry
      TRACE_IRQS_OFF
        call trace_hardirqs_off*
          #PF // modifies CR2

      CALL_enter_from_user_mode
        __context_tracking_exit()
          trace_user_exit(0)
            #PF // modifies CR2

    call do_page_fault
      address = read_cr2(); /* whoopsie */

And similarly for i386.

Fix it by pulling the CR2 read into the entry code, before any of that
stuff gets a chance to run and ruin things.

Reported-by: He Zhe <zhe.he@windriver.com>
Reported-by: Eiichi Tsukata <devel@etsukata.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: bp@alien8.de
Cc: rostedt@goodmis.org
Cc: torvalds@linux-foundation.org
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: jgross@suse.com
Cc: joel@joelfernandes.org
Link: https://lkml.kernel.org/r/20190711114336.116812491@infradead.org

Debugged-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/entry/entry_32.S       | 25 ++++++++++++++++++++++---
 arch/x86/entry/entry_64.S       | 35 ++++++++++++++++++-----------------
 arch/x86/include/asm/kvm_para.h |  2 +-
 arch/x86/include/asm/traps.h    |  4 ++--
 arch/x86/kernel/kvm.c           |  8 ++++----
 arch/x86/kernel/traps.c         |  6 +-----
 arch/x86/mm/fault.c             | 30 +++++++++++-------------------
 7 files changed, 59 insertions(+), 51 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4d4b6100f0e8..2bb986f305ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1443,9 +1443,28 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 
 ENTRY(page_fault)
 	ASM_CLAC
-	pushl	$do_page_fault
-	ALIGN
-	jmp common_exception
+	pushl	$0; /* %gs's slot on the stack */
+
+	SAVE_ALL switch_stacks=1 skip_gs=1
+
+	ENCODE_FRAME_POINTER
+	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
+	GS_TO_REG %ecx
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
+
+	GET_CR2_INTO(%ecx)			# might clobber %eax
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
+	TRACE_IRQS_OFF
+	movl	%esp, %eax			# pt_regs pointer
+	call	do_page_fault
+	jmp	ret_from_exception
 END(page_fault)
 
 common_exception:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 95ae05f0edf2..7cb2e1f1ec09 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,7 +864,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
-.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
 
 	.if \paranoid
 	call	paranoid_entry
@@ -874,12 +874,21 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 	.endif
 	UNWIND_HINT_REGS
 
-	.if \paranoid
+	.if \read_cr2
+	GET_CR2_INTO(%rdx);			/* can clobber %rax */
+	.endif
+
 	.if \shift_ist != -1
 	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
 	.else
 	TRACE_IRQS_OFF
 	.endif
+
+	.if \paranoid == 0
+	testb	$3, CS(%rsp)
+	jz	.Lfrom_kernel_no_context_tracking_\@
+	CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
 	.endif
 
 	movq	%rsp, %rdi			/* pt_regs pointer */
@@ -923,6 +932,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  *			fresh stack.  (This is for #DB, which has a nasty habit
  *			of recursing.)
  * @create_gap:		create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2:		load CR2 into the 3rd argument; done before calling any C code
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym.  The stub has the following
@@ -947,7 +957,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
@@ -985,7 +995,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset
+	idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -994,7 +1004,7 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_part \do_sym, \has_error_code, paranoid=0
+	idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
 	.endif
 
 _ASM_NOKPROBE(\sym)
@@ -1006,7 +1016,7 @@ idtentry overflow			do_overflow			has_error_code=0
 idtentry bounds				do_bounds			has_error_code=0
 idtentry invalid_op			do_invalid_op			has_error_code=0
 idtentry device_not_available		do_device_not_available		has_error_code=0
-idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2
+idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2 read_cr2=1
 idtentry coprocessor_segment_overrun	do_coprocessor_segment_overrun	has_error_code=0
 idtentry invalid_TSS			do_invalid_TSS			has_error_code=1
 idtentry segment_not_present		do_segment_not_present		has_error_code=1
@@ -1179,10 +1189,10 @@ idtentry xenint3		do_int3			has_error_code=0
 #endif
 
 idtentry general_protection	do_general_protection	has_error_code=1
-idtentry page_fault		do_page_fault		has_error_code=1
+idtentry page_fault		do_page_fault		has_error_code=1	read_cr2=1
 
 #ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault	do_async_page_fault	has_error_code=1
+idtentry async_page_fault	do_async_page_fault	has_error_code=1	read_cr2=1
 #endif
 
 #ifdef CONFIG_X86_MCE
@@ -1281,18 +1291,9 @@ ENTRY(error_entry)
 	movq	%rax, %rsp			/* switch stack */
 	ENCODE_FRAME_POINTER
 	pushq	%r12
-
-	/*
-	 * We need to tell lockdep that IRQs are off.  We can't do this until
-	 * we fix gsbase, and we should do it before enter_from_user_mode
-	 * (which can take locks).
-	 */
-	TRACE_IRQS_OFF
-	CALL_enter_from_user_mode
 	ret
 
 .Lerror_entry_done:
-	TRACE_IRQS_OFF
 	ret
 
 	/*
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..9b4df6eaa11a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init kvm_spinlock_init(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7d6f3f3fad78..5dd1674ddf4c 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
 dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
 #ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
 void __init trap_init(void);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
 dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 82caf01b63dd..3231440d6253 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
 NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
 dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
 	enum ctx_state prev_state;
 
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
-		do_page_fault(regs, error_code);
+		do_page_fault(regs, error_code, address);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
 		prev_state = exception_enter();
-		kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+		kvm_async_pf_task_wait((u32)address, !user_mode(regs));
 		exception_exit(prev_state);
 		break;
 	case KVM_PV_REASON_PAGE_READY:
 		rcu_irq_enter();
-		kvm_async_pf_task_wake((u32)read_cr2());
+		kvm_async_pf_task_wake((u32)address);
 		rcu_irq_exit();
 		break;
 	}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 87095a477154..4bb0f8447112 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
-	unsigned long cr2;
-#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	 * stack even if the actual trigger for the double fault was
 	 * something else.
 	 */
-	cr2 = read_cr2();
 	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
 		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
 #endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 794f364cb882..0799cc79efd3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1507,9 +1507,8 @@ good_area:
 NOKPROBE_SYMBOL(do_user_addr_fault);
 
 /*
- * This routine handles page faults.  It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
  */
 static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1528,33 +1527,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-			 unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+			 unsigned long address)
 {
+	if (!trace_pagefault_enabled())
+		return;
+
 	if (user_mode(regs))
 		trace_page_fault_user(address, regs, error_code);
 	else
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	if (trace_pagefault_enabled())
-		trace_page_fault_entries(address, regs, error_code);
-
+	trace_page_fault_entries(regs, error_code, address);
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH] stacktrace: Force USER_DS for stack_trace_save_user()
  2019-07-17  8:09     ` Vegard Nossum
@ 2019-07-18  8:57       ` Peter Zijlstra
  2019-07-18 13:21         ` Joel Fernandes
  2019-07-18 14:52         ` [tip:core/urgent] " tip-bot for Peter Zijlstra
  0 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2019-07-18  8:57 UTC (permalink / raw)
  To: Vegard Nossum
  Cc: tglx, bp, mingo, rostedt, luto, torvalds, hpa, dave.hansen,
	jgross, linux-kernel, zhe.he, joel, devel

On Wed, Jul 17, 2019 at 10:09:45AM +0200, Vegard Nossum wrote:
> On 7/17/19 10:07 AM, Peter Zijlstra wrote:

> > Does something like the below help?

> Yes.

Thanks!

---
Subject: stacktrace: Force USER_DS for stack_trace_save_user()
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Jul 18 10:47:47 CEST 2019

When walking userspace stacks, we should set USER_DS, otherwise
access_ok() will not function as expected.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Tested-by: Vegard Nossum <vegard.nossum@oracle.com>
Reported-by: Eiichi Tsukata <devel@etsukata.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -226,12 +226,17 @@ unsigned int stack_trace_save_user(unsig
 		.store	= store,
 		.size	= size,
 	};
+	mm_segment_t fs;
 
 	/* Trace user stack if not a kernel thread */
 	if (current->flags & PF_KTHREAD)
 		return 0;
 
+	fs = get_fs();
+	set_fs(USER_DS);
 	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
+	set_fs(fs);
+
 	return c.len;
 }
 #endif

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH] stacktrace: Force USER_DS for stack_trace_save_user()
  2019-07-18  8:57       ` [PATCH] stacktrace: Force USER_DS for stack_trace_save_user() Peter Zijlstra
@ 2019-07-18 13:21         ` Joel Fernandes
  2019-07-18 14:52         ` [tip:core/urgent] " tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 29+ messages in thread
From: Joel Fernandes @ 2019-07-18 13:21 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Vegard Nossum, tglx, bp, mingo, rostedt, luto, torvalds, hpa,
	dave.hansen, jgross, linux-kernel, zhe.he, devel

On Thu, Jul 18, 2019 at 10:57:54AM +0200, Peter Zijlstra wrote:
> On Wed, Jul 17, 2019 at 10:09:45AM +0200, Vegard Nossum wrote:
> > On 7/17/19 10:07 AM, Peter Zijlstra wrote:
> 
> > > Does something like the below help?
> 
> > Yes.
> 
> Thanks!
> 
> ---
> Subject: stacktrace: Force USER_DS for stack_trace_save_user()
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu Jul 18 10:47:47 CEST 2019
> 
> When walking userspace stacks, we should set USER_DS, otherwise
> access_ok() will not function as expected.
> 
> Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
> Tested-by: Vegard Nossum <vegard.nossum@oracle.com>
> Reported-by: Eiichi Tsukata <devel@etsukata.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>

thanks,

 - Joel


> ---
> --- a/kernel/stacktrace.c
> +++ b/kernel/stacktrace.c
> @@ -226,12 +226,17 @@ unsigned int stack_trace_save_user(unsig
>  		.store	= store,
>  		.size	= size,
>  	};
> +	mm_segment_t fs;
>  
>  	/* Trace user stack if not a kernel thread */
>  	if (current->flags & PF_KTHREAD)
>  		return 0;
>  
> +	fs = get_fs();
> +	set_fs(USER_DS);
>  	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
> +	set_fs(fs);
> +
>  	return c.len;
>  }
>  #endif

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [tip:core/urgent] stacktrace: Force USER_DS for stack_trace_save_user()
  2019-07-18  8:57       ` [PATCH] stacktrace: Force USER_DS for stack_trace_save_user() Peter Zijlstra
  2019-07-18 13:21         ` Joel Fernandes
@ 2019-07-18 14:52         ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2019-07-18 14:52 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, mingo, vegard.nossum, devel, linux-kernel, hpa, joel, peterz

Commit-ID:  cac9b9a4b08304f11daace03b8b48659355e44c1
Gitweb:     https://git.kernel.org/tip/cac9b9a4b08304f11daace03b8b48659355e44c1
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 18 Jul 2019 10:47:47 +0200
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 18 Jul 2019 16:47:24 +0200

stacktrace: Force USER_DS for stack_trace_save_user()

When walking userspace stacks, USER_DS needs to be set, otherwise
access_ok() will not function as expected.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Reported-by: Eiichi Tsukata <devel@etsukata.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Vegard Nossum <vegard.nossum@oracle.com>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Link: https://lkml.kernel.org/r/20190718085754.GM3402@hirez.programming.kicks-ass.net
---
 kernel/stacktrace.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index e6a02b274b73..f5440abb7532 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -226,12 +226,17 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
 		.store	= store,
 		.size	= size,
 	};
+	mm_segment_t fs;
 
 	/* Trace user stack if not a kernel thread */
 	if (current->flags & PF_KTHREAD)
 		return 0;
 
+	fs = get_fs();
+	set_fs(USER_DS);
 	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
+	set_fs(fs);
+
 	return c.len;
 }
 #endif

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-17  9:37     ` Eiichi Tsukata
@ 2019-07-18 20:27       ` Andy Lutomirski
  2019-07-20  3:59         ` Eiichi Tsukata
  0 siblings, 1 reply; 29+ messages in thread
From: Andy Lutomirski @ 2019-07-18 20:27 UTC (permalink / raw)
  To: Eiichi Tsukata
  Cc: Vegard Nossum, Peter Zijlstra, Thomas Gleixner, Borislav Petkov,
	Ingo Molnar, Steven Rostedt, Andrew Lutomirski, Linus Torvalds,
	linux_lkml_grp, H. Peter Anvin, Dave Hansen, Juergen Gross, LKML,
	He Zhe, Joel Fernandes

Hi all-

I suspect that a bunch of the bugs you're all finding boil down to:

 - Nested debug exceptions could corrupt the outer exception's DR6.
 - Nested debug exceptions in which *both* exceptions came from the
kernel were probably all kinds of buggy
 - Data breakpoints in bad places in the kernel were bad news

Could you give this not-quite-finished series a try?

https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-18 20:27       ` Andy Lutomirski
@ 2019-07-20  3:59         ` Eiichi Tsukata
  2019-07-20 12:49           ` Andy Lutomirski
  0 siblings, 1 reply; 29+ messages in thread
From: Eiichi Tsukata @ 2019-07-20  3:59 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Vegard Nossum, Peter Zijlstra, Thomas Gleixner, Borislav Petkov,
	Ingo Molnar, Steven Rostedt, Linus Torvalds, linux_lkml_grp,
	H. Peter Anvin, Dave Hansen, Juergen Gross, LKML, He Zhe,
	Joel Fernandes


On 2019/07/19 5:27, Andy Lutomirski wrote:
> Hi all-
> 
> I suspect that a bunch of the bugs you're all finding boil down to:
> 
>  - Nested debug exceptions could corrupt the outer exception's DR6.
>  - Nested debug exceptions in which *both* exceptions came from the
> kernel were probably all kinds of buggy
>  - Data breakpoints in bad places in the kernel were bad news
> 
> Could you give this not-quite-finished series a try?
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/
> 

Though I'm still trying to find other cases (other areas which could
be buggy if we set hw breakpoints), as far as I have tested, there is
no problem so far.

If I understand correctly, the call trace and the dr6 value will be: 

====

debug() // dr6: 0xffff4ff0, user_mode: 1
  TRACE_IRQS_OFF
    arch_stack_walk_user()
      debug()  // dr6: 0xffff4ff1 == 0xffff4ff0 | 0xffff0ff1 ... (*)
        do_debug()
          WARN_ON_ONCE
  do_debug() // dr6: 0xffff0ff0(cleared in the above do_debug())

(*) :
>   * The Intel SDM says:
>   *
>   *   Certain debug exceptions may clear bits 0-3. The remaining
>   *   contents of the DR6 register are never cleared by the
>   *   processor. To avoid confusion in identifying debug
>   *   exceptions, debug handlers should clear the register before
>   *   returning to the interrupted task.

====

Note: printk() in do_debug() can cause an infinite loop (printk() ->
irq_disable() -> do_debug() -> printk() ...), so printk_deferred()
was preferable.

Thanks

Eiichi

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-20  3:59         ` Eiichi Tsukata
@ 2019-07-20 12:49           ` Andy Lutomirski
  2019-07-20 14:23             ` Eiichi Tsukata
  0 siblings, 1 reply; 29+ messages in thread
From: Andy Lutomirski @ 2019-07-20 12:49 UTC (permalink / raw)
  To: Eiichi Tsukata
  Cc: Andy Lutomirski, Vegard Nossum, Peter Zijlstra, Thomas Gleixner,
	Borislav Petkov, Ingo Molnar, Steven Rostedt, Linus Torvalds,
	linux_lkml_grp, H. Peter Anvin, Dave Hansen, Juergen Gross, LKML,
	He Zhe, Joel Fernandes

On Fri, Jul 19, 2019 at 8:59 PM Eiichi Tsukata <devel@etsukata.com> wrote:
>
>
> On 2019/07/19 5:27, Andy Lutomirski wrote:
> > Hi all-
> >
> > I suspect that a bunch of the bugs you're all finding boil down to:
> >
> >  - Nested debug exceptions could corrupt the outer exception's DR6.
> >  - Nested debug exceptions in which *both* exceptions came from the
> > kernel were probably all kinds of buggy
> >  - Data breakpoints in bad places in the kernel were bad news
> >
> > Could you give this not-quite-finished series a try?
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/
> >
>
> Though I'm still trying to find out other cases(other areas which could
> be buggy if we set hw breakpoints), as far as I tested, there is
> no problem so far.
>
> If I understand correctly, the call trace and the dr6 value will be:
>
> ====
>
> debug() // dr6: 0xffff4ff0, user_mode: 1
>   TRACE_IRQS_OFF
>     arch_stack_user_walk()
>       debug()  // dr6: 0xffff4ff1 == 0xffff4ff0 | 0xffff0ff1 ... (*)
>         do_debug()
>           WARN_ON_ONCE
>   do_debug() // dr6: 0xffff0ff0(cleared in the above do_debug())

The dr6 register will indeed be cleared like this, but the dr6
variable should still be 0xffff4ff0.

>
> (*) :
> >   * The Intel SDM says:
> >   *
> >   *   Certain debug exceptions may clear bits 0-3. The remaining
> >   *   contents of the DR6 register are never cleared by the
> >   *   processor. To avoid confusion in identifying debug
> >   *   exceptions, debug handlers should clear the register before
> >   *   returning to the interrupted task.
>
> ====
>
> Note: printk() in do_debug() can cause infinite loop(printk() ->
> irq_disable() -> do_debug() -> printk() ...), so printk_deferred()
> was preferable.
>

Shouldn't that be fixed with my patches?  It should only be able to
recurse two deep: do_debug() from user mode can indeed trip
breakpoints, but the next do_debug() will clear DR7 in paranoid_entry.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v3 0/6] Tracing vs CR2
  2019-07-20 12:49           ` Andy Lutomirski
@ 2019-07-20 14:23             ` Eiichi Tsukata
  0 siblings, 0 replies; 29+ messages in thread
From: Eiichi Tsukata @ 2019-07-20 14:23 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Vegard Nossum, Peter Zijlstra, Thomas Gleixner, Borislav Petkov,
	Ingo Molnar, Steven Rostedt, Linus Torvalds, linux_lkml_grp,
	H. Peter Anvin, Dave Hansen, Juergen Gross, LKML, He Zhe,
	Joel Fernandes


On 2019/07/20 21:49, Andy Lutomirski wrote:
> On Fri, Jul 19, 2019 at 8:59 PM Eiichi Tsukata <devel@etsukata.com> wrote:
>>
...
>>
>> ====
>>
>> debug() // dr6: 0xffff4ff0, user_mode: 1
>>   TRACE_IRQS_OFF
>>     arch_stack_user_walk()
>>       debug()  // dr6: 0xffff4ff1 == 0xffff4ff0 | 0xffff0ff1 ... (*)
>>         do_debug()
>>           WARN_ON_ONCE
>>   do_debug() // dr6: 0xffff0ff0(cleared in the above do_debug())
> 
> The dr6 register will indeed be cleared like this, but the dr6
> variable should still be 0xffff4ff0.

I should have used "DR6" to mean the register, not the variable.
"dr6" was ambiguous.

> 
>>
...
>>
>> Note: printk() in do_debug() can cause infinite loop(printk() ->
>> irq_disable() -> do_debug() -> printk() ...), so printk_deferred()
>> was preferable.
>>
> 
> Shouldn't that be fixed with my patches?  It should only be able to
> recurse two deep: do_debug() from user mode can indeed trip
> breakpoints, but the next do_debug() will clear DR7 in paranoid_entry.
> 

Sorry, I missed that. Now I confirmed your patches fixed the loop.

Thanks

Eiichi 



^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2019-07-20 14:24 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-11 11:40 [PATCH v3 0/6] Tracing vs CR2 Peter Zijlstra
2019-07-11 11:40 ` [PATCH v3 1/6] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
2019-07-17 21:22   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
2019-07-11 11:40 ` [PATCH v3 2/6] x86/entry/32: Simplify common_exception Peter Zijlstra
2019-07-17 21:23   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
2019-07-11 11:40 ` [PATCH v3 3/6] x86/entry/64: Simplify idtentry a little Peter Zijlstra
2019-07-17 21:24   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
2019-07-11 11:40 ` [PATCH v3 4/6] x86/entry/64: Update comments and sanity tests for create_gap Peter Zijlstra
2019-07-17 21:25   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
2019-07-11 11:40 ` [PATCH v3 5/6] x86/mm, tracing: Fix CR2 corruption Peter Zijlstra
2019-07-17 21:25   ` [tip:x86/urgent] " tip-bot for Peter Zijlstra
2019-07-11 11:41 ` [PATCH v3 6/6] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Peter Zijlstra
2019-07-11 14:45   ` Andy Lutomirski
2019-07-11 18:28     ` Peter Zijlstra
2019-07-16 19:33 ` [PATCH v3 0/6] Tracing vs CR2 Vegard Nossum
2019-07-16 21:51   ` Vegard Nossum
2019-07-17  1:02     ` Andy Lutomirski
2019-07-17  7:46       ` Vegard Nossum
2019-07-17  7:47       ` Peter Zijlstra
2019-07-17  9:37     ` Eiichi Tsukata
2019-07-18 20:27       ` Andy Lutomirski
2019-07-20  3:59         ` Eiichi Tsukata
2019-07-20 12:49           ` Andy Lutomirski
2019-07-20 14:23             ` Eiichi Tsukata
2019-07-17  8:07   ` Peter Zijlstra
2019-07-17  8:09     ` Vegard Nossum
2019-07-18  8:57       ` [PATCH] stacktrace: Force USER_DS for stack_trace_save_user() Peter Zijlstra
2019-07-18 13:21         ` Joel Fernandes
2019-07-18 14:52         ` [tip:core/urgent] " tip-bot for Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).