From: Peter Zijlstra <peterz@infradead.org>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andy Lutomirski <luto@kernel.org>,
	Nicolai Stange <nstange@suse.de>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	the arch/x86 maintainers <x86@kernel.org>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Jiri Kosina <jikos@kernel.org>, Miroslav Benes <mbenes@suse.cz>,
	Petr Mladek <pmladek@suse.com>,
	Joe Lawrence <joe.lawrence@redhat.com>,
	Shuah Khan <shuah@kernel.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Mimi Zohar <zohar@linux.ibm.com>, Juergen Gross <jgross@suse.com>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Nayna Jain <nayna@linux.ibm.com>,
	Masahiro Yamada <yamada.masahiro@socionext.com>,
	Joerg Roedel <jroedel@suse.de>,
	"open list:KERNEL SELFTEST FRAMEWORK" 
	<linux-kselftest@vger.kernel.org>,
	stable@vger.kernel.org
Subject: Re: [RFC][PATCH 1/2] x86: Allow breakpoints to emulate call functions
Date: Thu, 2 May 2019 18:21:33 +0200	[thread overview]
Message-ID: <20190502162133.GX2623@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20190501232412.1196ef18@oasis.local.home>

On Wed, May 01, 2019 at 11:24:12PM -0400, Steven Rostedt wrote:
> On Wed, 01 May 2019 16:28:31 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> > index d309f30cf7af..50bbf4035baf 100644
> > --- a/arch/x86/entry/entry_32.S
> > +++ b/arch/x86/entry/entry_32.S
> > @@ -1478,6 +1478,17 @@ ENTRY(int3)
> >  	ASM_CLAC
> >  	pushl	$-1				# mark this as an int
> >  
> > +#ifdef CONFIG_VM86
> > +	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esp)
> > +	jnz	.Lfrom_usermode_no_gap
> > +#endif
> > +	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
> > +	jnz	.Lfrom_usermode_no_gap
> > +	.rept 6
> > +	pushl	5*4(%esp)
> > +	.endr
> > +.Lfrom_usermode_no_gap:
> > +
> >  	SAVE_ALL switch_stacks=1
> >  	ENCODE_FRAME_POINTER
> >  	TRACE_IRQS_OFF
> 
> This failed to work on 32 bit at all (crashed and burned badly - triple
> fault!). 

Indeed so; find a working version below (albeit with a lot of debug
garbage still in).

It also includes the self-test code that Andy wanted -- it's what I used
to debug this mess.

Many thanks to Joerg Roedel for talking through entry_32.S with me.

TL;DR: on x86_32, kernel->kernel IRET frames are only 3 entries and do
not include ESP/SS, so not only was regs->sp never set up; even if you
changed it, the change would have no effect and would instead corrupt
random stack state.
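
To make that concrete, here is an illustrative sketch (not part of the
patch; it mirrors what int3_exception_notify() in the selftest below
does) of how a breakpoint handler is meant to use the new
text-patching.h helpers.  example_patch_site and example_target are
placeholder names for the address of the planted INT3 and the function
to redirect to:

static unsigned long example_patch_site;  /* address of the planted INT3 */
static void example_target(void);         /* function to "call" instead  */

static int example_int3_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct die_args *args = data;
	struct pt_regs *regs = args->regs;

	if (val != DIE_INT3 || !regs || user_mode(regs))
		return NOTIFY_DONE;

	/* Only react to the INT3 we planted ourselves. */
	if (regs->ip - INT3_INSN_SIZE != example_patch_site)
		return NOTIFY_DONE;

	/*
	 * Push a return address as if a 5-byte CALL had executed and
	 * redirect to the target.  This writes through regs->sp, which
	 * is exactly what the entry code changes below make safe for
	 * kernel->kernel INT3.
	 */
	int3_emulate_call(regs, (unsigned long)example_target);

	return NOTIFY_STOP;
}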

---
 arch/x86/entry/entry_32.S            |  87 +++++++++++++++++++++++---
 arch/x86/entry/entry_64.S            |  14 ++++-
 arch/x86/include/asm/text-patching.h |  20 ++++++
 arch/x86/kernel/alternative.c        | 116 +++++++++++++++++++++++++++++++++--
 arch/x86/kernel/traps.c              |   1 +
 5 files changed, 225 insertions(+), 13 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..01c5bdbe5f39 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -203,7 +203,7 @@
 .Lend_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 clear_cs=1
 	cld
 	PUSH_GS
 	pushl	%fs
@@ -225,7 +225,7 @@
 
 	/* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
-	SWITCH_TO_KERNEL_STACK
+	SWITCH_TO_KERNEL_STACK \clear_cs
 .endif
 
 .endm
@@ -377,8 +377,9 @@
 
 #define CS_FROM_ENTRY_STACK	(1 << 31)
 #define CS_FROM_USER_CR3	(1 << 30)
+#define CS_FROM_INT3		(1 << 29)
 
-.macro SWITCH_TO_KERNEL_STACK
+.macro SWITCH_TO_KERNEL_STACK clear_cs=1
 
 	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
 
@@ -391,12 +392,13 @@
 	 * that register for the time this macro runs
 	 */
 
+	.if \clear_cs
 	/*
-	 * The high bits of the CS dword (__csh) are used for
-	 * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
-	 * hardware didn't do this for us.
+	 * The high bits of the CS dword (__csh) are used for CS_FROM_*. Clear
+	 * them in case hardware didn't do this for us.
 	 */
 	andl	$(0x0000ffff), PT_CS(%esp)
+	.endif
 
 	/* Are we on the entry stack? Bail out if not! */
 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
@@ -1019,6 +1021,29 @@ ENTRY(entry_INT80_32)
 	/* Restore user state */
 	RESTORE_REGS pop=4			# skip orig_eax/error_code
 .Lirq_return:
+	testl $CS_FROM_INT3, 4(%esp)
+	jz .Lno_iret_fixup
+
+	/*
+	 * Undo the magic from ENTRY(int3), in particular consider the case
+	 * where regs->sp has been modified.
+	 */
+
+	pushl	%eax
+	movl	%esp, %eax
+
+	movl	4*4(%eax), %esp		# restore (modified) regs->sp
+
+	/* rebuild IRET frame */
+	pushl	3*4(%eax)		# flags
+	pushl	2*4(%eax)		# cs
+	pushl	1*4(%eax)		# ip
+
+	andl	$0x0000ffff, 4(%esp)	# clear high CS bits
+
+	movl	(%eax), %eax		# restore eax
+
+.Lno_iret_fixup:
 	/*
 	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
 	 * when returning from IPI handler and when returning from
@@ -1477,9 +1502,57 @@ END(nmi)
 
 ENTRY(int3)
 	ASM_CLAC
+
+	/*
+	 * The high bits of the CS dword (__csh) are used for CS_FROM_*. Clear
+	 * them in case hardware didn't do this for us.
+	 */
+	andl	$0x0000ffff, 4(%esp)
+
+#ifdef CONFIG_VM86
+	testl	$X86_EFLAGS_VM, 8(%esp)
+	jnz	.Lfrom_usermode_no_gap
+#endif
+	testl	$SEGMENT_RPL_MASK, 4(%esp)
+	jnz	.Lfrom_usermode_no_gap
+
+	/*
+	 * Here from kernel mode; so the (exception) stack looks like:
+	 *
+	 * 12(esp) - <previous context>
+	 *  8(esp) - flags
+	 *  4(esp) - cs
+	 *  0(esp) - ip
+	 *
+	 * Let's build a 5-entry IRET frame after that, such that struct pt_regs
+	 * is complete and, in particular, regs->sp is correct. This gives us
+	 * the original 3 entries as a gap:
+	 *
+	 * 32(esp) - <previous context>
+	 * 28(esp) - orig_flags / gap
+	 * 24(esp) - orig_cs	/ gap
+	 * 20(esp) - orig_ip	/ gap
+	 * 16(esp) - ss
+	 * 12(esp) - sp
+	 *  8(esp) - flags
+	 *  4(esp) - cs
+	 *  0(esp) - ip
+	 */
+	pushl	%ss	  # ss
+	pushl	%esp      # sp (points at ss)
+	pushl	4*4(%esp) # flags
+	pushl	4*4(%esp) # cs
+	pushl	4*4(%esp) # ip
+
+	add	$16, 12(%esp) # point sp back at the previous context
+
+	orl	$CS_FROM_INT3, 4(%esp) # mark magic IRET
+
+.Lfrom_usermode_no_gap:
+
 	pushl	$-1				# mark this as an int
 
-	SAVE_ALL switch_stacks=1
+	SAVE_ALL switch_stacks=1 clear_cs=0
 	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	xorl	%edx, %edx			# zero error code
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 20e45d9b4e15..268cd9affe04 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
@@ -898,6 +898,16 @@ ENTRY(\sym)
 	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
 
+	.if \create_gap == 1
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jnz	.Lfrom_usermode_no_gap_\@
+	.rept 6
+	pushq	5*8(%rsp)
+	.endr
+	UNWIND_HINT_IRET_REGS offset=8
+.Lfrom_usermode_no_gap_\@:
+	.endif
+
 	.if \paranoid
 	call	paranoid_entry
 	.else
@@ -1129,7 +1139,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
-idtentry int3			do_int3			has_error_code=0
+idtentry int3			do_int3			has_error_code=0	create_gap=1
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN_PV
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index c90678fd391a..6aac6abf931e 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -42,4 +42,24 @@ extern int after_bootmem;
 extern __ro_after_init struct mm_struct *poking_mm;
 extern __ro_after_init unsigned long poking_addr;
 
+static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
+{
+	regs->sp -= sizeof(unsigned long);
+	*(unsigned long *)regs->sp = val;
+}
+
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+	regs->ip = ip;
+}
+
+#define INT3_INSN_SIZE 1
+#define CALL_INSN_SIZE 5
+
+static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+	int3_emulate_jmp(regs, func);
+}
+
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4db9c0d29bc1..1e11076c3a2b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -613,11 +613,118 @@ extern struct paravirt_patch_site __start_parainstructions[],
 	__stop_parainstructions[];
 #endif	/* CONFIG_PARAVIRT */
 
+static __always_inline void print_stack(struct pt_regs *regs)
+{
+#if 1
+	unsigned long *end = (unsigned long *)current_stack_pointer;
+	unsigned long *frame = (unsigned long *)__builtin_frame_address(0);
+	unsigned long *stack = (unsigned long *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+	int i, j;
+
+	stack += THREAD_SIZE / sizeof(unsigned long);
+
+	printk("stack dump from: %lx\n", stack);
+
+	for (i=0; ; i++) {
+		pr_info("stack[%03d]: ", 16*i);
+		for (j=0; j<16; j++) {
+			if (i==0 && j==0) {
+				pr_cont(" %08lx  ", 0UL);
+				stack--;
+				continue;
+			}
+			if (stack == end)
+				pr_cont(">%08lx< ", *(stack--));
+			else if (stack == frame)
+				pr_cont("*%08lx* ", *(stack--));
+			else if (stack == regs)
+				pr_cont("r%08lxr ", *(stack--));
+			else if (regs && stack == regs->sp)
+				pr_cont("s%08lxs ", *(stack--));
+			else
+				pr_cont(" %08lx  ", *(stack--));
+		}
+		pr_cont("\n");
+
+		if (stack < end)
+			break;
+	}
+#endif
+}
+
+static void __init int3_magic(unsigned int *ptr)
+{
+	printk("*************** %lx\n", (unsigned long)ptr);
+	print_stack(NULL);
+	*ptr = 1;
+}
+
+static __initdata unsigned long int3_ip;
+
+static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct die_args *args = data;
+	struct pt_regs *regs = args->regs;
+
+	if (!regs || user_mode(regs))
+		return NOTIFY_DONE;
+
+	if (val != DIE_INT3)
+		return NOTIFY_DONE;
+
+	printk("XXXXXXXXXXXXXXXXXXXXXXXXXX %lx %lx\n", regs->ip, int3_ip);
+	if (regs->ip - INT3_INSN_SIZE != int3_ip)
+		return NOTIFY_DONE;
+
+	print_stack(regs);
+	int3_emulate_call(regs, (unsigned long)&int3_magic);
+	print_stack(regs);
+
+	return NOTIFY_STOP;
+}
+
+static void __init int3_selftest(void)
+{
+	static __initdata struct notifier_block int3_exception_nb = {
+		.notifier_call	= int3_exception_notify,
+		.priority	= INT_MAX-1, /* last */
+	};
+	unsigned int val = 0;
+
+	BUG_ON(register_die_notifier(&int3_exception_nb));
+
+	printk("+++++++++++++++++++ %lx %lx\n", (unsigned long)&val, (unsigned long)&int3_ip);
+
+	print_stack(NULL);
+
+	/*
+	 * Basically: int3_magic(&val); but really complicated :-)
+	 *
+	 * Stick the address of the INT3 instruction into int3_ip, then trigger
+	 * the INT3, padded with NOPs to match a CALL instruction length.
+	 */
+#ifdef CONFIG_X86_32
+	asm volatile ("call 1f; 1: pop (%%edx); add $5, (%%edx);"
+		      "int3; nop; nop; nop; nop" : : "d" (&int3_ip), "a" (&val) : "memory");
+#else /* CONFIG_X86_64 */
+	asm volatile ("call 1f; 1: pop (%%rdx); add $5, (%%rdx);"
+		      "int3; nop; nop; nop; nop" : : "d" (&int3_ip), "D" (&val) : "memory");
+#endif
+
+	BUG_ON(val != 1);
+
+	unregister_die_notifier(&int3_exception_nb);
+}
+
 void __init alternative_instructions(void)
 {
-	/* The patching is not fully atomic, so try to avoid local interruptions
-	   that might execute the to be patched code.
-	   Other CPUs are not running. */
+	int3_selftest();
+
+	/*
+	 * The patching is not fully atomic, so try to avoid local
+	 * interruptions that might execute the to be patched code.
+	 * Other CPUs are not running.
+	 */
 	stop_nmi();
 
 	/*
@@ -642,10 +749,11 @@ void __init alternative_instructions(void)
 					    _text, _etext);
 	}
 
-	if (!uniproc_patched || num_possible_cpus() == 1)
+	if (!uniproc_patched || num_possible_cpus() == 1) {
 		free_init_pages("SMP alternatives",
 				(unsigned long)__smp_locks,
 				(unsigned long)__smp_locks_end);
+	}
 #endif
 
 	apply_paravirt(__parainstructions, __parainstructions_end);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8b6d03e55d2f..e072cdd07284 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -572,6 +572,7 @@ NOKPROBE_SYMBOL(do_general_protection);
 
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
+	printk("int3 frame: %lx\n", __builtin_frame_address(0));
 #ifdef CONFIG_DYNAMIC_FTRACE
 	/*
 	 * ftrace must be first, everything else may cause a recursive crash.
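
A note on the int3_selftest() asm above (an editorial sketch of the
address arithmetic, shown in its 32-bit form; the 64-bit variant only
differs in using %rdx.  The byte counts assume the assembler emits its
usual short encodings):

	call	1f		# pushes the address of label 1
1:	pop	(%edx)		# 8f 02    (2 bytes)  int3_ip = &1:
	add	$5, (%edx)	# 83 02 05 (3 bytes)  int3_ip += 5
	int3			# cc                  int3_ip now points here
	nop; nop; nop; nop	# pad the INT3 out to CALL_INSN_SIZE (5) bytes

So the notifier's check regs->ip - INT3_INSN_SIZE == int3_ip matches,
and the return address pushed by int3_emulate_call(), i.e.
regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE, is the first instruction
after the NOP padding: once int3_magic() returns, execution continues
right after the asm statement, exactly as if a real 5-byte CALL to
int3_magic(&val) had been there.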
