arch/x86/entry/entry_32.S | 7 +++- arch/x86/entry/entry_64.S | 14 ++++++-- arch/x86/include/asm/ftrace.h | 2 +- arch/x86/include/asm/text-patching.h | 66 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/ftrace.c | 29 +++++++++++++--- arch/x86/kernel/traps.c | 15 +++++--- 6 files changed, 120 insertions(+), 13 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d309f30cf7af..138ac432221b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1483,7 +1483,12 @@ ENTRY(int3) TRACE_IRQS_OFF xorl %edx, %edx # zero error code movl %esp, %eax # pt_regs pointer - call do_int3 + + # make room on kernel stack for push emulation + # do_kernel_int3 returns possibly updated pt_regs + subl $8, %esp + call do_kernel_int3 + movl %eax, %esp jmp ret_from_exception END(int3) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b629..834ec1397dab 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -899,6 +899,16 @@ ENTRY(\sym) jnz .Lfrom_usermode_switch_stack_\@ .endif + .if \create_gap == 1 + testb $3, CS-ORIG_RAX(%rsp) + jnz .Lfrom_usermode_no_gap_\@ + .rept 6 + pushq 5*8(%rsp) + .endr + UNWIND_HINT_IRET_REGS offset=8 +.Lfrom_usermode_no_gap_\@: + .endif + .if \paranoid call paranoid_entry .else @@ -1130,7 +1140,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 +idtentry int3 do_int3 has_error_code=0 create_gap=1 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN_PV diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index cf350639e76d..4b335ac5afcc 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -37,7 +37,7 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -int ftrace_int3_handler(struct pt_regs *regs); +int ftrace_int3_handler(struct pt_regs **regs); #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3..c91fdecab9c2 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,4 +39,70 @@ extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +#define INT3_INSN_SIZE 1 +#define CALL_INSN_SIZE 5 + +/* + * The 32-bit kernel entry exception stack does not contain the whole + * 'struct pt_regs', because the hardware will not push sp/ss when the + * protection level doesn't change. + * + * So pushing a value onto the stack requires us to make room for it by + * moving the whole truncated 'pt_regs' down by four bytes, and the + * stack we return to will be directly after it. The lowlevel x86-32 + * entry code will have made room on the stack for this, so we're not + * overwriting anything else there. + * + * On x86-64, the exception stack is much simpler, and always contains + * sp/ss, so we can just update the values directly. Again, the entry + * code has made sure there is a gap (now above pt_regs!) that this is ok. + */ +static inline struct pt_regs *int3_emulate_push(struct pt_regs *regs, unsigned long value) +{ +#ifdef CONFIG_X86_32 +#define SAVED_KERNEL_REGS_SIZE (offsetof(struct pt_regs, sp)) + struct pt_regs *new = (void *)regs - sizeof(long); + + /* Inlined "memmove()" of the pt_regs */ + unsigned long *p = (unsigned long *) new; + int i = SAVED_KERNEL_REGS_SIZE / sizeof(unsigned long); + do { *p = p[1]; p++; } while (--i); + + /* + * NOTE! '&new->sp' is actually the top of stack that that 'int3' + * will return to! So despite what this looks like, this doesn't + * update the stack pointer, but writing the value to 'new->sp' + * is actually writing the value *to* the top of the stack in + * the returning context! + * + * The thing that updates the stack pointer in the context we + * return to is the fact that we moved 'struct pt_regs' itself, + * and will be returning using that moved stack frame. + */ + new->sp = value; + return new; +#else + unsigned long *sp; + + regs->sp -= sizeof(long); + sp = (unsigned long *)regs->sp; + *sp = value; + return regs; +#endif +} + +static inline struct pt_regs *int3_emulate_call(struct pt_regs *regs, unsigned long func) +{ + unsigned long next_ip = regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE; + + regs = int3_emulate_push(regs, next_ip); + regs->ip = func; + return regs; +} + #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ef49517f6bb2..6c239fca99d0 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_DYNAMIC_FTRACE @@ -231,6 +232,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } static unsigned long ftrace_update_func; +static unsigned long ftrace_update_func_call; static int update_ftrace_func(unsigned long ip, void *new) { @@ -259,6 +261,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char *new; int ret; + ftrace_update_func_call = (unsigned long)func; + new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -287,20 +291,32 @@ static nokprobe_inline int is_ftrace_caller(unsigned long ip) * call to a nop. While the change is taking place, we treat * it just like it was a nop. */ -int ftrace_int3_handler(struct pt_regs *regs) +int ftrace_int3_handler(struct pt_regs **pregs) { + struct pt_regs *regs = *pregs; unsigned long ip; if (WARN_ON_ONCE(!regs)) return 0; - ip = regs->ip - 1; - if (!ftrace_location(ip) && !is_ftrace_caller(ip)) + if (user_mode(regs)) return 0; - regs->ip += MCOUNT_INSN_SIZE - 1; + ip = regs->ip - INT3_INSN_SIZE; - return 1; + if (ftrace_location(ip)) { + *pregs = int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); + return 1; + } else if (is_ftrace_caller(ip)) { + if (!ftrace_update_func_call) { + int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); + return 1; + } + *pregs = int3_emulate_call(regs, ftrace_update_func_call); + return 1; + } + + return 0; } NOKPROBE_SYMBOL(ftrace_int3_handler); @@ -859,6 +875,8 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) func = ftrace_ops_get_func(ops); + ftrace_update_func_call = (unsigned long)func; + /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); @@ -960,6 +978,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func) { unsigned char *new; + ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); return update_ftrace_func(ip, new); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d26f9e9c3d83..e811e55ab030 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -570,7 +570,7 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) +dotraplinkage struct pt_regs * notrace do_kernel_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE /* @@ -578,11 +578,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) * See note by declaration of modifying_ftrace_code in ftrace.c */ if (unlikely(atomic_read(&modifying_ftrace_code)) && - ftrace_int3_handler(regs)) - return; + ftrace_int3_handler(®s)) + return regs; #endif if (poke_int3_handler(regs)) - return; + return regs; /* * Use ist_enter despite the fact that we don't use an IST stack. @@ -614,6 +614,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) exit: ist_exit(regs); + return regs; +} +NOKPROBE_SYMBOL(do_kernel_int3); + +dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) +{ + WARN_ON_ONCE(do_kernel_int3(regs, error_code) != regs); } NOKPROBE_SYMBOL(do_int3);