diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6e1faa28c58..6adaf18b3365 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -158,6 +158,7 @@ config X86 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_DYNAMIC_FTRACE_WITH_IPMODIFY select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EISA diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 024c3053dbba..009b60426926 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -283,6 +283,12 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = update_ftrace_func(ip, new); } + if (!ret) { + ip = (unsigned long)(&ftrace_ip_call); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = update_ftrace_func(ip, new); + } + return ret; } @@ -715,9 +721,12 @@ static inline void tramp_free(void *tramp) { } /* Defined as markers to the end of the ftrace default trampolines */ extern void ftrace_regs_caller_end(void); +extern void ftrace_ip_caller_end(void); extern void ftrace_epilogue(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); +extern void ftrace_ip_caller_op_ptr(void); +extern void ftrace_caller_op_ptr(void); /* movq function_trace_op(%rip), %rdx */ /* 0x48 0x8b 0x15 */ @@ -763,6 +772,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) start_offset = (unsigned long)ftrace_regs_caller; end_offset = (unsigned long)ftrace_regs_caller_end; op_offset = (unsigned long)ftrace_regs_caller_op_ptr; + } else if (ops->flags & FTRACE_OPS_FL_IPMODIFY) { + start_offset = (unsigned long)ftrace_ip_caller; + end_offset = (unsigned long)ftrace_ip_caller_end; + op_offset = (unsigned long)ftrace_ip_caller_op_ptr; } else { start_offset = (unsigned long)ftrace_caller; end_offset = (unsigned long)ftrace_epilogue; @@ -771,6 +784,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) size = end_offset - start_offset; + printk("create tramploine to %pS\n", (void *)start_offset); /* * Allocate enough size to store the ftrace_caller code, * the iret , as well as the address of the ftrace_ops this @@ -840,14 +854,17 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) return 0; } -static unsigned long calc_trampoline_call_offset(bool save_regs) +static unsigned long calc_trampoline_call_offset(int flags) { unsigned long start_offset; unsigned long call_offset; - if (save_regs) { + if (flags & FTRACE_OPS_FL_SAVE_REGS) { start_offset = (unsigned long)ftrace_regs_caller; call_offset = (unsigned long)ftrace_regs_call; + } else if (flags & FTRACE_OPS_FL_IPMODIFY) { + start_offset = (unsigned long)ftrace_ip_caller; + call_offset = (unsigned long)ftrace_ip_call; } else { start_offset = (unsigned long)ftrace_caller; call_offset = (unsigned long)ftrace_call; @@ -882,7 +899,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) npages = PAGE_ALIGN(size) >> PAGE_SHIFT; } - offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); + offset = calc_trampoline_call_offset(ops->flags); ip = ops->trampoline + offset; func = ftrace_ops_get_func(ops); @@ -927,7 +944,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) { unsigned long offset; - bool save_regs = rec->flags & FTRACE_FL_REGS_EN; void *ptr; if (ops && ops->trampoline) { @@ -942,10 +958,12 @@ static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) return NULL; } - offset = calc_trampoline_call_offset(save_regs); + offset = calc_trampoline_call_offset(rec->flags); - if (save_regs) + if (rec->flags & FTRACE_FL_REGS_EN) ptr = (void *)FTRACE_REGS_ADDR + offset; + else if (rec->flags & FTRACE_FL_IP_EN) + ptr = (void *)FTRACE_IP_ADDR + offset; else ptr = (void *)FTRACE_ADDR + offset; @@ -960,7 +978,7 @@ void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return static_tramp_func(ops, rec); - offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); + offset = calc_trampoline_call_offset(ops->flags); return addr_from_call((void *)ops->trampoline + offset); } diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 809d54397dba..5930810bf71d 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -172,6 +172,39 @@ WEAK(ftrace_stub) retq ENDPROC(ftrace_caller) + +ENTRY(ftrace_ip_caller) + save_mcount_regs + /* save_mcount_regs fills in first two parameters */ + +GLOBAL(ftrace_ip_caller_op_ptr) + /* Load the ftrace_ops into the 3rd parameter */ + movq function_trace_op(%rip), %rdx + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_ip_call) + call ftrace_stub + + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, MCOUNT_REG_SIZE(%rsp) + + restore_mcount_regs + + /* + * As this jmp to ftrace_epilogue can be a short jump + * it must not be copied into the trampoline. + * The trampoline will add the code to jump + * to the return. + */ +GLOBAL(ftrace_ip_caller_end) + + jmp ftrace_epilogue + +ENDPROC(ftrace_ip_caller) + ENTRY(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ pushfq @@ -214,7 +247,7 @@ GLOBAL(ftrace_regs_call) /* Copy flags back to SS, to restore them */ movq EFLAGS(%rsp), %rax - movq %rax, MCOUNT_REG_SIZE(%rsp) + movq %rax, MCOUNT_REG_SIZE+8(%rsp) /* Handlers can change the RIP */ movq RIP(%rsp), %rax diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8385cafe4f9f..ba1cf5640639 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -333,6 +333,8 @@ bool is_ftrace_trampoline(unsigned long addr); * REGS_EN - the function is set up to save regs. * IPMODIFY - the record allows for the IP address to be changed. * DISABLED - the record is not ready to be touched yet + * IP - the record wants ip modification only (no regs) + * IP_EN - the record has ip modification only * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flag REGS is set. When the function has been @@ -348,10 +350,12 @@ enum { FTRACE_FL_TRAMP_EN = (1UL << 27), FTRACE_FL_IPMODIFY = (1UL << 26), FTRACE_FL_DISABLED = (1UL << 25), + FTRACE_FL_IP = (1UL << 24), + FTRACE_FL_IP_EN = (1UL << 23), }; -#define FTRACE_REF_MAX_SHIFT 25 -#define FTRACE_FL_BITS 7 +#define FTRACE_REF_MAX_SHIFT 23 +#define FTRACE_FL_BITS 9 #define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) @@ -458,8 +462,10 @@ extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_regs_caller(void); +extern void ftrace_ip_caller(void); extern void ftrace_call(void); extern void ftrace_regs_call(void); +extern void ftrace_ip_call(void); extern void mcount_call(void); void ftrace_modify_all_code(int command); @@ -475,8 +481,14 @@ void ftrace_modify_all_code(int command); #ifndef FTRACE_REGS_ADDR #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) +# ifdef CONFIG_DYNAMIC_FTRACE_WITH_IPMODIFY_ONLY +# define FTRACE_IP_ADDR ((unsigned long)ftrace_ip_caller) +# else +# define FTRACE_IP_ADDR FTRACE_REGS_ADDR +# endif #else # define FTRACE_REGS_ADDR FTRACE_ADDR +# define FTRACE_IP_ADDR FTRACE_ADDR #endif #endif diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 399669f7eba8..c2037587dc95 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -114,6 +114,7 @@ void do_raw_spin_lock(raw_spinlock_t *lock) mmiowb_spin_lock(); debug_spin_lock_after(lock); } +EXPORT_SYMBOL_GPL(do_raw_spin_lock); int do_raw_spin_trylock(raw_spinlock_t *lock) { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2a58380ea310..a1b88b810b8a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -33,6 +33,9 @@ config HAVE_DYNAMIC_FTRACE config HAVE_DYNAMIC_FTRACE_WITH_REGS bool +config HAVE_DYNAMIC_FTRACE_WITH_IPMODIFY + bool + config HAVE_FTRACE_MCOUNT_RECORD bool help @@ -557,6 +560,11 @@ config DYNAMIC_FTRACE_WITH_REGS depends on DYNAMIC_FTRACE depends on HAVE_DYNAMIC_FTRACE_WITH_REGS +config DYNAMIC_FTRACE_WITH_IPMODIFY_ONLY + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_IPMODIFY + config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dacb8b50a263..fec64bf679e8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -326,6 +326,13 @@ int __register_ftrace_function(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ops->flags |= FTRACE_OPS_FL_SAVE_REGS; #endif + +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_IPMODIFY_ONLY + if (ops->flags & FTRACE_OPS_FL_IPMODIFY && + !(ops->flags & FTRACE_OPS_FL_SAVE_REGS)) + return -EINVAL; +#endif + if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT)) return -EBUSY; @@ -1625,6 +1632,26 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } +/* Test if ops registered to this rec needs ip only */ +static bool test_rec_ops_needs_ipmod(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + bool keep_ip = false; + + for (ops = ftrace_ops_list; + ops != &ftrace_list_end; ops = ops->next) { + /* pass rec in as regs to have non-NULL val */ + if (ftrace_ops_test(ops, rec->ip, rec)) { + if (ops->flags & FTRACE_OPS_FL_IPMODIFY) { + keep_ip = true; + break; + } + } + } + + return keep_ip; +} + static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); static struct ftrace_ops * @@ -1737,8 +1764,20 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * If any ops wants regs saved for this function * then all ops will get saved regs. */ - if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { rec->flags |= FTRACE_FL_REGS; + rec->flags &= ~FTRACE_FL_IP; + } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_IPMODIFY_ONLY + /* + * If any ops wants ip modification without regs + * handle that case too. + */ + if (!(rec->flags & FTRACE_FL_REGS) && + ops->flags & FTRACE_OPS_FL_IPMODIFY) + rec->flags |= FTRACE_FL_IP; +#endif } else { if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) return false; @@ -1753,8 +1792,18 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, if (ftrace_rec_count(rec) > 0 && rec->flags & FTRACE_FL_REGS && ops->flags & FTRACE_OPS_FL_SAVE_REGS) { - if (!test_rec_ops_needs_regs(rec)) + if (!test_rec_ops_needs_regs(rec)) { rec->flags &= ~FTRACE_FL_REGS; + + /* Downgrade to just ipmodify? */ + if (test_rec_ops_needs_ipmod(rec)) + rec->flags |= FTRACE_FL_IP; + + } else if (rec->flags & FTRACE_FL_REGS && + ops->flags & FTRACE_OPS_FL_IPMODIFY) { + if (!test_rec_ops_needs_ipmod(rec)) + rec->flags &= ~FTRACE_FL_IP; + } } /* @@ -2033,7 +2082,8 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) pr_info("ftrace record flags: %lx\n", rec->flags); pr_cont(" (%ld)%s", ftrace_rec_count(rec), - rec->flags & FTRACE_FL_REGS ? " R" : " "); + rec->flags & FTRACE_FL_REGS ? " R" : + rec->flags & FTRACE_FL_IP ? " r" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -2085,6 +2135,10 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) !(rec->flags & FTRACE_FL_REGS_EN)) flag |= FTRACE_FL_REGS; + if (!(rec->flags & FTRACE_FL_IP) != + !(rec->flags & FTRACE_FL_IP_EN)) + flag |= FTRACE_FL_IP; + if (!(rec->flags & FTRACE_FL_TRAMP) != !(rec->flags & FTRACE_FL_TRAMP_EN)) flag |= FTRACE_FL_TRAMP; @@ -2112,6 +2166,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) else rec->flags &= ~FTRACE_FL_TRAMP_EN; } + if (flag & FTRACE_FL_IP) { + if (rec->flags & FTRACE_FL_REGS) + rec->flags |= FTRACE_FL_IP_EN; + else + rec->flags &= ~FTRACE_FL_IP_EN; + } } /* @@ -2141,7 +2201,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) * and REGS states. The _EN flags must be disabled though. */ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | - FTRACE_FL_REGS_EN); + FTRACE_FL_REGS_EN | FTRACE_FL_IP_EN); } ftrace_bug_type = FTRACE_BUG_NOP; @@ -2324,6 +2384,8 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) if (rec->flags & FTRACE_FL_REGS) return (unsigned long)FTRACE_REGS_ADDR; + else if (rec->flags & FTRACE_FL_IP) + return (unsigned long)FTRACE_IP_ADDR; else return (unsigned long)FTRACE_ADDR; } @@ -2356,6 +2418,8 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) if (rec->flags & FTRACE_FL_REGS_EN) return (unsigned long)FTRACE_REGS_ADDR; + else if (rec->flags & FTRACE_FL_IP_EN) + return (unsigned long)FTRACE_IP_ADDR; else return (unsigned long)FTRACE_ADDR; } @@ -3462,7 +3526,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " (%ld)%s%s", ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : " ", - rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); + rec->flags & FTRACE_FL_IP ? " I" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 1a72b7d95cdc..e87d9decb994 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -11,6 +11,41 @@ #define CREATE_TRACE_POINTS #include "trace-events-sample.h" +#include + +void do_raw_spin_lock(raw_spinlock_t *lock); + +int x; + +static void my_do_spin_lock(raw_spinlock_t *lock) +{ + int ret; + +// trace_printk("takeing %p\n", lock); + do_raw_spin_lock(lock); + /* Force not having a tail call */ + if (!x) + return; + x++; +} + +static void my_hijack_func(unsigned long ip, unsigned long pip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + unsigned long this_func = (unsigned long)my_do_spin_lock; + + if (pip >= this_func && pip <= this_func + 0x10000) + return; + + regs->ip = this_func; +} + +static struct ftrace_ops my_ops = { + .func = my_hijack_func, + .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE, +// | FTRACE_OPS_FL_SAVE_REGS, +}; + static const char *random_strings[] = { "Mother Goose", "Snoopy", @@ -115,6 +150,13 @@ void foo_bar_unreg(void) static int __init trace_event_init(void) { + int ret; + + ret = ftrace_set_filter_ip(&my_ops, (unsigned long)do_raw_spin_lock, 0, 0); + if (!ret) + register_ftrace_function(&my_ops); + + return 0; simple_tsk = kthread_run(simple_thread, NULL, "event-sample"); if (IS_ERR(simple_tsk)) return -1; @@ -124,6 +166,8 @@ static int __init trace_event_init(void) static void __exit trace_event_exit(void) { + unregister_ftrace_function(&my_ops); + return; kthread_stop(simple_tsk); mutex_lock(&thread_mutex); if (simple_tsk_fn)