From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756864Ab2BGTpc (ORCPT ); Tue, 7 Feb 2012 14:45:32 -0500 Received: from mx1.redhat.com ([209.132.183.28]:39600 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756824Ab2BGTp0 (ORCPT ); Tue, 7 Feb 2012 14:45:26 -0500 From: Jiri Olsa To: rostedt@goodmis.org, fweisbec@gmail.com, mingo@redhat.com, paulus@samba.org, acme@ghostprotocols.net, a.p.zijlstra@chello.nl Cc: linux-kernel@vger.kernel.org, aarapov@redhat.com, Jiri Olsa Subject: [PATCH 6/8] ftrace, perf: Add support to use function tracepoint in perf Date: Tue, 7 Feb 2012 20:44:57 +0100 Message-Id: <1328643899-8361-7-git-send-email-jolsa@redhat.com> In-Reply-To: <1328643899-8361-1-git-send-email-jolsa@redhat.com> References: <1327776209-4883-1-git-send-email-jolsa@redhat.com> <1328643899-8361-1-git-send-email-jolsa@redhat.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Adding perf registration support for the ftrace function event, so it is now possible to register it via the perf interface. The perf_event struct statically contains ftrace_ops as a handle for the function tracer. The function tracer is registered/unregistered in open/close actions. To be efficient, we enable/disable ftrace_ops each time the traced process is scheduled in/out (via TRACE_REG_PERF_(ADD|DEL) handlers). This way tracing is enabled only when the process is running. This approach is used intentionally instead of the event's hw state PERF_HES_STOPPED, which would not disable the ftrace_ops. It is now possible to use function trace within perf commands like: perf record -e ftrace:function ls perf stat -e ftrace:function ls Allowed only for root. 
Signed-off-by: Jiri Olsa --- include/linux/perf_event.h | 3 + kernel/trace/trace.h | 7 +++ kernel/trace/trace_entries.h | 6 ++- kernel/trace/trace_event_perf.c | 84 +++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_export.c | 5 ++ 5 files changed, 103 insertions(+), 2 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 412b790..92a056f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -859,6 +859,9 @@ struct perf_event { #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call *tp_event; struct event_filter *filter; +#ifdef CONFIG_FUNCTION_TRACER + struct ftrace_ops ftrace_ops; +#endif #endif #ifdef CONFIG_CGROUP_PERF diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 638476a..61bc283 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -595,6 +595,8 @@ static inline int ftrace_trace_task(struct task_struct *task) static inline int ftrace_is_dead(void) { return 0; } #endif +int ftrace_event_is_function(struct ftrace_event_call *call); + /* * struct trace_parser - servers for reading the user input separated by spaces * @cont: set if the input is not complete - no final space char was found @@ -832,4 +834,9 @@ extern const char *__stop___trace_bprintk_fmt[]; FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" +#ifdef CONFIG_PERF_EVENTS +int perf_ftrace_event_register(struct ftrace_event_call *call, + enum trace_reg type, void *data); +#endif + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 9336590..47db7ed 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -55,7 +55,7 @@ /* * Function trace entry - function address and parent function address: */ -FTRACE_ENTRY(function, ftrace_entry, +FTRACE_ENTRY_REG(function, ftrace_entry, TRACE_FN, @@ -64,7 +64,9 @@ FTRACE_ENTRY(function, ftrace_entry, __field( unsigned long, parent_ip ) ), - F_printk(" 
%lx <-- %lx", __entry->ip, __entry->parent_ip) + F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip), + + perf_ftrace_event_register ); /* Function call entry */ diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index d72af0b..32f8806 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -24,6 +24,11 @@ static int total_ref_count; static int perf_trace_event_perm(struct ftrace_event_call *tp_event, struct perf_event *p_event) { + /* The ftrace function trace is allowed only for root. */ + if (ftrace_event_is_function(tp_event) && + perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) return 0; @@ -250,3 +255,82 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, return raw_data; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); + +static void +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_entry *entry; + struct hlist_head *head; + struct pt_regs regs; + int rctx; + +#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ + sizeof(u64)) - sizeof(u32)) + + BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); + + perf_fetch_caller_regs(&regs); + + entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); + if (!entry) + return; + + entry->ip = ip; + entry->parent_ip = parent_ip; + + head = this_cpu_ptr(event_function.perf_events); + perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, + 1, &regs, head); + +#undef ENTRY_SIZE +} + +static int perf_ftrace_function_register(struct perf_event *event) +{ + struct ftrace_ops *ops = &event->ftrace_ops; + + ops->flags |= FTRACE_OPS_FL_CONTROL; + ops->func = perf_ftrace_function_call; + return register_ftrace_function(ops); +} + +static int perf_ftrace_function_unregister(struct perf_event *event) +{ + struct ftrace_ops *ops = &event->ftrace_ops; + return 
unregister_ftrace_function(ops); +} + +static void perf_ftrace_function_enable(struct perf_event *event) +{ + ftrace_function_local_enable(&event->ftrace_ops); +} + +static void perf_ftrace_function_disable(struct perf_event *event) +{ + ftrace_function_local_disable(&event->ftrace_ops); +} + +int perf_ftrace_event_register(struct ftrace_event_call *call, + enum trace_reg type, void *data) +{ + switch (type) { + case TRACE_REG_REGISTER: + case TRACE_REG_UNREGISTER: + break; + case TRACE_REG_PERF_REGISTER: + case TRACE_REG_PERF_UNREGISTER: + return 0; + case TRACE_REG_PERF_OPEN: + return perf_ftrace_function_register(data); + case TRACE_REG_PERF_CLOSE: + return perf_ftrace_function_unregister(data); + case TRACE_REG_PERF_ADD: + perf_ftrace_function_enable(data); + return 0; + case TRACE_REG_PERF_DEL: + perf_ftrace_function_disable(data); + return 0; + } + + return -EINVAL; +} diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index f74de86..a3dbee6 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -184,4 +184,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; FTRACE_ENTRY_REG(call, struct_name, etype, \ PARAMS(tstruct), PARAMS(print), NULL) +int ftrace_event_is_function(struct ftrace_event_call *call) +{ + return call == &event_function; +} + #include "trace_entries.h" -- 1.7.1