Date: Tue, 10 Oct 2017 17:21:46 +0200
From: Peter Zijlstra
To: Cheng Jian
Cc: mingo@redhat.com, acme@kernel.org, alexander.shishkin@linux.intel.com,
	linux-kernel@vger.kernel.org, xiexiuqi@huawei.com,
	huawei.libin@huawei.com, Steven Rostedt, Jiri Olsa
Subject: Re: [PATCH] perf/ftrace : Fix repetitious traces when specify a target task
Message-ID: <20171010152146.lewhp2t2cu3wd4d3@hirez.programming.kicks-ass.net>
References: <1507598379-148309-1-git-send-email-cj.chengjian@huawei.com>
 <20171010113321.rq7w2kb77vyf6qoj@hirez.programming.kicks-ass.net>
 <20171010130448.4goubbka7e4bguar@hirez.programming.kicks-ass.net>
In-Reply-To: <20171010130448.4goubbka7e4bguar@hirez.programming.kicks-ass.net>

On Tue, Oct 10, 2017 at 03:04:48PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 10, 2017 at 01:33:21PM +0200, Peter Zijlstra wrote:
> > But now you've got me looking at 75e8387685f6, which also looks
> > completely insane.
>
> The reason I insta stumbled on that patch is that it only addresses the
> ftrace situation and doesn't mention the other _5_ places that use this
> interface. It doesn't explain why those don't have the problem and if
> not, why their solution doesn't work for ftrace.
>
> So all (well syscall and regular tracepoints, didn't check the others)
> avoid that problem by simply not registering multiple times at the
> tracepoint. Tracepoints use tp_event->perf_refcount and the syscall
> things use sys_perf_refcount_{enter,exit} for that.
>
> Doing the same for function trace looks a little something like the
> below (after reverting 75e8387685f6)
>
> Except the below doesn't compile because of
> kernel/trace/trace_event_filter.c, which is where I lost the plot.

OK, so that filter stuff was the entire reason for this trainwreck :/

Using ftrace_ops filters allows ftrace to patch fewer functions, etc.
So that requires an ftrace_ops per event.

Still, that then instantly suggests we fix the whole hlist situation
instead of making it worse.

See below; I now have 3 patches: revert, the below, kill
FTRACE_OPS_FL_PER_CPU.

How's this?
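A rough sketch of that per-event ftrace_ops + filter arrangement, for
illustration only (not the patch below): one ftrace_ops per event, an ip
filter so ftrace only patches the function the event asked for, and
ops->private doubling as the sched-in/sched-out flag, which is the same
flag flip the diff does from perf_ftrace_event_register(). Only
register_ftrace_function(), unregister_ftrace_function(),
ftrace_set_filter_ip(), FTRACE_OPS_FL_RCU and the 2017-era callback
signature are real API here; the demo_* names are invented.

/*
 * Illustrative sketch only.  ops->private acts as the "event is
 * scheduled in" flag; the ip filter keeps ftrace from patching every
 * function for this ops.
 */
#include <linux/ftrace.h>

static void demo_func_probe(unsigned long ip, unsigned long parent_ip,
			    struct ftrace_ops *ops, struct pt_regs *pt_regs)
{
	/* Only emit a sample while the owning event is scheduled in. */
	if (!ops->private)
		return;

	/* ... build and submit the perf sample for @ip here ... */
}

static struct ftrace_ops demo_ops = {
	.func	= demo_func_probe,
	.flags	= FTRACE_OPS_FL_RCU,
};

static int demo_attach(unsigned long target_ip)
{
	int ret;

	/* Trace only @target_ip; ftrace then patches just that function. */
	ret = ftrace_set_filter_ip(&demo_ops, target_ip, 0, 1);
	if (ret)
		return ret;

	demo_ops.private = NULL;	/* start disabled */
	return register_ftrace_function(&demo_ops);
}

/* The TRACE_REG_PERF_ADD / TRACE_REG_PERF_DEL moments. */
static void demo_sched_in(void)  { demo_ops.private = (void *)1UL; }
static void demo_sched_out(void) { demo_ops.private = (void *)0UL; }

static void demo_detach(void)
{
	unregister_ftrace_function(&demo_ops);
}

In the patch itself the same enable/disable flip happens via the
TRACE_REG_PERF_ADD/DEL cases in perf_ftrace_event_register().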
---
 kernel/trace/trace_event_perf.c | 68 +++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 32 deletions(-)

--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -240,27 +240,31 @@ void perf_trace_destroy(struct perf_even
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
 	struct trace_event_call *tp_event = p_event->tp_event;
-	struct hlist_head __percpu *pcpu_list;
-	struct hlist_head *list;
 
-	pcpu_list = tp_event->perf_events;
-	if (WARN_ON_ONCE(!pcpu_list))
-		return -EINVAL;
+	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) {
+		struct hlist_head __percpu *pcpu_list;
+		struct hlist_head *list;
+
+		pcpu_list = tp_event->perf_events;
+		if (WARN_ON_ONCE(!pcpu_list))
+			return -EINVAL;
 
-	if (!(flags & PERF_EF_START))
-		p_event->hw.state = PERF_HES_STOPPED;
+		if (!(flags & PERF_EF_START))
+			p_event->hw.state = PERF_HES_STOPPED;
 
-	list = this_cpu_ptr(pcpu_list);
-	hlist_add_head_rcu(&p_event->hlist_entry, list);
+		list = this_cpu_ptr(pcpu_list);
+		hlist_add_head_rcu(&p_event->hlist_entry, list);
+	}
 
-	return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
+	return 0;
 }
 
 void perf_trace_del(struct perf_event *p_event, int flags)
 {
 	struct trace_event_call *tp_event = p_event->tp_event;
-	hlist_del_rcu(&p_event->hlist_entry);
-	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
+
+	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event))
+		hlist_del_rcu(&p_event->hlist_entry);
 }
 
 void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
@@ -306,15 +310,19 @@ static void
 perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *ops, struct pt_regs *pt_regs)
 {
+	struct hlist_head head = HLIST_HEAD_INIT;
 	struct ftrace_entry *entry;
-	struct hlist_head *head;
+	struct perf_event *event;
 	struct pt_regs regs;
 	int rctx;
 
-	head = this_cpu_ptr(event_function.perf_events);
-	if (hlist_empty(head))
+	event = container_of(ops, struct perf_event, ftrace_ops);
+
+	if (!event->ftrace_ops.private)
 		return;
 
+	hlist_add_head(&event->hlist_entry, &head);
+
 #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
 		    sizeof(u64)) - sizeof(u32))
 
@@ -330,17 +338,21 @@ perf_ftrace_function_call(unsigned long
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
 	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
-			      1, &regs, head, NULL);
+			      1, &regs, &head, NULL);
 
 #undef ENTRY_SIZE
+
+	hlist_del_init(&event->hlist_entry);
 }
 
 static int perf_ftrace_function_register(struct perf_event *event)
 {
 	struct ftrace_ops *ops = &event->ftrace_ops;
 
-	ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU;
-	ops->func = perf_ftrace_function_call;
+	ops->flags   = FTRACE_OPS_FL_RCU;
+	ops->func    = perf_ftrace_function_call;
+	ops->private = NULL;
+
 	return register_ftrace_function(ops);
 }
 
@@ -352,19 +364,11 @@ static int perf_ftrace_function_unregist
 	return ret;
 }
 
-static void perf_ftrace_function_enable(struct perf_event *event)
-{
-	ftrace_function_local_enable(&event->ftrace_ops);
-}
-
-static void perf_ftrace_function_disable(struct perf_event *event)
-{
-	ftrace_function_local_disable(&event->ftrace_ops);
-}
-
 int perf_ftrace_event_register(struct trace_event_call *call,
 			       enum trace_reg type, void *data)
 {
+	struct perf_event *event = data;
+
 	switch (type) {
 	case TRACE_REG_REGISTER:
 	case TRACE_REG_UNREGISTER:
@@ -377,11 +381,11 @@ int perf_ftrace_event_register(struct tr
 	case TRACE_REG_PERF_CLOSE:
 		return perf_ftrace_function_unregister(data);
 	case TRACE_REG_PERF_ADD:
-		perf_ftrace_function_enable(data);
-		return 0;
+		event->ftrace_ops.private = (void *)1UL;
+		return 1;
 	case TRACE_REG_PERF_DEL:
-		perf_ftrace_function_disable(data);
-		return 0;
+		event->ftrace_ops.private = (void *)0UL;
+		return 1;
 	}
 
 	return -EINVAL;