From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933639AbeAXLwJ (ORCPT ); Wed, 24 Jan 2018 06:52:09 -0500 Received: from mx1.redhat.com ([209.132.183.28]:44686 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933617AbeAXLwG (ORCPT ); Wed, 24 Jan 2018 06:52:06 -0500 From: Jiri Olsa To: Peter Zijlstra , Ingo Molnar Cc: lkml , Namhyung Kim , David Ahern , Andi Kleen , Alexander Shishkin , Andy Lutomirski , Arnaldo Carvalho de Melo Subject: [PATCH 08/21] perf: Add PERF_SAMPLE_CALLCHAIN to user data event Date: Wed, 24 Jan 2018 12:51:30 +0100 Message-Id: <20180124115143.14322-9-jolsa@kernel.org> In-Reply-To: <20180124115143.14322-1-jolsa@kernel.org> References: <20180124115143.14322-1-jolsa@kernel.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Add PERF_SAMPLE_CALLCHAIN to the user data event and allow callchain retrieval to be deferred to the user data task work. Callchain data is stored in the same way as for sample events. The USER DATA event 'type' value also uses the same sample type bits. 
Link: http://lkml.kernel.org/n/tip-drrmdnu591ix4rul0kktud4f@git.kernel.org Signed-off-by: Jiri Olsa --- include/linux/sched.h | 1 + include/uapi/linux/perf_event.h | 3 +++ kernel/events/core.c | 50 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index a2e041acfc4e..97d30eabb266 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -519,6 +519,7 @@ struct perf_user_data { int enabled_count; struct mutex enabled_mutex; u64 id; + u16 max_stack; }; enum perf_event_task_context { diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 3df8024f54f1..d30583411f97 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -939,9 +939,12 @@ enum perf_event_type { * # PERF_SAMPLE_* bits: * # * # PERF_SAMPLE_USER_DATA_ID + * # PERF_SAMPLE_CALLCHAIN * # * # and governs the data portion: * + * { u64 nr, + * u64 ips[nr];} && PERF_SAMPLE_CALLCHAIN * { u64 user_data_id;} && PERF_SAMPLE_USER_DATA_ID * * struct sample_id sample_id; diff --git a/kernel/events/core.c b/kernel/events/core.c index 1edf02dcd6e8..4676fbf681c7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6018,7 +6018,8 @@ static void user_data(struct user_data *ud, struct perf_event *event) static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; static struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs) +perf_callchain(struct perf_event *event, struct pt_regs *regs, + struct user_data *ud) { bool kernel = !event->attr.exclude_callchain_kernel; bool user = !event->attr.exclude_callchain_user; @@ -6027,6 +6028,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) const u32 max_stack = event->attr.sample_max_stack; struct perf_callchain_entry *callchain; + if (ud->allow && user && !crosstask) { + ud->type |= PERF_SAMPLE_CALLCHAIN; + user = false; + } + if (!kernel && 
!user) return &__empty_callchain; @@ -6059,7 +6065,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; - data->callchain = perf_callchain(event, regs); + data->callchain = perf_callchain(event, regs, &ud); size += data->callchain->nr; header->size += size * sizeof(u64); @@ -6166,6 +6172,8 @@ void perf_prepare_sample(struct perf_event_header *header, header->misc |= PERF_RECORD_MISC_USER_DATA; user_data->type |= ud.type; + user_data->max_stack = max(user_data->max_stack, + event->attr.sample_max_stack); if (!user_data->state) user_data->state = PERF_USER_DATA_STATE_ENABLE; @@ -6360,12 +6368,29 @@ perf_iterate_sb(perf_iterate_f output, void *data, } struct perf_user_data_event { + struct perf_callchain_entry *callchain; + struct { struct perf_event_header header; u64 type; } event_id; }; +static struct perf_callchain_entry *perf_user_callchain(u16 max_stack) +{ + struct perf_callchain_entry *callchain; + + callchain = get_perf_callchain(task_pt_regs(current), + /* init_nr */ 0, + /* kernel */ false, + /* user */ true, + max_stack, + /* crosstask */ false, + /* add_mark */ true); + + return callchain ?: &__empty_callchain; +} + static void perf_user_data_output(struct perf_event *event, void *data) { struct perf_user_data *user_data = &current->perf_user_data; @@ -6373,6 +6398,7 @@ static void perf_user_data_output(struct perf_event *event, void *data) struct perf_output_handle handle; struct perf_sample_data sample; u16 header_size = user->event_id.header.size; + u64 nr; if (!event->attr.user_data) return; @@ -6382,6 +6408,18 @@ static void perf_user_data_output(struct perf_event *event, void *data) perf_event_header__init_id(&user->event_id.header, &sample, event); + if (user->event_id.type & PERF_SAMPLE_CALLCHAIN) { + int size = 1; + + nr = user->callchain->nr; + nr = min((__u16) nr, event->attr.sample_max_stack); + + size += nr; + size *= sizeof(u64); + + user->event_id.header.size += size; + } + if 
(user->event_id.type & PERF_SAMPLE_USER_DATA_ID) user->event_id.header.size += sizeof(u64); @@ -6390,6 +6428,11 @@ static void perf_user_data_output(struct perf_event *event, void *data) perf_output_put(&handle, user->event_id); + if (user->event_id.type & PERF_SAMPLE_CALLCHAIN) { + perf_output_put(&handle, nr); + __output_copy(&handle, user->callchain->ip, nr * sizeof(u64)); + } + if (user->event_id.type & PERF_SAMPLE_USER_DATA_ID) perf_output_put(&handle, user_data->id); @@ -6413,6 +6456,9 @@ static void perf_user_data_event(struct perf_user_data *user_data) }, }; + if (user_data->type & PERF_SAMPLE_CALLCHAIN) + event.callchain = perf_user_callchain(user_data->max_stack); + perf_iterate_sb(perf_user_data_output, &event, NULL); /* -- 2.13.6