From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754919Ab0KKKjA (ORCPT ); Thu, 11 Nov 2010 05:39:00 -0500 Received: from mx3.mail.elte.hu ([157.181.1.138]:45468 "EHLO mx3.mail.elte.hu" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753562Ab0KKKi6 (ORCPT ); Thu, 11 Nov 2010 05:38:58 -0500 Date: Thu, 11 Nov 2010 11:38:37 +0100 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Peter Zijlstra , Arnaldo Carvalho de Melo , =?iso-8859-1?Q?Fr=E9d=E9ric?= Weisbecker , Steven Rostedt , Thomas Gleixner , Andrew Morton Subject: [GIT PULL] perf fixes Message-ID: <20101111103837.GA16486@elte.hu> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.20 (2009-08-17) X-ELTE-SpamScore: -2.0 X-ELTE-SpamLevel: X-ELTE-SpamCheck: no X-ELTE-SpamVersion: ELTE 2.0 X-ELTE-SpamCheck-Details: score=-2.0 required=5.9 tests=BAYES_00 autolearn=no SpamAssassin version=3.2.5 -2.0 BAYES_00 BODY: Bayesian spam probability is 0 to 1% [score: 0.0000] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Linus, Please pull the latest perf-fixes-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf-fixes-for-linus Thanks, Ingo ------------------> Corey Ashford (1): perf: Fix usages of profile_cpu in builtin-top.c to use cpu_list Cyrill Gorcunov (1): perf, ui: Eliminate stack-smashing protection compiler complaint Peter Zijlstra (1): perf, amd: Use kmalloc_node(,__GFP_ZERO) for northbridge structure allocation Stephane Eranian (1): perf_events: Fix time tracking in samples Tom Zanussi (7): perf trace scripting: fix some small memory leaks and missing error checks perf trace scripting: remove system-wide param from shell scripts perf record: make the record options available outside perf record perf trace record: handle commands correctly perf trace: live-mode command-line cleanup perf trace: update Documentation with new perf trace variants perf trace: update usage arch/x86/kernel/cpu/perf_event_amd.c | 4 +- include/linux/perf_event.h | 10 + kernel/perf_event.c | 42 ++++- tools/perf/Documentation/perf-trace.txt | 57 +++++- tools/perf/builtin-record.c | 10 +- tools/perf/builtin-top.c | 12 +- tools/perf/builtin-trace.c | 209 ++++++++++++++------ tools/perf/scripts/perl/bin/failed-syscalls-record | 2 +- tools/perf/scripts/perl/bin/rw-by-file-record | 2 +- tools/perf/scripts/perl/bin/rw-by-pid-record | 2 +- tools/perf/scripts/perl/bin/rwtop-record | 2 +- tools/perf/scripts/perl/bin/wakeup-latency-record | 2 +- tools/perf/scripts/perl/bin/workqueue-stats-record | 2 +- .../python/bin/failed-syscalls-by-pid-record | 2 +- .../scripts/python/bin/futex-contention-record | 2 +- tools/perf/scripts/python/bin/netdev-times-record | 2 +- .../perf/scripts/python/bin/sched-migration-record | 2 +- tools/perf/scripts/python/bin/sctop-record | 2 +- .../python/bin/syscall-counts-by-pid-record | 2 +- .../perf/scripts/python/bin/syscall-counts-record | 2 +- tools/perf/util/ui/util.c | 5 +- 21 files changed, 271 insertions(+), 104 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 46d5844..e421b8c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) struct amd_nb *nb; int i; - nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); + nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); if (!nb) return NULL; - memset(nb, 0, sizeof(*nb)); nb->nb_id = nb_id; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 057bf22..40150f3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -747,6 +747,16 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; + /* + * timestamp shadows the actual context timing but it can + * be safely used in NMI interrupt context. It reflects the + * context time as it was when the event was last scheduled in. + * + * ctx_time already accounts for ctx->timestamp. Therefore to + * compute ctx_time for a sample, simply add perf_clock(). + */ + u64 shadow_ctx_time; + struct perf_event_attr attr; struct hw_perf_event hw; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827..cb6c0d2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -674,6 +674,8 @@ event_sched_in(struct perf_event *event, event->tstamp_running += ctx->time - event->tstamp_stopped; + event->shadow_ctx_time = ctx->time - ctx->timestamp; + if (!is_software_event(event)) cpuctx->active_oncpu++; ctx->nr_active++; @@ -3396,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) } static void perf_output_read_one(struct perf_output_handle *handle, - struct perf_event *event) + struct perf_event *event, + u64 enabled, u64 running) { u64 read_format = event->attr.read_format; u64 values[4]; @@ -3404,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, values[n++] = perf_event_count(event); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = event->total_time_enabled + + values[n++] = enabled + atomic64_read(&event->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = event->total_time_running + + values[n++] = running + atomic64_read(&event->child_total_time_running); } if (read_format & PERF_FORMAT_ID) @@ -3421,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. */ static void perf_output_read_group(struct perf_output_handle *handle, - struct perf_event *event) + struct perf_event *event, + u64 enabled, u64 running) { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; @@ -3431,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = leader->total_time_enabled; + values[n++] = enabled; if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = leader->total_time_running; + values[n++] = running; if (leader != event) leader->pmu->read(leader); @@ -3459,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, } } +#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ + PERF_FORMAT_TOTAL_TIME_RUNNING) + static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { + u64 enabled = 0, running = 0, now, ctx_time; + u64 read_format = event->attr.read_format; + + /* + * compute total_time_enabled, total_time_running + * based on snapshot values taken when the event + * was last scheduled in. + * + * we cannot simply called update_context_time() + * because of locking issue as we are called in + * NMI context + */ + if (read_format & PERF_FORMAT_TOTAL_TIMES) { + now = perf_clock(); + ctx_time = event->shadow_ctx_time + now; + enabled = ctx_time - event->tstamp_enabled; + running = ctx_time - event->tstamp_running; + } + if (event->attr.read_format & PERF_FORMAT_GROUP) - perf_output_read_group(handle, event); + perf_output_read_group(handle, event, enabled, running); else - perf_output_read_one(handle, event); + perf_output_read_one(handle, event, enabled, running); } void perf_output_sample(struct perf_output_handle *handle, diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 122ec9d..26aff6b 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -8,7 +8,11 @@ perf-trace - Read perf.data (created by perf record) and display trace output SYNOPSIS -------- [verse] -'perf trace' {record