From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754566AbbFJSPI (ORCPT ); Wed, 10 Jun 2015 14:15:08 -0400 Received: from mx1.redhat.com ([209.132.183.28]:50350 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S964838AbbFJSO3 (ORCPT ); Wed, 10 Jun 2015 14:14:29 -0400 From: Jiri Olsa To: Arnaldo Carvalho de Melo Cc: lkml , Adrian Hunter , Andi Kleen , David Ahern , Ingo Molnar , Namhyung Kim , Peter Zijlstra , Stephane Eranian Subject: [PATCH 25/25] perf stat: Introduce --per-task option Date: Wed, 10 Jun 2015 20:10:58 +0200 Message-Id: <1433959858-31858-26-git-send-email-jolsa@kernel.org> In-Reply-To: <1433959858-31858-1-git-send-email-jolsa@kernel.org> References: <1433959858-31858-1-git-send-email-jolsa@kernel.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Currently the values of all tasks given as -p option PID arguments get aggregated and printed as single values. Adding --per-task option to print values per task. 
$ perf stat -e cycles,instructions --per-task -p 12451,16173 ^C Performance counter stats for process id '12451,16173': TASK-12451 77,784 cycles TASK-16173 64,809 cycles TASK-12451 14,432 instructions TASK-16173 14,452 instructions 3.854957122 seconds time elapsed Also works under interval mode: $ ./perf stat -e cycles,instructions --per-task -p 16431,16173 -I 1000 # time task counts unit events 1.000085816 TASK-16173 0 cycles 1.000085816 TASK-16431 3,358,360,926 cycles 1.000085816 TASK-16173 0 instructions 1.000085816 TASK-16431 9,062,422,086 instructions 2.000212262 TASK-16173 65,386 cycles 2.000212262 TASK-16431 3,349,355,309 cycles 2.000212262 TASK-16173 12,151 instructions 2.000212262 TASK-16431 9,039,401,422 instructions 3.000333402 TASK-16173 62,797 cycles 3.000333402 TASK-16431 3,357,140,183 cycles 3.000333402 TASK-16173 12,208 instructions 3.000333402 TASK-16431 9,058,080,762 instructions ^C 3.375949851 TASK-16173 0 cycles 3.375949851 TASK-16431 1,264,764,804 cycles 3.375949851 TASK-16173 0 instructions 3.375949851 TASK-16431 3,414,532,317 instructions Link: http://lkml.kernel.org/n/tip-0v0ixd9k7o9z1u8hqngm1coe@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-stat.c | 57 +++++++++++++++++++++++++++++++++++++++++++++-- tools/perf/util/stat.h | 1 + 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4a7cad8debac..293d1029e2ba 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -237,6 +237,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, count = &zero; switch (aggr_mode) { + case AGGR_TASK: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: @@ -605,6 +606,11 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; + case AGGR_TASK: + fprintf(output, "TASK-%*d%s", + csv_output ? 
0 : -8, + evsel->threads->map[id], csv_sep); + break; case AGGR_GLOBAL: default: break; @@ -753,6 +759,40 @@ static void print_aggr(char *prefix) } } +static void print_aggr_task(struct perf_evsel *counter, char *prefix) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int cpu, thread; + double uval; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + + if (nsec_counter(counter)) + nsec_printout(thread, 0, counter, uval); + else + abs_printout(thread, 0, counter, uval); + + if (!csv_output) + print_noise(counter, 1.0); + + print_running(run, ena); + fputc('\n', output); + } +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -879,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts) case AGGR_NONE: fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); break; + case AGGR_TASK: + fprintf(output, "# time task counts %*s events\n", unit_width, "unit"); + break; case AGGR_GLOBAL: default: fprintf(output, "# time counts %*s events\n", unit_width, "unit"); @@ -947,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) case AGGR_SOCKET: print_aggr(prefix); break; + case AGGR_TASK: + evlist__for_each(evsel_list, counter) + print_aggr_task(counter, prefix); + break; case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) print_counter_aggr(counter, prefix); @@ -1034,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void) break; case AGGR_NONE: case AGGR_GLOBAL: + case AGGR_TASK: default: break; } @@ -1258,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix 
__maybe_unused) "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-task", &aggr_mode, + "aggregate counts per task", AGGR_TASK), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), OPT_END() @@ -1349,8 +1399,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - /* no_aggr, cgroup are for system-wide only */ - if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && + /* + * no_aggr, cgroup are for system-wide only + * --per-task is aggregated per task, we dont mix it with cpu mode + */ + if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_TASK) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 615c779eb42a..e4c616af057d 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -29,6 +29,7 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_CORE, + AGGR_TASK, }; void update_stats(struct stats *stats, u64 val); -- 1.9.3