From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755170AbcCBWRI (ORCPT ); Wed, 2 Mar 2016 17:17:08 -0500 Received: from bombadil.infradead.org ([198.137.202.9]:47917 "EHLO bombadil.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753879AbcCBWRG (ORCPT ); Wed, 2 Mar 2016 17:17:06 -0500 From: Arnaldo Carvalho de Melo To: Ingo Molnar Cc: linux-kernel@vger.kernel.org, Andi Kleen , Arnaldo Carvalho de Melo Subject: [PATCH 02/11] perf stat: Support metrics in --per-core/socket mode Date: Wed, 2 Mar 2016 19:16:37 -0300 Message-Id: <1456957006-3757-3-git-send-email-acme@kernel.org> X-Mailer: git-send-email 2.5.0 In-Reply-To: <1456957006-3757-1-git-send-email-acme@kernel.org> References: <1456957006-3757-1-git-send-email-acme@kernel.org> X-SRS-Rewrite: SMTP reverse-path rewritten from by bombadil.infradead.org. See http://www.infradead.org/rpr.html Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Andi Kleen Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 2 2966.020381 task-clock (msec) # 2.004 CPUs utilized (100.00%) S0-C0 2 49 context-switches # 0.017 K/sec (100.00%) S0-C0 2 4 cpu-migrations # 0.001 K/sec (100.00%) S0-C0 2 467 page-faults # 0.157 K/sec S0-C0 2 4,599,061,773 cycles # 1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions # 2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec (100.00%) S0-C0 2 81,180,867 branch-misses # 4.26% of all branches S0-C1 2 2965.995373 task-clock (msec) # 2.003 CPUs utilized (100.00%) S0-C1 2 62 context-switches # 0.021 K/sec (100.00%) S0-C1 2 8 cpu-migrations # 0.003 K/sec (100.00%) S0-C1 2 281 page-faults # 0.095 K/sec S0-C1 2 6,347,290 cycles # 0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions # 0.73 insn per cycle (100.00%) S0-C1 2 947,121 branches # 0.319 M/sec (100.00%) S0-C1 2 37,322 branch-misses # 3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1456785386-19481-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 64 +++++++++++++++++++++++++++++++++++++------ tools/perf/util/stat-shadow.c | 7 +++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2ffb8221917a..9b5089c5dffe 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -740,6 +740,8 @@ struct outstate { bool newline; const char *prefix; int nfields; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, " "); fprintf(os->fh, " "); } @@ -798,6 +797,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (!aggr_get_id) + return 0; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, } perf_stat__print_shadow_stats(counter, uval, - stat_config.aggr_mode == AGGR_GLOBAL ? 0 : - cpu_map__id_to_cpu(id), + first_shadow_cpu(counter, id), &out); - if (!csv_output) { print_noise(counter, noise); print_running(run, ena); } } +static void aggr_update_shadow(void) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < aggr_map->nr; s++) { + id = aggr_map->map[s]; + evlist__for_each(evsel_list, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = aggr_get_id(evsel_list->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + val = val * counter->scale; + perf_stat__update_shadow_stats(counter, &val, + first_shadow_cpu(counter, id)); + } + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; @@ -979,6 +1025,8 @@ static void print_aggr(char *prefix) if (!(aggr_map || aggr_get_id)) return; + aggr_update_shadow(); + for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; evlist__for_each(evsel_list, counter) { diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 367e220e93d5..5e2d2e34e1bc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -14,6 +14,13 @@ enum { #define NUM_CTX CTX_BIT_MAX +/* + * AGGR_GLOBAL: Use CPU 0 + * AGGR_SOCKET: Use first CPU of socket + * AGGR_CORE: Use first CPU of core + * AGGR_NONE: Use matching CPU + * AGGR_THREAD: Not supported? + */ static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -- 2.5.0