From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752021AbdBXAP4 (ORCPT ); Thu, 23 Feb 2017 19:15:56 -0500 Received: from mga11.intel.com ([192.55.52.93]:43791 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752117AbdBXAKo (ORCPT ); Thu, 23 Feb 2017 19:10:44 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.35,199,1484035200"; d="scan'208";a="69530543" From: Andi Kleen To: acme@kernel.org Cc: jolsa@kernel.org, linux-kernel@vger.kernel.org, Andi Kleen Subject: [PATCH 02/10] perf, tools, stat: Collapse identically named events Date: Thu, 23 Feb 2017 16:10:13 -0800 Message-Id: <20170224001021.6723-2-andi@firstfloor.org> X-Mailer: git-send-email 2.9.3 In-Reply-To: <20170224001021.6723-1-andi@firstfloor.org> References: <20170224001021.6723-1-andi@firstfloor.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Andi Kleen The uncore has a lot of duplicated PMUs for its different subsystems. When expanding an uncore alias we usually end up with a large number of identically named aliases, which makes perf stat output difficult to read. Automatically sum them up in perf stat, unless --no-merge is specified. This can be the default because generally only the uncore PMUs have duplicated aliases. Other PMUs have unique names. 
Before: % perf stat --no-merge -a -e unc_c_llc_lookup.any sleep 1 Performance counter stats for 'system wide': 694,976 Bytes unc_c_llc_lookup.any 706,304 Bytes unc_c_llc_lookup.any 956,608 Bytes unc_c_llc_lookup.any 782,720 Bytes unc_c_llc_lookup.any 605,696 Bytes unc_c_llc_lookup.any 442,816 Bytes unc_c_llc_lookup.any 659,328 Bytes unc_c_llc_lookup.any 509,312 Bytes unc_c_llc_lookup.any 263,936 Bytes unc_c_llc_lookup.any 592,448 Bytes unc_c_llc_lookup.any 672,448 Bytes unc_c_llc_lookup.any 608,640 Bytes unc_c_llc_lookup.any 641,024 Bytes unc_c_llc_lookup.any 856,896 Bytes unc_c_llc_lookup.any 808,832 Bytes unc_c_llc_lookup.any 684,864 Bytes unc_c_llc_lookup.any 710,464 Bytes unc_c_llc_lookup.any 538,304 Bytes unc_c_llc_lookup.any 1.002577660 seconds time elapsed After: % perf stat -a -e unc_c_llc_lookup.any sleep 1 Performance counter stats for 'system wide': 2,685,120 Bytes unc_c_llc_lookup.any 1.002648032 seconds time elapsed v2: Split collect_aliases. Rename alias flag. v3: Make sure unsupported/not counted is always printed. v4: Factor out callback change into separate patch. Signed-off-by: Andi Kleen --- tools/perf/Documentation/perf-stat.txt | 3 +++ tools/perf/builtin-stat.c | 31 +++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 3 files changed, 35 insertions(+) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index d96ccd4844df..320d8020bc5b 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -237,6 +237,9 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--no-merge:: +Do not merge results from same PMUs. 
+ EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 43a7ef3d71ed..389c8e457bf0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; +static bool no_merge = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1178,12 +1179,34 @@ static void aggr_update_shadow(void) } } +static void collect_all_aliases(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evsel_list->entries), node); + list_for_each_entry_continue (alias, &evsel_list->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + nsec_counter(alias) != nsec_counter(counter)) + break; + alias->merged_stat = true; + cb(alias, data, false); + } +} + static void collect_data(struct perf_evsel *counter, void (*cb)(struct perf_evsel *counter, void *data, bool first), void *data) { cb(counter, data, true); + if (!no_merge) + collect_all_aliases(counter, cb, data); } struct aggr_data { @@ -1250,6 +1273,8 @@ static void print_aggr(char *prefix) ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { + if (counter->merged_stat) + continue; ad.val = ad.ena = ad.run = 0; ad.nr = 0; collect_data(counter, aggr_cb, &ad); @@ -1325,6 +1350,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double uval; struct caggr_data cd = { .avg = 0.0 }; + if (counter->merged_stat) + return; collect_data(counter, counter_aggr_cb, &cd); if (prefix && !metric_only) @@ -1357,6 +1384,9 @@ static void print_counter(struct 
perf_evsel *counter, char *prefix) double uval; int cpu; + if (counter->merged_stat) + return; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; @@ -1701,6 +1731,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), + OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 06ef6f29efa1..bd2e9b112d49 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -131,6 +131,7 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool merged_stat; }; union u64_swap { -- 2.9.3