From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>, Thomas Gleixner <tglx@linutronix.de>
Cc: Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
    Clark Williams <williams@redhat.com>,
    linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
    Andi Kleen <ak@linux.intel.com>,
    Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 08/23] perf stat: Use affinity for reading
Date: Tue, 3 Dec 2019 10:55:51 -0300
Message-ID: <20191203135606.24902-9-acme@kernel.org>
In-Reply-To: <20191203135606.24902-1-acme@kernel.org>
From: Andi Kleen <ak@linux.intel.com>
Restructure event reading to use affinity to minimize the number of IPIs
needed.
Before, on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read

After:

    3.43    0.081295           3     22082           read
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-stat.c | 97 ++++++++++++++++++++++-----------------
tools/perf/util/evsel.h | 1 +
2 files changed, 57 insertions(+), 41 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cf8516e701e2..a098c2ebf4ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
- int ncpus, cpu, thread;
-
- if (target__has_cpu(&target) && !target__has_per_thread(&target))
- ncpus = perf_evsel__nr_cpus(counter);
- else
- ncpus = 1;
+ int thread;
if (!counter->supported)
return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- struct perf_counts_values *count;
-
- count = perf_counts(counter->counts, cpu, thread);
-
- /*
- * The leader's group read loads data into its group members
- * (via perf_evsel__read_counter) and sets threir count->loaded.
- */
- if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
- read_single_counter(counter, cpu, thread, rs)) {
- counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
- return -1;
- }
+ struct perf_counts_values *count;
- perf_counts__set_loaded(counter->counts, cpu, thread, false);
+ count = perf_counts(counter->counts, cpu, thread);
- if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
- pr_err("failed to write stat event\n");
- return -1;
- }
- }
+ /*
+ * The leader's group read loads data into its group members
+ * (via perf_evsel__read_counter()) and sets their count->loaded.
+ */
+ if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+ read_single_counter(counter, cpu, thread, rs)) {
+ counter->counts->scaled = -1;
+ perf_counts(counter->counts, cpu, thread)->ena = 0;
+ perf_counts(counter->counts, cpu, thread)->run = 0;
+ return -1;
+ }
+
+ perf_counts__set_loaded(counter->counts, cpu, thread, false);
- if (verbose > 1) {
- fprintf(stat_config.output,
- "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
- cpu,
- count->val, count->ena, count->run);
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
}
}
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
}
return 0;
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
static void read_counters(struct timespec *rs)
{
struct evsel *counter;
- int ret;
+ struct affinity affinity;
+ int i, ncpus, cpu;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+ if (!target__has_cpu(&target) || target__has_per_thread(&target))
+ ncpus = 1;
+ evlist__for_each_cpu(evsel_list, i, cpu) {
+ if (i >= ncpus)
+ break;
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->err) {
+ counter->err = read_counter_cpu(counter, rs,
+ counter->cpu_iter - 1);
+ }
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evsel_list, counter) {
- ret = read_counter(counter, rs);
- if (ret)
+ if (counter->err)
pr_debug("failed to read counter %s\n", counter->name);
-
- if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+ if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
+ counter->err = 0;
}
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ca82a93960cd..c8af4bc23f8f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
struct list_head config_terms;
struct bpf_object *bpf_obj;
int bpf_fd;
+ int err;
bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
--
2.21.0
next prev parent reply other threads:[~2019-12-03 13:56 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-03 13:55 [GIT PULL] perf/core improvements and fixes Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 01/23] perf cpumap: Maintain cpumaps ordered and without dups Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 02/23] perf evlist: Maintain evlist->all_cpus Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 03/23] perf evsel: Add iterator to iterate over events ordered by CPU Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 04/23] perf evsel: Add functions to close evsel on a CPU Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 05/23] perf stat: Use affinity for closing file descriptors Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 06/23] perf stat: Factor out open error handling Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 07/23] perf stat: Use affinity for opening events Arnaldo Carvalho de Melo
2019-12-03 13:55 ` Arnaldo Carvalho de Melo [this message]
2019-12-03 13:55 ` [PATCH 09/23] perf evsel: Add functions to enable/disable for a specific CPU Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 10/23] perf stat: Use affinity for enabling/disabling events Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 11/23] perf jit: Move test functionality in to a test Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 12/23] perf machine: Fill map_symbol->maps in append_inlines() to fix segfault Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 13/23] perf bench: Update the copies of x86's mem{cpy,set}_64.S Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 14/23] tools arch x86: Sync the msr-index.h copy with the kernel sources Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 15/23] tools headers uapi: Sync linux/fscrypt.h " Arnaldo Carvalho de Melo
2019-12-03 13:55 ` [PATCH 16/23] tools headers uapi: Sync linux/stat.h " Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 17/23] tools headers kvm: Sync kvm headers " Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 18/23] tools headers UAPI: Sync sched.h with the kernel Arnaldo Carvalho de Melo
2019-12-03 15:40 ` Christian Brauner
2019-12-03 13:56 ` [PATCH 19/23] perf beauty: Add CLEAR_SIGHAND support for clone's flags arg Arnaldo Carvalho de Melo
2019-12-03 15:41 ` Christian Brauner
2019-12-03 13:56 ` [PATCH 20/23] tools arch x86: Sync asm/cpufeatures.h with the kernel sources Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 21/23] perf kvm: Clarify the 'perf kvm' -i and -o command line options Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 22/23] libtraceevent: Fix lib installation with O= Arnaldo Carvalho de Melo
2019-12-03 13:56 ` [PATCH 23/23] libtraceevent: Copy pkg-config file to output folder when using O= Arnaldo Carvalho de Melo
2019-12-04 7:51 ` [GIT PULL] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191203135606.24902-9-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=tglx@linutronix.de \
--cc=williams@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).