linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: acme@kernel.org
Cc: jolsa@kernel.org, linux-kernel@vger.kernel.org,
	Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 10/12] perf stat: Use affinity for reading
Date: Wed, 20 Nov 2019 16:15:20 -0800	[thread overview]
Message-ID: <20191121001522.180827-11-andi@firstfloor.org> (raw)
In-Reply-To: <20191121001522.180827-1-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

Restructure event reading to use affinity to minimize the number
of IPIs needed.

Before, on a large test case with 94 CPUs:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
  3.16    0.106079           4     22082           read

After:

  3.43    0.081295           3     22082           read

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---

v2: Use new iterator macros
v3: Use new iterator macros
v4: Change iterator macros even more
v5: Preserve counter->err in all cases
v6: Rename read_counter -> read_counter_cpu
v7: Use perf_cpu_map__nr. Add {} brackets to multiline statement.
---
 tools/perf/builtin-stat.c | 97 ++++++++++++++++++++++-----------------
 tools/perf/util/evsel.h   |  1 +
 2 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5ff9c8b0de38..17dc4c686f8a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			struct perf_counts_values *count;
-
-			count = perf_counts(counter->counts, cpu, thread);
-
-			/*
-			 * The leader's group read loads data into its group members
-			 * (via perf_evsel__read_counter) and sets threir count->loaded.
-			 */
-			if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
-			    read_single_counter(counter, cpu, thread, rs)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
-				return -1;
-			}
+		struct perf_counts_values *count;
 
-			perf_counts__set_loaded(counter->counts, cpu, thread, false);
+		count = perf_counts(counter->counts, cpu, thread);
 
-			if (STAT_RECORD) {
-				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-					pr_err("failed to write stat event\n");
-					return -1;
-				}
-			}
+		/*
+		 * The leader's group read loads data into its group members
+		 * (via perf_evsel__read_counter) and sets their count->loaded.
+		 */
+		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+		    read_single_counter(counter, cpu, thread, rs)) {
+			counter->counts->scaled = -1;
+			perf_counts(counter->counts, cpu, thread)->ena = 0;
+			perf_counts(counter->counts, cpu, thread)->run = 0;
+			return -1;
+		}
 
-			if (verbose > 1) {
-				fprintf(stat_config.output,
-					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-						perf_evsel__name(counter),
-						cpu,
-						count->val, count->ena, count->run);
+		perf_counts__set_loaded(counter->counts, cpu, thread, false);
+
+		if (STAT_RECORD) {
+			if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+				pr_err("failed to write stat event\n");
+				return -1;
 			}
 		}
+
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+					perf_evsel__name(counter),
+					cpu,
+					count->val, count->ena, count->run);
+		}
 	}
 
 	return 0;
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
-	int ret;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+	if (!target__has_cpu(&target) || target__has_per_thread(&target))
+		ncpus = 1;
+	evlist__for_each_cpu (evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (!counter->err) {
+				counter->err = read_counter_cpu(counter, rs,
+								counter->cpu_iter - 1);
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter, rs);
-		if (ret)
+		if (counter->err)
 			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ca82a93960cd..c8af4bc23f8f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
 	struct list_head	config_terms;
 	struct bpf_object	*bpf_obj;
 	int			bpf_fd;
+	int			err;
 	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char *		metric_expr;
-- 
2.23.0


  parent reply	other threads:[~2019-11-21  0:15 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-21  0:15 Optimize perf stat for large number of events/cpus Andi Kleen
2019-11-21  0:15 ` [PATCH 01/12] perf pmu: Use file system cache to optimize sysfs access Andi Kleen
2019-11-29  6:02   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 02/12] perf affinity: Add infrastructure to save/restore affinity Andi Kleen
2019-11-29  6:02   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 03/12] perf cpumap: Maintain cpumaps ordered and without dups Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 04/12] perf evlist: Maintain evlist->all_cpus Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 05/12] perf evsel: Add iterator to iterate over events ordered by CPU Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 06/12] perf evsel: Add functions to close evsel on a CPU Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 07/12] perf stat: Use affinity for closing file descriptors Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 08/12] perf stat: Factor out open error handling Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 09/12] perf stat: Use affinity for opening events Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-12-18  9:29   ` [perf stat] cc9cdf40ae: perf-sanity-tests.Event_times.fail kernel test robot
2019-11-21  0:15 ` Andi Kleen [this message]
2019-12-04  7:53   ` [tip: perf/urgent] perf stat: Use affinity for reading tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 11/12] perf evsel: Add functions to enable/disable for a specific CPU Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21  0:15 ` [PATCH 12/12] perf stat: Use affinity for enabling/disabling events Andi Kleen
2019-12-04  7:53   ` [tip: perf/urgent] " tip-bot2 for Andi Kleen
2019-11-21 12:47 ` Optimize perf stat for large number of events/cpus Andi Kleen
2019-11-21 14:32   ` Arnaldo Carvalho de Melo
2019-11-27 15:16 ` Arnaldo Carvalho de Melo
2019-11-27 15:43   ` Arnaldo Carvalho de Melo
2019-11-27 23:26     ` Andi Kleen
2019-11-28  0:01       ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191121001522.180827-11-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --subject='Re: [PATCH 10/12] perf stat: Use affinity for reading' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.