All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: jolsa@kernel.org
Cc: acme@kernel.org, linux-kernel@vger.kernel.org,
	Andi Kleen <ak@linux.intel.com>
Subject: [PATCH v5 11/13] perf stat: Use affinity for reading
Date: Thu,  7 Nov 2019 10:16:44 -0800	[thread overview]
Message-ID: <20191107181646.506734-12-andi@firstfloor.org> (raw)
In-Reply-To: <20191107181646.506734-1-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

Restructure event reading to use affinity to minimize the number
of IPIs needed.

Before on a large test case with 94 CPUs:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
  3.16    0.106079           4     22082           read

After:

  3.43    0.081295           3     22082           read

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---

v2: Use new iterator macros
v3: Use new iterator macros
v4: Change iterator macros even more
---
 tools/perf/builtin-stat.c | 94 ++++++++++++++++++++++-----------------
 tools/perf/util/evsel.h   |  1 +
 2 files changed, 54 insertions(+), 41 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7f9ec41d8f62..dff817d714f6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter(struct evsel *counter, struct timespec *rs, int cpu)
 {
 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			struct perf_counts_values *count;
-
-			count = perf_counts(counter->counts, cpu, thread);
-
-			/*
-			 * The leader's group read loads data into its group members
-			 * (via perf_evsel__read_counter) and sets threir count->loaded.
-			 */
-			if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
-			    read_single_counter(counter, cpu, thread, rs)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
-				return -1;
-			}
+		struct perf_counts_values *count;
 
-			perf_counts__set_loaded(counter->counts, cpu, thread, false);
+		count = perf_counts(counter->counts, cpu, thread);
 
-			if (STAT_RECORD) {
-				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-					pr_err("failed to write stat event\n");
-					return -1;
-				}
-			}
+		/*
+		 * The leader's group read loads data into its group members
+		 * (via perf_evsel__read_counter) and sets threir count->loaded.
+		 */
+		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+		    read_single_counter(counter, cpu, thread, rs)) {
+			counter->counts->scaled = -1;
+			perf_counts(counter->counts, cpu, thread)->ena = 0;
+			perf_counts(counter->counts, cpu, thread)->run = 0;
+			return -1;
+		}
+
+		perf_counts__set_loaded(counter->counts, cpu, thread, false);
 
-			if (verbose > 1) {
-				fprintf(stat_config.output,
-					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-						perf_evsel__name(counter),
-						cpu,
-						count->val, count->ena, count->run);
+		if (STAT_RECORD) {
+			if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+				pr_err("failed to write stat event\n");
+				return -1;
 			}
 		}
+
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+					perf_evsel__name(counter),
+					cpu,
+					count->val, count->ena, count->run);
+		}
 	}
 
 	return 0;
@@ -325,15 +318,34 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
-	int ret;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	ncpus = evsel_list->core.all_cpus->nr;
+	if (!(target__has_cpu(&target) && !target__has_per_thread(&target)))
+		ncpus = 1;
+	evlist__for_each_cpu (evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			counter->err = read_counter(counter, rs, counter->cpu_iter - 1);
+		}
+	}
+	affinity__cleanup(&affinity);
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter, rs);
-		if (ret)
+		if (counter->err)
 			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ca82a93960cd..c8af4bc23f8f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
 	struct list_head	config_terms;
 	struct bpf_object	*bpf_obj;
 	int			bpf_fd;
+	int			err;
 	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char *		metric_expr;
-- 
2.23.0


  parent reply	other threads:[~2019-11-07 18:17 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-07 18:16 Optimize perf stat for large number of events/cpus Andi Kleen
2019-11-07 18:16 ` [PATCH v5 01/13] perf pmu: Use file system cache to optimize sysfs access Andi Kleen
2019-11-07 18:16 ` [PATCH v5 02/13] perf affinity: Add infrastructure to save/restore affinity Andi Kleen
2019-11-07 18:16 ` [PATCH v5 03/13] perf cpumap: Maintain cpumaps ordered and without dups Andi Kleen
2019-11-07 18:16 ` [PATCH v5 04/13] perf evlist: Maintain evlist->all_cpus Andi Kleen
2019-11-11 13:31   ` Jiri Olsa
2019-11-07 18:16 ` [PATCH v5 05/13] perf evsel: Add iterator to iterate over events ordered by CPU Andi Kleen
2019-11-07 18:16 ` [PATCH v5 06/13] perf evsel: Add functions to close evsel on a CPU Andi Kleen
2019-11-07 18:16 ` [PATCH v5 07/13] perf stat: Use affinity for closing file descriptors Andi Kleen
2019-11-11 13:30   ` Jiri Olsa
2019-11-11 16:56     ` Andi Kleen
2019-11-07 18:16 ` [PATCH v5 08/13] perf stat: Factor out open error handling Andi Kleen
2019-11-07 18:16 ` [PATCH v5 09/13] perf evsel: Support opening on a specific CPU Andi Kleen
2019-11-11 13:30   ` Jiri Olsa
2019-11-12  0:41     ` Andi Kleen
2019-11-07 18:16 ` [PATCH v5 10/13] perf stat: Use affinity for opening events Andi Kleen
2019-11-11 13:30   ` Jiri Olsa
2019-11-11 13:30   ` Jiri Olsa
2019-11-11 13:31   ` Jiri Olsa
2019-11-11 17:02     ` Andi Kleen
2019-11-07 18:16 ` Andi Kleen [this message]
2019-11-11 13:31   ` [PATCH v5 11/13] perf stat: Use affinity for reading Jiri Olsa
2019-11-07 18:16 ` [PATCH v5 12/13] perf evsel: Add functions to enable/disable for a specific CPU Andi Kleen
2019-11-11 13:30   ` Jiri Olsa
2019-11-07 18:16 ` [PATCH v5 13/13] perf stat: Use affinity for enabling/disabling events Andi Kleen
2019-11-11 14:04   ` Jiri Olsa
2019-11-11 16:50     ` Andi Kleen
2019-11-11 20:06       ` Jiri Olsa
2019-11-11 23:31         ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191107181646.506734-12-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.