All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephane Eranian <eranian@google.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@elte.hu, ak@linux.intel.com,
	acme@redhat.com, jolsa@redhat.com, namhyung.kim@lge.com,
	Stephane Eranian <eranian@gmail.com>
Subject: [PATCH v2 3/3] perf stat: add per-core aggregation
Date: Thu, 14 Feb 2013 13:57:29 +0100	[thread overview]
Message-ID: <1360846649-6411-4-git-send-email-eranian@google.com> (raw)
In-Reply-To: <1360846649-6411-1-git-send-email-eranian@google.com>

From: Stephane Eranian <eranian@gmail.com>

This patch adds the --per-core option to perf stat.

This option is used to aggregate system-wide counts
on a per physical core basis. On processors with
hyperthreading, this means counts of all HT threads
running on a physical core are aggregated.

This mode is useful to find imbalance between physical
cores running a uniform workload. Cores are identified
by socket and core: S0-C1 means physical core 1 on socket 0.
Note that cores are identified using their physical core id,
thus their numbering may not be contiguous.

Per core aggregation can be combined with interval printing:

 # perf stat -a --per-core -I 1000 -e cycles sleep 1000
 #           time core         cpus             counts events
      1.000090030 S0-C0           1          4,765,747 cycles
      1.000090030 S0-C1           1          5,580,647 cycles
      1.000090030 S0-C2           1            221,181 cycles
      1.000090030 S0-C3           1            266,092 cycles

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 tools/perf/Documentation/perf-stat.txt |    6 +++++
 tools/perf/builtin-stat.c              |   37 ++++++++++++++++++++-----
 tools/perf/util/cpumap.c               |   46 ++++++++++++++++++++++++++++++++
 tools/perf/util/cpumap.h               |   12 +++++++++
 4 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 01117c5..8059d43 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -126,6 +126,12 @@ use --per-socket in addition to -a. (system-wide).  The output includes the
 socket number and the number of online processors on that socket. This is
 useful to gauge the amount of aggregation.
 
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.  This
+is a useful mode to detect imbalance between physical cores.  To enable this mode,
+use --per-core in addition to -a. (system-wide).  The output includes the
+core number and the number of online logical processors on that physical processor.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 508d9b4..578f711 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -80,6 +80,7 @@ enum aggr_mode {
 	AGGR_NONE,
 	AGGR_GLOBAL,
 	AGGR_SOCKET,
+	AGGR_CORE,
 };
 
 static int			run_count			=  1;
@@ -326,6 +327,9 @@ static void print_interval(void)
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts events\n");
 			break;
+		case AGGR_CORE:
+			fprintf(output, "#           time core         cpus             counts events\n");
+			break;
 		case AGGR_NONE:
 			fprintf(output, "#           time CPU                 counts events\n");
 			break;
@@ -339,6 +343,7 @@ static void print_interval(void)
 		num_print_interval = 0;
 
 	switch (aggr_mode) {
+	case AGGR_CORE:
 	case AGGR_SOCKET:
 		print_aggr(prefix);
 		break;
@@ -553,13 +558,23 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
-static void aggr_printout(int cpu, int nr)
+static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 {
 	switch (aggr_mode) {
+	case AGGR_CORE:
+		fprintf(output, "S%d-C%*d%s%*d%s",
+			cpu_map__id_to_socket(id),
+			csv_output ? 0 : -8,
+			cpu_map__id_to_cpu(id),
+			csv_sep,
+			csv_output ? 0 : 4,
+			nr,
+			csv_sep);
+		break;
 	case AGGR_SOCKET:
 		fprintf(output, "S%*d%s%*d%s",
 			csv_output ? 0 : -5,
-			cpu,
+			id,
 			csv_sep,
 			csv_output ? 0 : 4,
 			nr,
@@ -568,7 +583,7 @@ static void aggr_printout(int cpu, int nr)
 	case AGGR_NONE:
 		fprintf(output, "CPU%*d%s",
 			csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
+			perf_evsel__cpus(evsel)->map[id], csv_sep);
 		break;
 	case AGGR_GLOBAL:
 	default:
@@ -581,7 +596,7 @@ static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	double msecs = avg / 1e6;
 	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
 
-	aggr_printout(cpu, nr);
+	aggr_printout(evsel, cpu, nr);
 
 	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
 
@@ -789,7 +804,7 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	else
 		fmt = "%18.0f%s%-25s";
 
-	aggr_printout(cpu, nr);
+	aggr_printout(evsel, cpu, nr);
 
 	if (aggr_mode == AGGR_GLOBAL)
 		cpu = 0;
@@ -920,7 +935,7 @@ static void print_aggr(char *prefix)
 				fprintf(output, "%s", prefix);
 
 			if (run == 0 || ena == 0) {
-				aggr_printout(cpu, nr);
+				aggr_printout(counter, cpu, nr);
 
 				fprintf(output, "%*s%s%*s",
 					csv_output ? 0 : 18,
@@ -1082,6 +1097,7 @@ static void print_stat(int argc, const char **argv)
 	}
 
 	switch (aggr_mode) {
+	case AGGR_CORE:
 	case AGGR_SOCKET:
 		print_aggr(NULL);
 		break;
@@ -1150,6 +1166,13 @@ static int perf_stat_init_aggr_mode(void)
 		}
 		aggr_get_id = cpu_map__get_socket;
 		break;
+	case AGGR_CORE:
+		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
+			perror("cannot build core map");
+			return -1;
+		}
+		aggr_get_id = cpu_map__get_core;
+		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
 	default:
@@ -1359,6 +1382,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "print counts at regular interval in ms (>= 100)"),
 	OPT_SET_UINT(0, "per-socket", &aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-core", &aggr_mode,
+		     "aggregate counts per physical processor core", AGGR_CORE),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 7bb8e87..beb8cf9 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -267,7 +267,53 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
 	return 0;
 }
 
+int cpu_map__get_core(struct cpu_map *map, int idx)
+{
+	FILE *fp;
+	const char *mnt;
+	char path[PATH_MAX];
+	int cpu, ret, s;
+
+	if (idx > map->nr)
+		return -1;
+
+	cpu = map->map[idx];
+
+	mnt = sysfs_find_mountpoint();
+	if (!mnt)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		"%s/devices/system/cpu/cpu%d/topology/core_id",
+		mnt, cpu);
+
+	fp = fopen(path, "r");
+	if (!fp)
+		return -1;
+	ret = fscanf(fp, "%d", &cpu);
+	fclose(fp);
+	if (ret != 1)
+		return -1;
+
+	s = cpu_map__get_socket(map, idx);
+	if (s == -1)
+		return -1;
+
+	/*
+	 * encode socket in upper 16 bits
+	 * core_id is relative to socket, and
+	 * we need a global id. So we combine
+	 * socket+ core id
+	 */
+	return (s << 16) | (cpu & 0xffff);
+}
+
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
 {
 	return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
 }
+
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
+{
+	return cpu_map__build_map(cpus, corep, cpu_map__get_core);
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 161b007..9bed02e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map);
 struct cpu_map *cpu_map__read(FILE *file);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
 int cpu_map__get_socket(struct cpu_map *map, int idx);
+int cpu_map__get_core(struct cpu_map *map, int idx);
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
 
 static inline int cpu_map__socket(struct cpu_map *sock, int s)
 {
@@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
 	return sock->map[s];
 }
 
+static inline int cpu_map__id_to_socket(int id)
+{
+	return id >> 16;
+}
+
+static inline int cpu_map__id_to_cpu(int id)
+{
+	return id & 0xffff;
+}
+
 static inline int cpu_map__nr(const struct cpu_map *map)
 {
 	return map ? map->nr : 1;
-- 
1.7.9.5


  parent reply	other threads:[~2013-02-14 12:57 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-14 12:57 [PATCH v2 0/3] perf stat: add per-core count aggregation Stephane Eranian
2013-02-14 12:57 ` [PATCH v2 1/3] perf stat: refactor aggregation code Stephane Eranian
2013-03-07 21:38   ` Jiri Olsa
2013-03-25 16:22   ` Arnaldo Carvalho de Melo
2013-04-02  9:33   ` [tip:perf/core] perf stat: Refactor " tip-bot for Stephane Eranian
2013-02-14 12:57 ` [PATCH v2 2/3] perf stat: rename --aggr-socket to --per-socket Stephane Eranian
2013-04-02  9:34   ` [tip:perf/core] perf stat: Rename " tip-bot for Stephane Eranian
2013-02-14 12:57 ` Stephane Eranian [this message]
2013-04-02  9:36   ` [tip:perf/core] perf stat: Add per-core aggregation tip-bot for Stephane Eranian
2013-03-07 16:22 ` [PATCH v2 0/3] perf stat: add per-core count aggregation Stephane Eranian
2013-03-25 13:57   ` Stephane Eranian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1360846649-6411-4-git-send-email-eranian@google.com \
    --to=eranian@google.com \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=namhyung.kim@lge.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.