linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] perf stat: add per-core count aggregation
@ 2013-02-12 14:09 Stephane Eranian
  2013-02-12 14:09 ` [PATCH 1/2] perf stat: refactor aggregation code Stephane Eranian
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Stephane Eranian @ 2013-02-12 14:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: peterz, mingo, ak, acme, jolsa, namhyung.kim

This patch series contains improvements to the aggregation support
in perf stat.

First, the aggregation code is refactored and an aggr_mode enum
is defined. There is also an important bug fix for the existing
per-socket aggregation.

Second, the patch adds a new --aggr-core option to perf stat.
It aggregates counts per physical core and becomes useful on
systems with hyper-threading. The cores are presented per
socket: S0-C1, means socket 0 core 1. Note that the core number
represents its physical core id. As such, numbers may not always
be contiguous. All of this is based on topology information available
in sysfs.

Per-core aggregation can be combined with interval printing:

 # perf stat -a --aggr-core -I 1000 -e cycles sleep 100
 #           time core         cpus             counts events
      1.000101160 S0-C0           2      6,051,254,899 cycles                   
      1.000101160 S0-C1           2      6,379,230,776 cycles                   
      1.000101160 S0-C2           2      6,480,268,471 cycles                   
      1.000101160 S0-C3           2      6,110,514,321 cycles                   
      2.000663750 S0-C0           2      6,572,533,016 cycles                   
      2.000663750 S0-C1           2      6,378,623,674 cycles                   
      2.000663750 S0-C2           2      6,264,127,589 cycles                   
      2.000663750 S0-C3           2      6,305,346,613 cycles                   

For instance here on this SNB machine, we can see that the load
is evenly balanced across all 4 physical cores (HT is on).

Signed-off-by: Stephane Eranian <eranian@google.com>

-
Stephane Eranian (2):
  perf stat: refactor aggregation code
  perf stat: add per-core aggregation

 tools/perf/Documentation/perf-stat.txt |    6 +
 tools/perf/builtin-stat.c              |  237 ++++++++++++++++++++------------
 tools/perf/util/cpumap.c               |   86 ++++++++++--
 tools/perf/util/cpumap.h               |   12 ++
 4 files changed, 239 insertions(+), 102 deletions(-)

-- 
1.7.9.5


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/2] perf stat: refactor aggregation code
  2013-02-12 14:09 [PATCH 0/2] perf stat: add per-core count aggregation Stephane Eranian
@ 2013-02-12 14:09 ` Stephane Eranian
  2013-02-12 17:26   ` Andi Kleen
  2013-02-13  7:50   ` Namhyung Kim
  2013-02-12 14:09 ` [PATCH 2/2] perf stat: add per-core aggregation Stephane Eranian
  2013-02-12 17:23 ` [PATCH 0/2] perf stat: add per-core count aggregation Andi Kleen
  2 siblings, 2 replies; 10+ messages in thread
From: Stephane Eranian @ 2013-02-12 14:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: peterz, mingo, ak, acme, jolsa, namhyung.kim

Refactor aggregation code by introducing
a single aggr_mode variable and an enum
for aggregation.

Also refactor cpumap code having to do
with cpu to socket mappings. All in preparation
for extended modes, such as cpu -> core.

Also fix socket aggregation and ensure
that sockets are printed in increasing order.

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 tools/perf/builtin-stat.c |  208 ++++++++++++++++++++++++++-------------------
 tools/perf/util/cpumap.c  |   40 ++++++---
 2 files changed, 148 insertions(+), 100 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9984876..a19f8d5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -68,7 +68,7 @@
 static void print_stat(int argc, const char **argv);
 static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
-static void print_aggr_socket(char *prefix);
+static void print_aggr(char *prefix);
 
 static struct perf_evlist	*evsel_list;
 
@@ -76,11 +76,16 @@ static struct perf_target	target = {
 	.uid	= UINT_MAX,
 };
 
+enum aggr_mode {
+	AGGR_NONE,
+	AGGR_GLOBAL,
+	AGGR_SOCKET,
+};
+
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
-static bool			no_aggr				= false;
-static bool			aggr_socket			= false;
+static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
@@ -95,7 +100,8 @@ static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
 static unsigned int		interval			= 0;
 static struct timespec		ref_time;
-static struct cpu_map		*sock_map;
+static struct cpu_map		*aggr_map;
+static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
 
 static volatile int done = 0;
 
@@ -297,41 +303,51 @@ static void print_interval(void)
 	struct timespec ts, rs;
 	char prefix[64];
 
-	if (no_aggr) {
+	if (aggr_mode == AGGR_GLOBAL) {
 		list_for_each_entry(counter, &evsel_list->entries, node) {
 			ps = counter->priv;
 			memset(ps->res_stats, 0, sizeof(ps->res_stats));
-			read_counter(counter);
+			read_counter_aggr(counter);
 		}
-	} else {
+	} else	{
 		list_for_each_entry(counter, &evsel_list->entries, node) {
 			ps = counter->priv;
 			memset(ps->res_stats, 0, sizeof(ps->res_stats));
-			read_counter_aggr(counter);
+			read_counter(counter);
 		}
 	}
+
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	diff_timespec(&rs, &ts, &ref_time);
 	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
 
 	if (num_print_interval == 0 && !csv_output) {
-		if (aggr_socket)
+		switch (aggr_mode) {
+		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts events\n");
-		else if (no_aggr)
+			break;
+		case AGGR_NONE:
 			fprintf(output, "#           time CPU                 counts events\n");
-		else
+			break;
+		case AGGR_GLOBAL:
+		default:
 			fprintf(output, "#           time             counts events\n");
+		}
 	}
 
 	if (++num_print_interval == 25)
 		num_print_interval = 0;
 
-	if (aggr_socket)
-		print_aggr_socket(prefix);
-	else if (no_aggr) {
+	switch (aggr_mode) {
+	case AGGR_SOCKET:
+		print_aggr(prefix);
+		break;
+	case AGGR_NONE:
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter(counter, prefix);
-	} else {
+		break;
+	case AGGR_GLOBAL:
+	default:
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter_aggr(counter, prefix);
 	}
@@ -356,12 +372,6 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 		ts.tv_nsec = 0;
 	}
 
-	if (aggr_socket
-	    && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
-		perror("cannot build socket map");
-		return -1;
-	}
-
 	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
 		perror("failed to create pipes");
 		return -1;
@@ -479,17 +489,18 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
-	if (no_aggr) {
-		list_for_each_entry(counter, &evsel_list->entries, node) {
-			read_counter(counter);
-			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
-		}
-	} else {
+	if (aggr_mode == AGGR_GLOBAL) {
 		list_for_each_entry(counter, &evsel_list->entries, node) {
 			read_counter_aggr(counter);
 			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
 					     evsel_list->threads->nr);
 		}
+	} else {
+		list_for_each_entry(counter, &evsel_list->entries, node) {
+			read_counter(counter);
+			perf_evsel__close_fd(counter,
+					     perf_evsel__nr_cpus(counter), 1);
+		}
 	}
 
 	return WEXITSTATUS(status);
@@ -542,26 +553,37 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
-static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
+static void aggr_printout(int cpu, int nr)
 {
-	double msecs = avg / 1e6;
-	char cpustr[16] = { '\0', };
-	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
-
-	if (aggr_socket)
-		sprintf(cpustr, "S%*d%s%*d%s",
+	switch (aggr_mode) {
+	case AGGR_SOCKET:
+		fprintf(output, "S%*d%s%*d%s",
 			csv_output ? 0 : -5,
 			cpu,
 			csv_sep,
 			csv_output ? 0 : 4,
 			nr,
 			csv_sep);
-	else if (no_aggr)
-		sprintf(cpustr, "CPU%*d%s",
+			break;
+	case AGGR_NONE:
+		fprintf(output, "CPU%*d%s",
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
+		break;
+	case AGGR_GLOBAL:
+	default:
+		break;
+	}
+}
+
+static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
+{
+	double msecs = avg / 1e6;
+	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
 
-	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
+	aggr_printout(cpu, nr);
+
+	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -758,32 +780,21 @@ static void print_ll_cache_misses(int cpu,
 static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 {
 	double total, ratio = 0.0;
-	char cpustr[16] = { '\0', };
 	const char *fmt;
 
 	if (csv_output)
-		fmt = "%s%.0f%s%s";
+		fmt = "%.0f%s%s";
 	else if (big_num)
-		fmt = "%s%'18.0f%s%-25s";
+		fmt = "%'18.0f%s%-25s";
 	else
-		fmt = "%s%18.0f%s%-25s";
+		fmt = "%18.0f%s%-25s";
 
-	if (aggr_socket)
-		sprintf(cpustr, "S%*d%s%*d%s",
-			csv_output ? 0 : -5,
-			cpu,
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-	else if (no_aggr)
-		sprintf(cpustr, "CPU%*d%s",
-			csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
-	else
+	aggr_printout(cpu, nr);
+
+	if (aggr_mode == AGGR_GLOBAL)
 		cpu = 0;
 
-	fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));
+	fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -882,23 +893,23 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	}
 }
 
-static void print_aggr_socket(char *prefix)
+static void print_aggr(char *prefix)
 {
 	struct perf_evsel *counter;
+	int cpu, s, s2, id, nr;
 	u64 ena, run, val;
-	int cpu, s, s2, sock, nr;
 
-	if (!sock_map)
+	if (!(aggr_map || aggr_get_id))
 		return;
 
-	for (s = 0; s < sock_map->nr; s++) {
-		sock = cpu_map__socket(sock_map, s);
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
 		list_for_each_entry(counter, &evsel_list->entries, node) {
 			val = ena = run = 0;
 			nr = 0;
 			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
-				if (s2 != sock)
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
 					continue;
 				val += counter->counts->cpu[cpu].val;
 				ena += counter->counts->cpu[cpu].ena;
@@ -909,18 +920,15 @@ static void print_aggr_socket(char *prefix)
 				fprintf(output, "%s", prefix);
 
 			if (run == 0 || ena == 0) {
-				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
-					csv_output ? 0 : -5,
-					s,
-					csv_sep,
-					csv_output ? 0 : 4,
-					nr,
-					csv_sep,
+				aggr_printout(cpu, nr);
+
+				fprintf(output, "%*s%s%*s",
 					csv_output ? 0 : 18,
 					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 					csv_sep,
 					csv_output ? 0 : -24,
 					perf_evsel__name(counter));
+
 				if (counter->cgrp)
 					fprintf(output, "%s%s",
 						csv_sep, counter->cgrp->name);
@@ -930,9 +938,9 @@ static void print_aggr_socket(char *prefix)
 			}
 
 			if (nsec_counter(counter))
-				nsec_printout(sock, nr, counter, val);
+				nsec_printout(id, nr, counter, val);
 			else
-				abs_printout(sock, nr, counter, val);
+				abs_printout(id, nr, counter, val);
 
 			if (!csv_output) {
 				print_noise(counter, 1.0);
@@ -1073,14 +1081,20 @@ static void print_stat(int argc, const char **argv)
 		fprintf(output, ":\n\n");
 	}
 
-	if (aggr_socket)
-		print_aggr_socket(NULL);
-	else if (no_aggr) {
-		list_for_each_entry(counter, &evsel_list->entries, node)
-			print_counter(counter, NULL);
-	} else {
+	switch (aggr_mode) {
+	case AGGR_SOCKET:
+		print_aggr(NULL);
+		break;
+	case AGGR_GLOBAL:
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter_aggr(counter, NULL);
+		break;
+	case AGGR_NONE:
+		list_for_each_entry(counter, &evsel_list->entries, node)
+			print_counter(counter, NULL);
+		break;
+	default:
+		break;
 	}
 
 	if (!csv_output) {
@@ -1126,6 +1140,25 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
 	return 0;
 }
 
+static int perf_stat_init_aggr_mode(void)
+{
+	switch (aggr_mode) {
+	case AGGR_SOCKET:
+		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
+			perror("cannot build socket map");
+			return -1;
+		}
+		aggr_get_id = cpu_map__get_socket;
+		break;
+	case AGGR_NONE:
+	case AGGR_GLOBAL:
+	default:
+		break;
+	}
+	return 0;
+}
+
+
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1308,7 +1341,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 			   stat__set_big_num),
 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
-	OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"),
+	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
+		    "disable CPU count aggregation", AGGR_NONE),
 	OPT_STRING('x', "field-separator", &csv_sep, "separator",
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
@@ -1323,7 +1357,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 			"command to run after to the measured command"),
 	OPT_UINTEGER('I', "interval-print", &interval,
 		    "print counts at regular interval in ms (>= 100)"),
-	OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
+	OPT_SET_UINT(0, "aggr-socket", &aggr_mode,
+		     "aggregate counts per processor socket", AGGR_SOCKET),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
@@ -1403,19 +1438,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		usage_with_options(stat_usage, options);
 
 	/* no_aggr, cgroup are for system-wide only */
-	if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
+	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
+	     && !perf_target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
 
 		usage_with_options(stat_usage, options);
-	}
-
-	if (aggr_socket) {
-		if (!perf_target__has_cpu(&target)) {
-			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
-			usage_with_options(stat_usage, options);
-		}
-		no_aggr = true;
+		return -1;
 	}
 
 	if (add_default_attributes())
@@ -1450,6 +1479,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 	}
 
+	if (perf_stat_init_aggr_mode())
+		goto out;
+
 	/*
 	 * We dont want to block the signals - that would cause
 	 * child tasks to inherit that and Ctrl-C would not work.
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index f817046..7bb8e87 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -4,6 +4,7 @@
 #include "cpumap.h"
 #include <assert.h>
 #include <stdio.h>
+#include <stdlib.h>
 
 static struct cpu_map *cpu_map__default_new(void)
 {
@@ -219,7 +220,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
 	if (!mnt)
 		return -1;
 
-	sprintf(path,
+	snprintf(path, PATH_MAX,
 		"%s/devices/system/cpu/cpu%d/topology/physical_package_id",
 		mnt, cpu);
 
@@ -231,27 +232,42 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
 	return ret == 1 ? cpu : -1;
 }
 
-int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
+static int cmp_ids(const void *a, const void *b)
 {
-	struct cpu_map *sock;
+	return *(int *)a - *(int *)b;
+}
+
+static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+			      int (*f)(struct cpu_map *map, int cpu))
+{
+	struct cpu_map *c;
 	int nr = cpus->nr;
 	int cpu, s1, s2;
 
-	sock = calloc(1, sizeof(*sock) + nr * sizeof(int));
-	if (!sock)
+	/* allocate as much as possible */
+	c = calloc(1, sizeof(*c) + nr * sizeof(int));
+	if (!c)
 		return -1;
 
 	for (cpu = 0; cpu < nr; cpu++) {
-		s1 = cpu_map__get_socket(cpus, cpu);
-		for (s2 = 0; s2 < sock->nr; s2++) {
-			if (s1 == sock->map[s2])
+		s1 = f(cpus, cpu);
+		for (s2 = 0; s2 < c->nr; s2++) {
+			if (s1 == c->map[s2])
 				break;
 		}
-		if (s2 == sock->nr) {
-			sock->map[sock->nr] = s1;
-			sock->nr++;
+		if (s2 == c->nr) {
+			c->map[c->nr] = s1;
+			c->nr++;
 		}
 	}
-	*sockp = sock;
+	/* ensure we process id in increasing order */
+	qsort(c->map, c->nr, sizeof(int), cmp_ids);
+
+	*res = c;
 	return 0;
 }
+
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
+{
+	return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
+}
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] perf stat: add per-core aggregation
  2013-02-12 14:09 [PATCH 0/2] perf stat: add per-core count aggregation Stephane Eranian
  2013-02-12 14:09 ` [PATCH 1/2] perf stat: refactor aggregation code Stephane Eranian
@ 2013-02-12 14:09 ` Stephane Eranian
  2013-02-12 17:23 ` [PATCH 0/2] perf stat: add per-core count aggregation Andi Kleen
  2 siblings, 0 replies; 10+ messages in thread
From: Stephane Eranian @ 2013-02-12 14:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: peterz, mingo, ak, acme, jolsa, namhyung.kim

This patch adds the --aggr-core option to perf stat.

This option is used to aggregate system-wide counts
on a per physical core basis. On processors with
hyperthreading, this means counts of all HT threads
running on a physical core are aggregated.

This mode is useful to find imbalance between physical
cores running a uniform workload. Cores are identified
by socket: S0-C1, means physical core 1 on socket 0. Note
that cores are identified using their physical core id,
thus their numbering may not be continuous.

Per core aggregation can be combined with interval printing:

 # perf stat -a --aggr-core -I 1000 -e cycles sleep 1000
 #           time core         cpus             counts events
      1.000090030 S0-C0           1          4,765,747 cycles
      1.000090030 S0-C1           1          5,580,647 cycles
      1.000090030 S0-C2           1            221,181 cycles
      1.000090030 S0-C3           1            266,092 cycles

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 tools/perf/Documentation/perf-stat.txt |    6 +++++
 tools/perf/builtin-stat.c              |   37 ++++++++++++++++++++-----
 tools/perf/util/cpumap.c               |   46 ++++++++++++++++++++++++++++++++
 tools/perf/util/cpumap.h               |   12 +++++++++
 4 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index faf4f4f..d96408f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -126,6 +126,12 @@ use --aggr-socket in addition to -a. (system-wide).  The output includes the
 socket number and the number of online processors on that socket. This is
 useful to gauge the amount of aggregation.
 
+--aggr-core::
+Aggregate counts per physical processor for system-wide mode measurements.  This
+is a useful mode to detect imbalance between physical cores.  To enable this mode,
+use --aggr-core in addition to -a. (system-wide).  The output includes the
+core number and the number of online logical processors on that physical processor.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a19f8d5..2a62ee7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -80,6 +80,7 @@ enum aggr_mode {
 	AGGR_NONE,
 	AGGR_GLOBAL,
 	AGGR_SOCKET,
+	AGGR_CORE,
 };
 
 static int			run_count			=  1;
@@ -326,6 +327,9 @@ static void print_interval(void)
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts events\n");
 			break;
+		case AGGR_CORE:
+			fprintf(output, "#           time core         cpus             counts events\n");
+			break;
 		case AGGR_NONE:
 			fprintf(output, "#           time CPU                 counts events\n");
 			break;
@@ -339,6 +343,7 @@ static void print_interval(void)
 		num_print_interval = 0;
 
 	switch (aggr_mode) {
+	case AGGR_CORE:
 	case AGGR_SOCKET:
 		print_aggr(prefix);
 		break;
@@ -553,13 +558,23 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
-static void aggr_printout(int cpu, int nr)
+static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 {
 	switch (aggr_mode) {
+	case AGGR_CORE:
+		fprintf(output, "S%d-C%*d%s%*d%s",
+			cpu_map__id_to_socket(id),
+			csv_output ? 0 : -8,
+			cpu_map__id_to_cpu(id),
+			csv_sep,
+			csv_output ? 0 : 4,
+			nr,
+			csv_sep);
+		break;
 	case AGGR_SOCKET:
 		fprintf(output, "S%*d%s%*d%s",
 			csv_output ? 0 : -5,
-			cpu,
+			id,
 			csv_sep,
 			csv_output ? 0 : 4,
 			nr,
@@ -568,7 +583,7 @@ static void aggr_printout(int cpu, int nr)
 	case AGGR_NONE:
 		fprintf(output, "CPU%*d%s",
 			csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
+			perf_evsel__cpus(evsel)->map[id], csv_sep);
 		break;
 	case AGGR_GLOBAL:
 	default:
@@ -581,7 +596,7 @@ static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	double msecs = avg / 1e6;
 	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
 
-	aggr_printout(cpu, nr);
+	aggr_printout(evsel, cpu, nr);
 
 	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
 
@@ -789,7 +804,7 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	else
 		fmt = "%18.0f%s%-25s";
 
-	aggr_printout(cpu, nr);
+	aggr_printout(evsel, cpu, nr);
 
 	if (aggr_mode == AGGR_GLOBAL)
 		cpu = 0;
@@ -920,7 +935,7 @@ static void print_aggr(char *prefix)
 				fprintf(output, "%s", prefix);
 
 			if (run == 0 || ena == 0) {
-				aggr_printout(cpu, nr);
+				aggr_printout(counter, cpu, nr);
 
 				fprintf(output, "%*s%s%*s",
 					csv_output ? 0 : 18,
@@ -1082,6 +1097,7 @@ static void print_stat(int argc, const char **argv)
 	}
 
 	switch (aggr_mode) {
+	case AGGR_CORE:
 	case AGGR_SOCKET:
 		print_aggr(NULL);
 		break;
@@ -1150,6 +1166,13 @@ static int perf_stat_init_aggr_mode(void)
 		}
 		aggr_get_id = cpu_map__get_socket;
 		break;
+	case AGGR_CORE:
+		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
+			perror("cannot build core map");
+			return -1;
+		}
+		aggr_get_id = cpu_map__get_core;
+		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
 	default:
@@ -1359,6 +1382,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "print counts at regular interval in ms (>= 100)"),
 	OPT_SET_UINT(0, "aggr-socket", &aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "aggr-core", &aggr_mode,
+		     "aggregate counts per physical processor core", AGGR_CORE),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 7bb8e87..beb8cf9 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -267,7 +267,53 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
 	return 0;
 }
 
+int cpu_map__get_core(struct cpu_map *map, int idx)
+{
+	FILE *fp;
+	const char *mnt;
+	char path[PATH_MAX];
+	int cpu, ret, s;
+
+	if (idx > map->nr)
+		return -1;
+
+	cpu = map->map[idx];
+
+	mnt = sysfs_find_mountpoint();
+	if (!mnt)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		"%s/devices/system/cpu/cpu%d/topology/core_id",
+		mnt, cpu);
+
+	fp = fopen(path, "r");
+	if (!fp)
+		return -1;
+	ret = fscanf(fp, "%d", &cpu);
+	fclose(fp);
+	if (ret != 1)
+		return -1;
+
+	s = cpu_map__get_socket(map, idx);
+	if (s == -1)
+		return -1;
+
+	/*
+	 * encode socket in upper 16 bits
+	 * core_id is relative to socket, and
+	 * we need a global id. So we combine
+	 * socket + core id
+	 */
+	return (s << 16) | (cpu & 0xffff);
+}
+
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
 {
 	return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
 }
+
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
+{
+	return cpu_map__build_map(cpus, corep, cpu_map__get_core);
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 161b007..9bed02e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -15,7 +15,9 @@ void cpu_map__delete(struct cpu_map *map);
 struct cpu_map *cpu_map__read(FILE *file);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
 int cpu_map__get_socket(struct cpu_map *map, int idx);
+int cpu_map__get_core(struct cpu_map *map, int idx);
 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
 
 static inline int cpu_map__socket(struct cpu_map *sock, int s)
 {
@@ -24,6 +26,16 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
 	return sock->map[s];
 }
 
+static inline int cpu_map__id_to_socket(int id)
+{
+	return id >> 16;
+}
+
+static inline int cpu_map__id_to_cpu(int id)
+{
+	return id & 0xffff;
+}
+
 static inline int cpu_map__nr(const struct cpu_map *map)
 {
 	return map ? map->nr : 1;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] perf stat: add per-core count aggregation
  2013-02-12 14:09 [PATCH 0/2] perf stat: add per-core count aggregation Stephane Eranian
  2013-02-12 14:09 ` [PATCH 1/2] perf stat: refactor aggregation code Stephane Eranian
  2013-02-12 14:09 ` [PATCH 2/2] perf stat: add per-core aggregation Stephane Eranian
@ 2013-02-12 17:23 ` Andi Kleen
  2013-02-12 17:26   ` Stephane Eranian
  2 siblings, 1 reply; 10+ messages in thread
From: Andi Kleen @ 2013-02-12 17:23 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: linux-kernel, peterz, mingo, acme, jolsa, namhyung.kim

On Tue, Feb 12, 2013 at 03:09:26PM +0100, Stephane Eranian wrote:
> This patch series contains improvement to the aggregation support
> in perf stat.
> 
> First, the aggregation code is refactored and a aggr_mode enum
> is defined. There is also an important bug fix for the existing
> per-socket aggregation.
> 
> Second, the patch adds a new --aggr-core option to perf stat.

Perhaps it's just me, but the option name is ugly (and sounds
aggressive)

--per-core perhaps?

The idea itself is useful.

> It aggregates counts per physical core and becomes useful on
> systems with hyper-threading. The cores are presented per
> socket: S0-C1, means socket 0 core 1. Note that the core number
> represents its physical core id. As such, numbers may not always
> be contiguous. All of this is based on topology information available
> in sysfs.
> 
> Per-core aggregation can be combined with interval printing:

FWIW this would be much nicer if stat had a Kevents or Mevents mode.
Usually we don't need all the digits.  But that could be added separately

Does it work for multiple events in parallel?
> 
>  # perf stat -a --aggr-core -I 1000 -e cycles sleep 100
>  #           time core         cpus             counts events
>       1.000101160 S0-C0           2      6,051,254,899 cycles                   
>       1.000101160 S0-C1           2      6,379,230,776 cycles                   
>       1.000101160 S0-C2           2      6,480,268,471 cycles                   
>       1.000101160 S0-C3           2      6,110,514,321 cycles                   
>       2.000663750 S0-C0           2      6,572,533,016 cycles                   
>       2.000663750 S0-C1           2      6,378,623,674 cycles                   
>       2.000663750 S0-C2           2      6,264,127,589 cycles                   
>       2.000663750 S0-C3           2      6,305,346,613 cycles                   

-Andi 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] perf stat: add per-core count aggregation
  2013-02-12 17:23 ` [PATCH 0/2] perf stat: add per-core count aggregation Andi Kleen
@ 2013-02-12 17:26   ` Stephane Eranian
  2013-02-12 17:29     ` Andi Kleen
  0 siblings, 1 reply; 10+ messages in thread
From: Stephane Eranian @ 2013-02-12 17:26 UTC (permalink / raw)
  To: Andi Kleen
  Cc: LKML, Peter Zijlstra, mingo, Arnaldo Carvalho de Melo, Jiri Olsa,
	Namhyung Kim

On Tue, Feb 12, 2013 at 6:23 PM, Andi Kleen <ak@linux.intel.com> wrote:
> On Tue, Feb 12, 2013 at 03:09:26PM +0100, Stephane Eranian wrote:
>> This patch series contains improvement to the aggregation support
>> in perf stat.
>>
>> First, the aggregation code is refactored and a aggr_mode enum
>> is defined. There is also an important bug fix for the existing
>> per-socket aggregation.
>>
>> Second, the patch adds a new --aggr-core option to perf stat.
>
> Perhaps it's just me, but the option name is ugly (and sounds
> aggressive)
>
> --per-core perhaps?
>
I chose that name to be similar to --aggr-socket.
But we could change both at this point.


> The idea itself is useful.
>
Yes, it is.

>> It aggregates counts per physical core and becomes useful on
>> systems with hyper-threading. The cores are presented per
>> socket: S0-C1, means socket 0 core 1. Note that the core number
>> represents its physical core id. As such, numbers may not always
>> be contiguous. All of this is based on topology information available
>> in sysfs.
>>
>> Per-core aggregation can be combined with interval printing:
>
> FWIW this would be much nicer if stat had a Kevents or Mevents mode.
> Usually we don't need all the digits.  But that could be added separately
>
> Does it work for multiple events in parallel?

Yes, it does. It's all regular perf stat.


>>
>>  # perf stat -a --aggr-core -I 1000 -e cycles sleep 100
>>  #           time core         cpus             counts events
>>       1.000101160 S0-C0           2      6,051,254,899 cycles
>>       1.000101160 S0-C1           2      6,379,230,776 cycles
>>       1.000101160 S0-C2           2      6,480,268,471 cycles
>>       1.000101160 S0-C3           2      6,110,514,321 cycles
>>       2.000663750 S0-C0           2      6,572,533,016 cycles
>>       2.000663750 S0-C1           2      6,378,623,674 cycles
>>       2.000663750 S0-C2           2      6,264,127,589 cycles
>>       2.000663750 S0-C3           2      6,305,346,613 cycles
>
> -Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] perf stat: refactor aggregation code
  2013-02-12 14:09 ` [PATCH 1/2] perf stat: refactor aggregation code Stephane Eranian
@ 2013-02-12 17:26   ` Andi Kleen
  2013-02-13  7:50   ` Namhyung Kim
  1 sibling, 0 replies; 10+ messages in thread
From: Andi Kleen @ 2013-02-12 17:26 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: linux-kernel, peterz, mingo, acme, jolsa, namhyung.kim

On Tue, Feb 12, 2013 at 03:09:27PM +0100, Stephane Eranian wrote:
> -		fmt = "%s%.0f%s%s";
> +		fmt = "%.0f%s%s";
>  	else if (big_num)
> -		fmt = "%s%'18.0f%s%-25s";
> +		fmt = "%'18.0f%s%-25s";
>  	else
> -		fmt = "%s%18.0f%s%-25s";
> +		fmt = "%18.0f%s%-25s";

I realize the code was like this before, but it's better to not 
use variable sprintf formats, as you lose all the compile
time checking of the compiler and mistakes in printf are common. 
Better to duplicate the sprintf.

The rest looks good to me.

Reviewed-by: Andi Kleen <ak@linux.intel.com>

-Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] perf stat: add per-core count aggregation
  2013-02-12 17:26   ` Stephane Eranian
@ 2013-02-12 17:29     ` Andi Kleen
  2013-02-12 17:33       ` Stephane Eranian
  0 siblings, 1 reply; 10+ messages in thread
From: Andi Kleen @ 2013-02-12 17:29 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: LKML, Peter Zijlstra, mingo, Arnaldo Carvalho de Melo, Jiri Olsa,
	Namhyung Kim

> 
> > The idea itself is useful.
> >
> Yes, it is.

BTW it would be even more useful if it could print some of the
statistics turbostat does (in particular frequency and C0 residency).
Often you only care about cycles not idle, and the frequency
tells you how fast the cycles happen.

I think Cx could be added relatively easily as a software event,
but frequency doesn't fit very well into the perf counting model,
as it's really sampling.

-Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/2] perf stat: add per-core count aggregation
  2013-02-12 17:29     ` Andi Kleen
@ 2013-02-12 17:33       ` Stephane Eranian
  0 siblings, 0 replies; 10+ messages in thread
From: Stephane Eranian @ 2013-02-12 17:33 UTC (permalink / raw)
  To: Andi Kleen
  Cc: LKML, Peter Zijlstra, mingo, Arnaldo Carvalho de Melo, Jiri Olsa,
	Namhyung Kim

On Tue, Feb 12, 2013 at 6:29 PM, Andi Kleen <ak@linux.intel.com> wrote:
>>
>> > The idea itself is useful.
>> >
>> Yes, it is.
>
> BTW it would be even more useful if it could print some of the
> statistics turbostat does (in particular frequency and C0 residency)
> Often you only care about cycles not idle, and the frequency
> tells you how fast the cycles happen.
>
You get C-state residency at the socket level with my perf uncore
patch.

This patch is just about helping with imbalance at the physical
core level.

> I think Cx could be added relatively easily as a software event,
> but frequency doesn't fit very well into the perf counting model,
> as it's really sampling.
>
> -Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] perf stat: refactor aggregation code
  2013-02-12 14:09 ` [PATCH 1/2] perf stat: refactor aggregation code Stephane Eranian
  2013-02-12 17:26   ` Andi Kleen
@ 2013-02-13  7:50   ` Namhyung Kim
  2013-02-13  9:38     ` Stephane Eranian
  1 sibling, 1 reply; 10+ messages in thread
From: Namhyung Kim @ 2013-02-13  7:50 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, peterz, mingo, ak, acme, jolsa, namhyung.kim

Hi Stephane,

On Tue, 12 Feb 2013 15:09:27 +0100, Stephane Eranian wrote:
> Refactor aggregation code by introducing
> a single aggr_mode variable and an enum
> for aggregation.
>
> Also refactor cpumap code having to do
> with cpu to socket mappings. All in preparation
> for extended modes, such as cpu -> core.
>
> Also fix socket aggregation and ensure
> that sockets are printed in increasing order.
[snip]
> -static void print_aggr_socket(char *prefix)
> +static void print_aggr(char *prefix)
>  {
>  	struct perf_evsel *counter;
> +	int cpu, s, s2, id, nr;
>  	u64 ena, run, val;
> -	int cpu, s, s2, sock, nr;
>  
> -	if (!sock_map)
> +	if (!(aggr_map || aggr_get_id))
>  		return;
>  
> -	for (s = 0; s < sock_map->nr; s++) {
> -		sock = cpu_map__socket(sock_map, s);
> +	for (s = 0; s < aggr_map->nr; s++) {
> +		id = aggr_map->map[s];
>  		list_for_each_entry(counter, &evsel_list->entries, node) {
>  			val = ena = run = 0;
>  			nr = 0;
...
> @@ -1073,14 +1081,20 @@ static void print_stat(int argc, const char **argv)
>  		fprintf(output, ":\n\n");
>  	}
>  
> -	if (aggr_socket)
> -		print_aggr_socket(NULL);
> -	else if (no_aggr) {
> -		list_for_each_entry(counter, &evsel_list->entries, node)
> -			print_counter(counter, NULL);
> -	} else {
> +	switch (aggr_mode) {
> +	case AGGR_SOCKET:
> +		print_aggr(NULL);
> +		break;

This line should look like this IMHO:

		list_for_each_entry(counter, &evsel_list->entries, node)
			print_aggr(counter, NULL);

and the equivalent loop in print_aggr() should be removed.  This is
for consistency with other formats when multiple events are counted.

For instance, it'd sort on events first and then on sockets, like:

  #      time socket cpus     counts events
           t0 S0     4        XXXXXX cycles
           t0 S1     4        XXXXXX cycles
           t0 S0     4          YYYY cache-misses
           t0 S1     4          YYYY cache-misses
           t1 S0     4        ZZZZZZ cycles
  ...

But the current code appears to sort on sockets first instead:

  #      time socket cpus     counts events
           t0 S0     4        XXXXXX cycles
           t0 S0     4          YYYY cache-misses
           t0 S1     4        XXXXXX cycles
           t0 S1     4          YYYY cache-misses
           t1 S0     4        ZZZZZZ cycles
  ...

Thanks,
Namhyung


> +	case AGGR_GLOBAL:
>  		list_for_each_entry(counter, &evsel_list->entries, node)
>  			print_counter_aggr(counter, NULL);
> +		break;
> +	case AGGR_NONE:
> +		list_for_each_entry(counter, &evsel_list->entries, node)
> +			print_counter(counter, NULL);
> +		break;
> +	default:
> +		break;
>  	}

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] perf stat: refactor aggregation code
  2013-02-13  7:50   ` Namhyung Kim
@ 2013-02-13  9:38     ` Stephane Eranian
  0 siblings, 0 replies; 10+ messages in thread
From: Stephane Eranian @ 2013-02-13  9:38 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: LKML, Peter Zijlstra, mingo, ak, Arnaldo Carvalho de Melo,
	Jiri Olsa, Namhyung Kim

On Wed, Feb 13, 2013 at 8:50 AM, Namhyung Kim <namhyung@kernel.org> wrote:
> Hi Stephane,
>
> On Tue, 12 Feb 2013 15:09:27 +0100, Stephane Eranian wrote:
>> Refactor aggregation code by introducing
>> a single aggr_mode variable and an enum
>> for aggregation.
>>
>> Also refactor cpumap code having to do
>> with cpu to socket mappings. All in preparation
>> for extended modes, such as cpu -> core.
>>
>> Also fix socket aggregation and ensure
>> that sockets are printed in increasing order.
> [snip]
>> -static void print_aggr_socket(char *prefix)
>> +static void print_aggr(char *prefix)
>>  {
>>       struct perf_evsel *counter;
>> +     int cpu, s, s2, id, nr;
>>       u64 ena, run, val;
>> -     int cpu, s, s2, sock, nr;
>>
>> -     if (!sock_map)
>> +     if (!(aggr_map || aggr_get_id))
>>               return;
>>
>> -     for (s = 0; s < sock_map->nr; s++) {
>> -             sock = cpu_map__socket(sock_map, s);
>> +     for (s = 0; s < aggr_map->nr; s++) {
>> +             id = aggr_map->map[s];
>>               list_for_each_entry(counter, &evsel_list->entries, node) {
>>                       val = ena = run = 0;
>>                       nr = 0;
> ...
>> @@ -1073,14 +1081,20 @@ static void print_stat(int argc, const char **argv)
>>               fprintf(output, ":\n\n");
>>       }
>>
>> -     if (aggr_socket)
>> -             print_aggr_socket(NULL);
>> -     else if (no_aggr) {
>> -             list_for_each_entry(counter, &evsel_list->entries, node)
>> -                     print_counter(counter, NULL);
>> -     } else {
>> +     switch (aggr_mode) {
>> +     case AGGR_SOCKET:
>> +             print_aggr(NULL);
>> +             break;
>
> This line should look like this IMHO:
>
>                 list_for_each_entry(counter, &evsel_list->entries, node)
>                         print_aggr(counter, NULL);
>
> and the equivalent loop in the print_aggr() should be removed.  This is
> for consistency with other formats if multiple events counted.
>
Sounds good. I will make the change.

> For instance, it'd sorting on events first and then sockets like:
>
>   #      time socket cpus     counts events
>            t0 S0     4        XXXXXX cycles
>            t0 S1     4        XXXXXX cycles
>            t0 S0     4          YYYY cache-misses
>            t0 S1     4          YYYY cache-misses
>            t1 S0     4        ZZZZZZ cycles
>   ...
>
> But current code looks like sorting on sockets first instead.
>
>   #      time socket cpus     counts events
>            t0 S0     4        XXXXXX cycles
>            t0 S0     4          YYYY cache-misses
>            t0 S1     4        XXXXXX cycles
>            t0 S1     4          YYYY cache-misses
>            t1 S0     4        ZZZZZZ cycles
>   ...
>
> Thanks,
> Namhyung
>
>
>> +     case AGGR_GLOBAL:
>>               list_for_each_entry(counter, &evsel_list->entries, node)
>>                       print_counter_aggr(counter, NULL);
>> +             break;
>> +     case AGGR_NONE:
>> +             list_for_each_entry(counter, &evsel_list->entries, node)
>> +                     print_counter(counter, NULL);
>> +             break;
>> +     default:
>> +             break;
>>       }

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-02-13  9:38 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-12 14:09 [PATCH 0/2] perf stat: add per-core count aggregation Stephane Eranian
2013-02-12 14:09 ` [PATCH 1/2] perf stat: refactor aggregation code Stephane Eranian
2013-02-12 17:26   ` Andi Kleen
2013-02-13  7:50   ` Namhyung Kim
2013-02-13  9:38     ` Stephane Eranian
2013-02-12 14:09 ` [PATCH 2/2] perf stat: add per-core aggregation Stephane Eranian
2013-02-12 17:23 ` [PATCH 0/2] perf stat: add per-core count aggregation Andi Kleen
2013-02-12 17:26   ` Stephane Eranian
2013-02-12 17:29     ` Andi Kleen
2013-02-12 17:33       ` Stephane Eranian

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).