linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: linux-kernel@vger.kernel.org
Cc: acme@redhat.com, peterz@infradead.org, jolsa@redhat.com,
	eranian@google.com, mingo@kernel.org, namhyung@kernel.org,
	Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 29/32] perf, tools: Add perf stat --transaction v2
Date: Fri,  9 Nov 2012 17:27:45 -0800	[thread overview]
Message-ID: <1352510868-7911-30-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1352510868-7911-1-git-send-email-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transsactions
using the intx and intx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |    4 +
 tools/perf/builtin-stat.c              |  101 +++++++++++++++++++++++++++++++-
 tools/perf/util/evsel.h                |    6 ++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index cf0c310..0d5b8cb 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -114,6 +114,10 @@ with it.  --append may be used here.  Examples:
 
 perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
 
 EXAMPLES
 --------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6888960..6dfc8f8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,29 @@
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
 
+static const char *transaction_attrs[] = {
+	"task-clock",
+	"{"
+	"instructions,"
+	"cycles,"
+	"cpu/cycles-t/,"
+	"cpu/cycles-ct/,"
+	"cpu/tx-start/,"
+	"cpu/el-start/"
+	"}"
+};
+
+/* must match the transaction_attrs above */
+enum {
+	T_TASK_CLOCK,
+	T_INSTRUCTIONS,
+	T_CYCLES,
+	T_CYCLES_INTX,
+	T_CYCLES_INTX_CP,
+	T_TRANSACTION_START,
+	T_ELISION_START
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct perf_target	target = {
@@ -78,6 +101,7 @@ static bool			no_aggr				= false;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
+static bool			transaction_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -127,7 +151,11 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intx_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intxcp_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static int create_perf_stat_counter(struct perf_evsel *evsel,
 				    struct perf_evsel *first)
@@ -187,6 +215,18 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
+static struct perf_evsel *nth_evsel(int n)
+{
+	struct perf_evsel *ev;
+	int j;
+
+	j = 0;
+	list_for_each_entry (ev, &evsel_list->entries, node)
+		if (j++ == n)
+			return ev;
+	return NULL;
+}
+
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
@@ -198,8 +238,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_nsecs_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[0], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_CYCLES_INTX)))
+		update_stats(&runtime_cycles_intx_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_CYCLES_INTX_CP)))
+		update_stats(&runtime_cycles_intxcp_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
+		update_stats(&runtime_transaction_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
+		update_stats(&runtime_elision_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
@@ -663,7 +709,7 @@ static void print_ll_cache_misses(int cpu,
 
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
-	double total, ratio = 0.0;
+	double total, ratio = 0.0, total2;
 	char cpustr[16] = { '\0', };
 	const char *fmt;
 
@@ -763,6 +809,41 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 			ratio = 1.0 * avg / total;
 
 		fprintf(output, " # %8.3f GHz                    ", ratio);
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_INTX)) &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+		if (total)
+			fprintf(output,
+				" #   %5.2f%% transactional cycles   ",
+				100.0 * (avg / total));
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_INTX_CP)) &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total2 = avg_stats(&runtime_cycles_intx_stats[cpu]);
+		if (total)
+			fprintf(output,
+				" #   %5.2f%% aborted cycles         ",
+				100.0 * ((total2-avg) / total));
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
+		   avg > 0 &&
+		   runtime_cycles_intx_stats[cpu].n != 0 &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_intx_stats[cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / transaction ", ratio);
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
+		   avg > 0 &&
+		   runtime_cycles_intx_stats[cpu].n != 0 &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_intx_stats[cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / elision     ", ratio);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
 
@@ -1069,6 +1150,18 @@ static int add_default_attributes(void)
 	if (null_run)
 		return 0;
 
+	if (transaction_run) {
+		unsigned i;
+
+		for (i = 0; i < ARRAY_SIZE(transaction_attrs); i++) {
+			if (parse_events(evsel_list, transaction_attrs[i], 0)) {
+				fprintf(stderr, "Cannot set up transaction events\n");
+				return -1;
+			}
+		}
+		return 0;
+	}
+
 	if (!evsel_list->nr_entries) {
 		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
 			return -1;
@@ -1147,6 +1240,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 			"command to run prior to the measured command"),
 	OPT_STRING(0, "post", &post_cmd, "command",
 			"command to run after to the measured command"),
+	OPT_BOOLEAN('T', "transaction", &transaction_run,
+		    "hardware transaction statistics"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6f94d6d..418405e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -158,6 +158,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
 	       (e1->attr.config == e2->attr.config);
 }
 
+#define perf_evsel__cmp(a, b)			\
+	((a) &&					\
+	 (b) &&					\
+	 (a)->attr.type == (b)->attr.type &&	\
+	 (a)->attr.config == (b)->attr.config)
+
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 			      int cpu, int thread, bool scale);
 
-- 
1.7.7.6


  parent reply	other threads:[~2012-11-10  1:29 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-10  1:27 perf PMU support for Haswell v6 Andi Kleen
2012-11-10  1:27 ` [PATCH 01/32] perf, x86: Add PEBSv2 record support Andi Kleen
2012-11-10  1:27 ` [PATCH 02/32] perf, x86: Basic Haswell PMU support v2 Andi Kleen
2012-11-10  1:27 ` [PATCH 03/32] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
2012-11-10  1:27 ` [PATCH 04/32] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
2012-11-10  1:27 ` [PATCH 05/32] perf, kvm: Support the intx/intx_cp modifiers in KVM arch perfmon emulation v4 Andi Kleen
2012-11-12 12:43   ` Gleb Natapov
2012-11-10  1:27 ` [PATCH 06/32] perf, x86: Support PERF_SAMPLE_ADDR on Haswell Andi Kleen
2012-11-10  1:27 ` [PATCH 07/32] perf, x86: Support Haswell v4 LBR format Andi Kleen
2012-11-10  1:27 ` [PATCH 08/32] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
2012-11-10  1:27 ` [PATCH 09/32] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2 Andi Kleen
2012-11-10  1:27 ` [PATCH 10/32] perf, tools: Add abort_tx,no_tx,in_tx branch filter options to perf record -j v3 Andi Kleen
2012-11-10  1:27 ` [PATCH 11/32] perf, tools: Support sorting by intx, abort branch flags Andi Kleen
2012-11-10  1:27 ` [PATCH 12/32] perf, x86: Support full width counting Andi Kleen
2012-11-10  1:27 ` [PATCH 13/32] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
2012-11-10  1:27 ` [PATCH 14/32] perf, core: Add a concept of a weightened sample Andi Kleen
2012-11-10  1:27 ` [PATCH 15/32] perf, x86: Support weight samples for PEBS Andi Kleen
2012-11-10  1:27 ` [PATCH 16/32] perf, tools: Add support for weight v3 Andi Kleen
2012-11-10  1:27 ` [PATCH 17/32] perf, tools: Handle XBEGIN like a jump Andi Kleen
2012-11-10  1:27 ` [PATCH 18/32] perf, x86: Support for printing PMU state on spurious PMIs v3 Andi Kleen
2012-11-10  1:27 ` [PATCH 19/32] perf, core: Add generic transaction flags v2 Andi Kleen
2012-11-10  1:27 ` [PATCH 20/32] perf, x86: Add Haswell specific transaction flag reporting Andi Kleen
2012-11-10  1:27 ` [PATCH 21/32] perf, tools: Add support for record transaction flags v2 Andi Kleen
2012-11-10  1:27 ` [PATCH 22/32] perf, tools: Point --sort documentation to --help Andi Kleen
2012-11-10  1:27 ` [PATCH 23/32] perf, tools: Add browser support for transaction flags v3 Andi Kleen
2012-11-10  1:27 ` [PATCH 24/32] perf, tools: Add arbitary aliases and support names with - Andi Kleen
2012-11-10  1:27 ` [PATCH 25/32] tools, perf: Add a precise event qualifier Andi Kleen
2012-11-28 15:40   ` Jiri Olsa
2012-11-28 20:37     ` Andi Kleen
2012-11-10  1:27 ` [PATCH 26/32] perf, x86: improve sysfs event mapping with event string Andi Kleen
2012-11-10  1:27 ` [PATCH 27/32] perf, x86: Support CPU specific sysfs events Andi Kleen
2012-11-10  1:27 ` [PATCH 28/32] perf, x86: Add Haswell TSX event aliases v2 Andi Kleen
2012-11-10  1:27 ` Andi Kleen [this message]
2012-11-10  1:27 ` [PATCH 30/32] perf, x86: Add a Haswell precise instructions event v2 Andi Kleen
2012-11-10  1:27 ` [PATCH 31/32] perf, tools: Default to cpu// for events v3 Andi Kleen
2012-11-28 15:53   ` Jiri Olsa
2012-11-28 19:04     ` Andi Kleen
2012-11-28 19:42       ` Arnaldo Carvalho de Melo
2012-11-10  1:27 ` [PATCH 32/32] perf, tools: List kernel supplied event aliases in perf list v2 Andi Kleen
  -- strict thread matches above, loose matches on Subject: below --
2012-10-31  0:33 perf PMU support for Haswell v5 Andi Kleen
2012-10-31  0:34 ` [PATCH 29/32] perf, tools: Add perf stat --transaction v2 Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1352510868-7911-30-git-send-email-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).