* Basic perf PMU support for Haswell v1
@ 2013-01-25 22:00 Andi Kleen
  2013-01-25 22:00 ` [PATCH 01/12] perf, x86: Add PEBSv2 record support Andi Kleen
                   ` (12 more replies)
  0 siblings, 13 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo; +Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung

This is based on v7 of the full Haswell PMU support, but 
ported to the latest perf/core and stripped down to the "basic support"
as requested. 

I decided to include LBRs in the basic support. These are four
self-contained patches at the end, so they could also be handled as a
separate unit if that is preferable.

Contains support for:
- Basic Haswell PMU and PEBS support
- Late unmasking of the PMI
- Support for wide counters
- New TSX counter flags, exported as sysfs attributes
- Support for checkpointed counters.
- New LBR flags, exported as new flags with tools support

Available from 
git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc hsw/pmu4-basics

For more details on the Haswell PMU please see the SDM. For more details on TSX
please see http://halobates.de/adding-lock-elision-to-linux.pdf

-Andi


* [PATCH 01/12] perf, x86: Add PEBSv2 record support
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-28 13:15   ` Stephane Eranian
  2013-01-31 17:15   ` Stephane Eranian
  2013-01-25 22:00 ` [PATCH 02/12] perf, x86: Basic Haswell PMU support v2 Andi Kleen
                   ` (11 subsequent siblings)
  12 siblings, 2 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add support for the v2 PEBS format. It has a superset of the v1 PEBS
fields, but has a longer record so we need to adjust the code paths.

The main advantage is the new "EventingRip" support, which directly
gives the instruction, not the off-by-one instruction. So with precise == 2
we use that directly and don't need to use LBRs and walk basic blocks.
This lowers the overhead significantly.

Some other features are added in later patches.
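
As a rough illustration (not part of the patch; bit positions per the SDM),
the pebs_format value checked above is decoded from IA32_PERF_CAPABILITIES
at PMU init and cached in x86_pmu.intel_cap, so a format of 2 or more
identifies the Haswell-style record:

/*
 * Sketch only: where the pebs_format version comes from.  The driver
 * reads IA32_PERF_CAPABILITIES once and keeps the decoded bits in
 * x86_pmu.intel_cap; bits 11:8 hold the PEBS record format.
 */
static int sketch_has_pebs_v2(void)
{
	u64 capabilities;

	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
	return ((capabilities >> 8) & 0xf) >= 2;
}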

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c          |    2 +-
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  101 ++++++++++++++++++++++-------
 2 files changed, 79 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17..c95290a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -397,7 +397,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		 * check that PEBS LBR correction does not conflict with
 		 * whatever the user is asking with attr->branch_sample_type
 		 */
-		if (event->attr.precise_ip > 1) {
+		if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
 			u64 *br_type = &event->attr.branch_sample_type;
 
 			if (has_branch_stack(event)) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a..9d0dae0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -41,6 +41,12 @@ struct pebs_record_nhm {
 	u64 status, dla, dse, lat;
 };
 
+struct pebs_record_v2 {
+	struct pebs_record_nhm nhm;
+	u64 eventingrip;
+	u64 tsx_tuning;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -559,8 +565,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 {
 	/*
 	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * both formats.
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_core *pebs = __pebs;
@@ -588,7 +593,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
-	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+		regs.ip = ((struct pebs_record_v2 *)pebs)->eventingrip;
+		regs.flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
@@ -641,35 +649,21 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_common(struct pt_regs *iregs, void *at,
+					void *top)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
 	struct perf_event *event = NULL;
 	u64 status = 0;
-	int bit, n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+	int bit;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	n = top - at;
-	if (n <= 0)
-		return;
+	for ( ; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;
 
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
-
-	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+		for_each_set_bit(bit, (unsigned long *)&p->status, x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -692,6 +686,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	}
 }
 
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_nhm *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return intel_pmu_drain_pebs_common(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_v2(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_v2 *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_v2 *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_v2 *)(unsigned long)ds->pebs_index;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return intel_pmu_drain_pebs_common(iregs, at, top);
+}
+
 /*
  * BTS, PEBS probe and setup
  */
@@ -723,6 +772,12 @@ void intel_ds_init(void)
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
+		case 2:
+			printk(KERN_CONT "PEBS fmt2%c, ", pebs_type);
+			x86_pmu.pebs_record_size = sizeof(struct pebs_record_v2);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_v2;
+			break;
+
 		default:
 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
-- 
1.7.7.6



* [PATCH 02/12] perf, x86: Basic Haswell PMU support v2
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
  2013-01-25 22:00 ` [PATCH 01/12] perf, x86: Add PEBSv2 record support Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-28 15:34   ` Stephane Eranian
  2013-01-25 22:00 ` [PATCH 03/12] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add basic Haswell PMU support.

Similar to SandyBridge, but with a few new events. Further
differences are handled in follow-on patches.

There are some new counter flags (intx and intx_cp) that need to be
prevented from being set on the fixed counters.
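
As a rough sketch of the mechanism (not part of the patch, simplified from
the generic constraint-matching code): widening FIXED_EVENT_FLAGS is enough
because constraint matching compares the event config under the constraint's
mask, so a TSX-qualified event no longer matches its fixed-counter
constraint and falls back to a generic counter:

/*
 * Sketch only: simplified constraint match.  With cmask widened to
 * FIXED_EVENT_FLAGS, e.g. INST_RETIRED.ANY with HSW_INTX set gives
 * (config & cmask) != 0x00c0, so the fixed-counter constraint is
 * skipped and the event is scheduled on a generic counter instead.
 */
static bool sketch_constraint_matches(struct event_constraint *c,
				      struct perf_event *event)
{
	return (event->hw.config & c->cmask) == c->code;
}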

Contains fixes from Stephane Eranian.

v2: Folded TSX bits into standard FIXED_EVENT_CONSTRAINTS
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/include/asm/perf_event.h      |    3 +++
 arch/x86/kernel/cpu/perf_event.h       |    5 ++++-
 arch/x86/kernel/cpu/perf_event_intel.c |   29 +++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf..4003bb6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 
+#define HSW_INTX					(1ULL << 32)
+#define HSW_INTX_CHECKPOINTED				(1ULL << 33)
+
 #define AMD_PERFMON_EVENTSEL_GUESTONLY			(1ULL << 40)
 #define AMD_PERFMON_EVENTSEL_HOSTONLY			(1ULL << 41)
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea..8941899 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -219,11 +219,14 @@ struct cpu_hw_events {
  *  - inv
  *  - edge
  *  - cnt-mask
+ *  - intx
+ *  - intx_cp
  *  The other filters are supported by fixed counters.
  *  The any-thread option is supported starting with v3.
  */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_INTX|HSW_INTX_CHECKPOINTED)
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
 
 /*
  * Constraint on the Event code + UMask
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 93b9e11..3a08534 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -133,6 +133,17 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+static struct event_constraint intel_hsw_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -2107,6 +2118,24 @@ __init int intel_pmu_init(void)
 		break;
 
 
+	case 60: /* Haswell Client */
+	case 70:
+	case 71:
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		intel_pmu_lbr_init_nhm();
+
+		x86_pmu.event_constraints = intel_hsw_event_constraints;
+
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		pr_cont("Haswell events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
-- 
1.7.7.6



* [PATCH 03/12] perf, x86: Basic Haswell PEBS support v3
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
  2013-01-25 22:00 ` [PATCH 01/12] perf, x86: Add PEBSv2 record support Andi Kleen
  2013-01-25 22:00 ` [PATCH 02/12] perf, x86: Basic Haswell PMU support v2 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-28 15:56   ` Stephane Eranian
  2013-01-25 22:00 ` [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add basic PEBS support for Haswell.
The constraints are similar to SandyBridge with a few new events.

v2: Readd missing pebs_aliases
v3: Readd missing hunk. Fix some constraints.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.h          |    2 ++
 arch/x86/kernel/cpu/perf_event_intel.c    |    6 ++++--
 arch/x86/kernel/cpu/perf_event_intel_ds.c |   29 +++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8941899..1567b0d 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -596,6 +596,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
 
 extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3a08534..634f639 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -826,7 +826,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
 		return true;
 
 	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2)
 		return true;
 
 	return false;
@@ -2127,8 +2128,9 @@ __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_hsw_event_constraints;
-
+		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 9d0dae0..16d7c58 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -427,6 +427,35 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
         EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+	INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+	INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+	INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+	INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+	INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x02d3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
+	INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
+	INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
+
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
-- 
1.7.7.6



* [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (2 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 03/12] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-26 11:54   ` Ingo Molnar
  2013-01-28 16:52   ` Stephane Eranian
  2013-01-25 22:00 ` [PATCH 05/12] perf, x86: Support Haswell v4 LBR format Andi Kleen
                   ` (8 subsequent siblings)
  12 siblings, 2 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Implement the TSX transaction and checkpointed transaction qualifiers for
Haswell. This allows e.g. to profile the number of cycles in transactions.

The checkpointed qualifier requires forcing the event to
counter 2; implement this with a custom constraint for Haswell.

Also add sysfs format attributes for intx/intx_cp.

[Updated from earlier version that used generic attributes, now does
raw + sysfs formats]
v2: Moved bad hunk. Forbid some bad combinations.
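
A minimal usage sketch (not part of the patch, error handling omitted):
counting cycles spent inside transactions by setting the new intx qualifier
(config bit 32) on raw event 0x3c (unhalted core cycles):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch only: open a "cycles while in a TSX transaction" counter by
 * setting the intx format bit (config:32) on raw event 0x3c. */
static int open_intx_cycles(void)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_RAW,
		.size	= sizeof(struct perf_event_attr),
		.config	= 0x003c | (1ULL << 32),	/* cycles + intx */
	};

	return syscall(__NR_perf_event_open, &attr,
		       0 /* this task */, -1 /* any cpu */,
		       -1 /* no group */, 0);
}
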
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel.c |   61 ++++++++++++++++++++++++++++++++
 1 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 634f639..44e18c02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
+#include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
@@ -1597,6 +1598,44 @@ static void core_pmu_enable_all(int added)
 	}
 }
 
+static int hsw_hw_config(struct perf_event *event)
+{
+	int ret = intel_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+		return 0;
+	event->hw.config |= event->attr.config & (HSW_INTX|HSW_INTX_CHECKPOINTED);
+
+	/* 
+	 * INTX/INTX-CP do not play well with PEBS or ANY thread mode.
+	 */
+	if ((event->hw.config & (HSW_INTX|HSW_INTX_CHECKPOINTED)) &&
+	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+	      event->attr.precise_ip > 0))
+		return -EIO;
+	return 0;
+}
+
+static struct event_constraint counter2_constraint = 
+			EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+	/* Handle special quirk on intx_checkpointed only in counter 2 */
+	if (event->hw.config & HSW_INTX_CHECKPOINTED) {
+		if (c->idxmsk64 & (1U << 2))
+			return &counter2_constraint;
+		return &emptyconstraint;
+	}
+
+	return c;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -1604,6 +1643,8 @@ PMU_FORMAT_ATTR(pc,	"config:19"	);
 PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
 PMU_FORMAT_ATTR(inv,	"config:23"	);
 PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+PMU_FORMAT_ATTR(intx,	"config:32"	);
+PMU_FORMAT_ATTR(intx_cp,"config:33"	);
 
 static struct attribute *intel_arch_formats_attr[] = {
 	&format_attr_event.attr,
@@ -1761,6 +1802,23 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	NULL,
 };
 
+/* Arch3 + TSX support */
+static struct attribute *intel_hsw_formats_attr[] __read_mostly = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_pc.attr,
+	&format_attr_any.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask.attr,
+	&format_attr_intx.attr,
+	&format_attr_intx_cp.attr,
+
+	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+	NULL,
+};
+
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
@@ -2135,6 +2193,9 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.format_attrs = intel_hsw_formats_attr;
 		pr_cont("Haswell events, ");
 		break;
 
-- 
1.7.7.6



* [PATCH 05/12] perf, x86: Support Haswell v4 LBR format
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (3 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-28 21:47   ` Stephane Eranian
  2013-01-25 22:00 ` [PATCH 06/12] perf, x86: Support full width counting Andi Kleen
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Haswell has two additional LBR "from" flags for TSX, intx and abort,
implemented as a new v4 version of the LBR format.

Handle them and adjust the sign-extension code so that addresses are still
extended correctly. The flags are exported in the LBR record in the same
way as the existing misprediction flag.
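
A sketch (not taken from the patch) of the v4 "from" word decode, which is
also why the sign extension now shifts by 3 instead of 1: bits 63/62/61
carry the mispredict/intx/abort flags, so only the low 61 bits hold the
sign-extended address:

/* Sketch only: decode an LBR_FORMAT_EIP_FLAGS2 "from" entry. */
static void sketch_decode_lbr_from_v4(u64 from, u64 *addr, int *mispred,
				      int *intx, int *abort)
{
	*mispred = !!(from & LBR_FROM_FLAG_MISPRED);	/* bit 63 */
	*intx	 = !!(from & LBR_FROM_FLAG_INTX);	/* bit 62 */
	*abort	 = !!(from & LBR_FROM_FLAG_ABORT);	/* bit 61 */
	*addr	 = (u64)(((s64)from << 3) >> 3);	/* strip 3 flag bits */
}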

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |   18 +++++++++++++++---
 include/linux/perf_event.h                 |    7 ++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index da02e9c..2af6695b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,7 @@ enum {
 	LBR_FORMAT_LIP		= 0x01,
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
+	LBR_FORMAT_EIP_FLAGS2	= 0x04,
 };
 
 /*
@@ -56,6 +57,8 @@ enum {
 	 LBR_FAR)
 
 #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+#define LBR_FROM_FLAG_INTX     (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
 #define for_each_branch_sample_type(x) \
 	for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -270,21 +273,30 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
-		u64 from, to, mis = 0, pred = 0;
+		u64 from, to, mis = 0, pred = 0, intx = 0, abort = 0;
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
-		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
+		if (lbr_format == LBR_FORMAT_EIP_FLAGS ||
+		    lbr_format == LBR_FORMAT_EIP_FLAGS2) {
 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
 			pred = !mis;
-			from = (u64)((((s64)from) << 1) >> 1);
+			if (lbr_format == LBR_FORMAT_EIP_FLAGS)
+				from = (u64)((((s64)from) << 1) >> 1);
+			else if (lbr_format == LBR_FORMAT_EIP_FLAGS2) {
+				intx = !!(from & LBR_FROM_FLAG_INTX);
+				abort = !!(from & LBR_FROM_FLAG_ABORT);
+				from = (u64)((((s64)from) << 3) >> 3);
+			}
 		}
 
 		cpuc->lbr_entries[i].from	= from;
 		cpuc->lbr_entries[i].to		= to;
 		cpuc->lbr_entries[i].mispred	= mis;
 		cpuc->lbr_entries[i].predicted	= pred;
+		cpuc->lbr_entries[i].intx	= intx;
+		cpuc->lbr_entries[i].abort	= abort;
 		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6bfb2faa..91052e1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -74,13 +74,18 @@ struct perf_raw_record {
  *
  * support for mispred, predicted is optional. In case it
  * is not supported mispred = predicted = 0.
+ *
+ *     intx: running in a hardware transaction
+ *     abort: aborting a hardware transaction
  */
 struct perf_branch_entry {
 	__u64	from;
 	__u64	to;
 	__u64	mispred:1,  /* target mispredicted */
 		predicted:1,/* target predicted */
-		reserved:62;
+		intx:1,	    /* in transaction */
+		abort:1,    /* transaction abort */
+		reserved:60;
 };
 
 /*
-- 
1.7.7.6



* [PATCH 06/12] perf, x86: Support full width counting
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (4 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 05/12] perf, x86: Support Haswell v4 LBR format Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-25 22:00 ` [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Recent Intel CPUs have a new alternative MSR range for perfctrs that allows
writing the full counter width. Enable this range if the hardware reports it
with a new capability bit. This lowers the overhead of perf stat slightly
because fewer interrupts are needed to accumulate the counter value. On
Haswell it also avoids some problems with TSX aborting when the end of the
counter range is reached.
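
A sketch (not part of the patch) of the effect: hwc->event_base now points
into the alternative MSR range, so the existing update path can program the
counter close to its full width in one write instead of capping the period
at 31 bits. Roughly:

/* Sketch only: simplified from x86_perf_event_set_period(). */
static void sketch_reprogram(struct hw_perf_event *hwc, s64 left)
{
	/* with fw_write, max_period is now the full counter mask */
	if (left > x86_pmu.max_period)
		left = x86_pmu.max_period;

	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
}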

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/include/uapi/asm/msr-index.h  |    3 +++
 arch/x86/kernel/cpu/perf_event.h       |    1 +
 arch/x86/kernel/cpu/perf_event_intel.c |    6 ++++++
 3 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59f..af41a77 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -163,6 +163,9 @@
 #define MSR_KNC_EVNTSEL0               0x00000028
 #define MSR_KNC_EVNTSEL1               0x00000029
 
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0			0x000004c1
+
 /* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 1567b0d..ce2a863 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -278,6 +278,7 @@ union perf_capabilities {
 		u64	pebs_arch_reg:1;
 		u64	pebs_format:4;
 		u64	smm_freeze:1;
+		u64	fw_write:1;
 	};
 	u64	capabilities;
 };
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 44e18c02..bc21bce 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2247,5 +2247,11 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	/* Support full width counters using alternative MSR range */
+	if (x86_pmu.intel_cap.fw_write) {
+		x86_pmu.max_period = x86_pmu.cntval_mask;
+		x86_pmu.perfctr = MSR_IA32_PMC0;
+	}
+
 	return 0;
 }
-- 
1.7.7.6



* [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (5 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 06/12] perf, x86: Support full width counting Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-28 22:32   ` Stephane Eranian
  2013-01-25 22:00 ` [PATCH 08/12] perf, x86: Move NMI clearing to end of PMI handler after the counter registers are reset Andi Kleen
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

With checkpointed counters there can be a situation where the counter
overflows, aborts the transaction, is set back to a non-overflowing
checkpoint, and then causes an interrupt. The interrupt doesn't see the
overflow because the counter value has been rolled back to the checkpoint.
This is then a spurious PMI, typically with an ugly NMI message. It can
also lead to excessive aborts.

Avoid this problem by:
- Using the full counter width for counting counters (previous patch)
- Forbidding sampling for checkpointed counters. It's not too useful
  anyway, checkpointing is mainly for counting.
- Always setting checkpointed counters back to zero on a PMI.

v2: Add unlikely. Add comment
v3: Allow large sampling periods with CP for KVM
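
A usage sketch (not part of the patch): with these restrictions a
checkpointed transactional-cycles event is set up for counting only, with
no sampling period; a short sample_period on an intx_cp event would now be
rejected, and the event is forced onto counter 2 by the earlier constraint
patch:

#include <linux/perf_event.h>

/* Sketch only: counting setup for a checkpointed in-transaction
 * cycles event, read via read(2) rather than sampled. */
struct perf_event_attr intx_cp_cycles_attr = {
	.type	= PERF_TYPE_RAW,
	.size	= sizeof(struct perf_event_attr),
	/* raw event 0x3c (cycles), intx (bit 32) + intx_cp (bit 33) */
	.config	= 0x003c | (1ULL << 32) | (1ULL << 33),
	/* sample_period left at 0: counting only */
};
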
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel.c |   34 ++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index bc21bce..9b4dda5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1079,6 +1079,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
 	x86_perf_event_update(event);
+	/*
+	 * For a checkpointed counter always reset back to 0.  This
+	 * avoids a situation where the counter overflows, aborts the
+	 * transaction and is then set back to shortly before the
+	 * overflow, and overflows and aborts again.
+	 */
+	if (unlikely(event->hw.config & HSW_INTX_CHECKPOINTED)) {
+		/* No race with NMIs because the counter should not be armed */
+		wrmsrl(event->hw.event_base, 0);
+		local64_set(&event->hw.prev_count, 0);
+	}
 	return x86_perf_event_set_period(event);
 }
 
@@ -1162,6 +1173,15 @@ again:
 		x86_pmu.drain_pebs(regs);
 	}
 
+	/*
+ 	 * To avoid spurious interrupts with perf stat always reset checkpointed
+ 	 * counters.
+ 	 *
+	 * XXX move somewhere else.
+	 */
+	if (cpuc->events[2] && (cpuc->events[2]->hw.config & HSW_INTX_CHECKPOINTED))
+		status |= (1ULL << 2);
+
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
@@ -1615,6 +1635,20 @@ static int hsw_hw_config(struct perf_event *event)
 	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
 	      event->attr.precise_ip > 0))
 		return -EIO;
+	if (event->hw.config & HSW_INTX_CHECKPOINTED) {
+		/*
+		 * Sampling of checkpointed events can cause situations where
+		 * the CPU constantly aborts because of a overflow, which is
+		 * then checkpointed back and ignored. Forbid checkpointing
+		 * for sampling.
+		 *
+		 * But still allow a long sampling period, so that perf stat
+		 * from KVM works.
+		 */
+		if (event->attr.sample_period > 0 &&
+		    event->attr.sample_period < 0x7fffffff)
+			return -EIO;
+	}
 	return 0;
 }
 
-- 
1.7.7.6



* [PATCH 08/12] perf, x86: Move NMI clearing to end of PMI handler after the counter registers are reset
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (6 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-25 22:00 ` [PATCH 09/12] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

This avoids some problems with spurious PMIs on Haswell.
Haswell seems to behave more like the P4 in this regard. Do
the same thing as the P4 perf handler and unmask
the NMI only at the end. This shouldn't make any difference
for earlier non-P4 cores.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel.c |   15 +++++----------
 1 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9b4dda5..6899f57 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1133,16 +1133,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	/*
-	 * Some chipsets need to unmask the LVTPC in a particular spot
-	 * inside the nmi handler.  As a result, the unmasking was pushed
-	 * into all the nmi handlers.
-	 *
-	 * This handler doesn't seem to have any issues with the unmasking
-	 * so it was left at the top.
-	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -1211,6 +1201,11 @@ again:
 
 done:
 	intel_pmu_enable_all(0);
+	/*
+	 * Only unmask the NMI after the overflow counters
+	 * have been reset.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
 
-- 
1.7.7.6



* [PATCH 09/12] perf, x86: Disable LBR recording for unknown LBR_FMT
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (7 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 08/12] perf, x86: Move NMI clearing to end of PMI handler after the counter registers are reset Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-25 22:00 ` [PATCH 10/12] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2 Andi Kleen
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

When the LBR format is unknown, disable LBR recording. This prevents
crashes when the LBR address would be misdecoded and incorrectly
sign-extended.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 2af6695b..ad5af13 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -13,6 +13,7 @@ enum {
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
 	LBR_FORMAT_EIP_FLAGS2	= 0x04,
+	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_EIP_FLAGS2,
 };
 
 /*
@@ -392,7 +393,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * no LBR on this PMU
 	 */
-	if (!x86_pmu.lbr_nr)
+	if (!x86_pmu.lbr_nr || x86_pmu.intel_cap.lbr_format > LBR_FORMAT_MAX_KNOWN)
 		return -EOPNOTSUPP;
 
 	/*
-- 
1.7.7.6



* [PATCH 10/12] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (8 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 09/12] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-25 22:00 ` [PATCH 11/12] perf, tools: Support sorting by intx, abort branch flags v2 Andi Kleen
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add LBR filtering for branches in a transaction, branches not in a
transaction, and transaction aborts. These are exposed as new branch
sample types.

v2: Rename ABORT to ABORTTX
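
A usage sketch (not part of the patch, privilege and error handling
omitted): a sampling event that keeps only branches executed inside a
transaction by combining the existing ANY filter with the new INTX filter
(NOTX and ABORTTX work the same way):

#include <linux/perf_event.h>

/* Sketch only: sample the branch stack, filtered to in-transaction
 * branches via the new PERF_SAMPLE_BRANCH_INTX bit. */
struct perf_event_attr intx_branches_attr = {
	.type			= PERF_TYPE_HARDWARE,
	.size			= sizeof(struct perf_event_attr),
	.config			= PERF_COUNT_HW_CPU_CYCLES,
	.sample_period		= 100003,
	.sample_type		= PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
	.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_INTX,
};
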
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |   31 +++++++++++++++++++++++++--
 include/uapi/linux/perf_event.h            |    5 +++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index ad5af13..5455a00 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -85,9 +85,13 @@ enum {
 	X86_BR_JMP      = 1 << 9, /* jump */
 	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
 	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+	X86_BR_ABORT    = 1 << 12,/* transaction abort */
+	X86_BR_INTX     = 1 << 13,/* in transaction */
+	X86_BR_NOTX     = 1 << 14,/* not in transaction */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NOTX | X86_BR_INTX)
 
 #define X86_BR_ANY       \
 	(X86_BR_CALL    |\
@@ -99,6 +103,7 @@ enum {
 	 X86_BR_JCC     |\
 	 X86_BR_JMP	 |\
 	 X86_BR_IRQ	 |\
+	 X86_BR_ABORT	 |\
 	 X86_BR_IND_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -347,6 +352,16 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
 		mask |= X86_BR_IND_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ABORTTX)
+		mask |= X86_BR_ABORT;
+
+	if (br_type & PERF_SAMPLE_BRANCH_INTX)
+		mask |= X86_BR_INTX;
+
+	if (br_type & PERF_SAMPLE_BRANCH_NOTX)
+		mask |= X86_BR_NOTX;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -393,7 +408,8 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * no LBR on this PMU
 	 */
-	if (!x86_pmu.lbr_nr || x86_pmu.intel_cap.lbr_format > LBR_FORMAT_MAX_KNOWN)
+	if (!x86_pmu.lbr_nr ||
+	    x86_pmu.intel_cap.lbr_format > LBR_FORMAT_MAX_KNOWN)
 		return -EOPNOTSUPP;
 
 	/*
@@ -421,7 +437,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
  * decoded (e.g., text page not present), then X86_BR_NONE is
  * returned.
  */
-static int branch_type(unsigned long from, unsigned long to)
+static int branch_type(unsigned long from, unsigned long to, int abort)
 {
 	struct insn insn;
 	void *addr;
@@ -441,6 +457,9 @@ static int branch_type(unsigned long from, unsigned long to)
 	if (from == 0 || to == 0)
 		return X86_BR_NONE;
 
+	if (abort)
+		return X86_BR_ABORT | to_plm;
+
 	if (from_plm == X86_BR_USER) {
 		/*
 		 * can happen if measuring at the user level only
@@ -577,7 +596,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 		from = cpuc->lbr_entries[i].from;
 		to = cpuc->lbr_entries[i].to;
 
-		type = branch_type(from, to);
+		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+			if (cpuc->lbr_entries[i].intx)
+				type |= X86_BR_INTX;
+			else
+				type |= X86_BR_NOTX;
+		}
 
 		/* if type does not correspond, then discard */
 		if (type == X86_BR_NONE || (br_sel & type) != type) {
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9fa9c62..41b25f0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -155,8 +155,11 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4, /* any call branch */
 	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5, /* any return branch */
 	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6, /* indirect calls */
+	PERF_SAMPLE_BRANCH_ABORTTX	= 1U << 7, /* transaction aborts */
+	PERF_SAMPLE_BRANCH_INTX		= 1U << 8, /* in transaction (flag) */
+	PERF_SAMPLE_BRANCH_NOTX		= 1U << 9, /* not in transaction (flag) */
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 7, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
-- 
1.7.7.6



* [PATCH 11/12] perf, tools: Support sorting by intx, abort branch flags v2
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (9 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 10/12] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-25 22:00 ` [PATCH 12/12] perf, tools: Add abort_tx,no_tx,in_tx branch filter options to perf record -j v3 Andi Kleen
  2013-01-31 17:19 ` Basic perf PMU support for Haswell v1 Stephane Eranian
  12 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Extend the perf branch sorting code to support sorting by the intx
or abort qualifiers, and print out those qualifiers.

This also fixes up some of the existing sort key documentation.

We do not add a notx sort key here, because "not in a transaction" is
simply the absence of the intx flag.

v2: Readd flags to man pages
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-report.txt |    4 ++-
 tools/perf/Documentation/perf-top.txt    |    4 ++-
 tools/perf/builtin-report.c              |    2 +-
 tools/perf/builtin-top.c                 |    4 ++-
 tools/perf/perf.h                        |    4 ++-
 tools/perf/util/hist.h                   |    2 +
 tools/perf/util/sort.c                   |   51 ++++++++++++++++++++++++++++++
 tools/perf/util/sort.h                   |    2 +
 8 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 848a0dc..eaf23ab 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -71,7 +71,7 @@ OPTIONS
 	entries are displayed as "[other]".
 	- cpu: cpu number the task ran at the time of sample
 	- srcline: filename and line number executed at the time of sample.  The
-	DWARF debuggin info must be provided.
+	DWARF debugging info must be provided.
 
 	By default, comm, dso and symbol keys are used.
 	(i.e. --sort comm,dso,symbol)
@@ -85,6 +85,8 @@ OPTIONS
 	- symbol_from: name of function branched from
 	- symbol_to: name of function branched to
 	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
+	- intx: branch in TSX transaction
+	- abort: TSX transaction abort.
 
 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index a414bc9..93970b7 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -112,7 +112,9 @@ Default is to monitor all CPUS.
 
 -s::
 --sort::
-	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline,
+        dso_from, dso_to, symbol_to, symbol_from, mispredict,
+        abort, intx
 
 -n::
 --show-nr-samples::
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 47a8644..d51a66d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -596,7 +596,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Use the stdio interface"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
 		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
-		   " dso_to, dso_from, symbol_to, symbol_from, mispredict"),
+		   " dso_to, dso_from, symbol_to, symbol_from, mispredict, abort, intx"),
 	OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
 		    "Show sample percentage for different cpu modes"),
 	OPT_STRING('p', "parent", &parent_pattern, "regex",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7978c81..9e08cec 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1078,7 +1078,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): pid, comm, dso, symbol, parent"),
+		   "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
+		   " dso_from, symbol_to, symbol_from, mispredict, srcline,"
+		   " abort, intx"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
 	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 8f3bf38..eb049df 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -193,7 +193,9 @@ struct ip_callchain {
 struct branch_flags {
 	u64 mispred:1;
 	u64 predicted:1;
-	u64 reserved:62;
+	u64 intx:1;
+	u64 abort:1;
+	u64 reserved:60;
 };
 
 struct branch_entry {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 3862468..36a8565 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -44,6 +44,8 @@ enum hist_column {
 	HISTC_PARENT,
 	HISTC_CPU,
 	HISTC_MISPREDICT,
+	HISTC_INTX,
+	HISTC_ABORT,
 	HISTC_SYMBOL_FROM,
 	HISTC_SYMBOL_TO,
 	HISTC_DSO_FROM,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7ad6239..b7a4100 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -472,6 +472,55 @@ struct sort_entry sort_mispredict = {
 	.se_width_idx	= HISTC_MISPREDICT,
 };
 
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->branch_info->flags.abort !=
+		right->branch_info->flags.abort;
+}
+
+static int hist_entry__abort_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width)
+{
+	static const char *out = ".";
+
+	if (self->branch_info->flags.abort)
+		out = "A";
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {
+	.se_header	= "Transaction abort",
+	.se_cmp		= sort__abort_cmp,
+	.se_snprintf	= hist_entry__abort_snprintf,
+	.se_width_idx	= HISTC_ABORT,
+};
+
+static int64_t
+sort__intx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->branch_info->flags.intx !=
+		right->branch_info->flags.intx;
+}
+
+static int hist_entry__intx_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width)
+{
+	static const char *out = ".";
+
+	if (self->branch_info->flags.intx)
+		out = "T";
+
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_intx = {
+	.se_header	= "Branch in transaction",
+	.se_cmp		= sort__intx_cmp,
+	.se_snprintf	= hist_entry__intx_snprintf,
+	.se_width_idx	= HISTC_INTX,
+};
+
 struct sort_dimension {
 	const char		*name;
 	struct sort_entry	*entry;
@@ -500,6 +549,8 @@ static struct sort_dimension bstack_sort_dimensions[] = {
 	DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
 	DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
 	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_ABORT, "abort", sort_abort),
+	DIM(SORT_INTX, "intx", sort_intx),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e994ad3e..6755d28 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -138,6 +138,8 @@ enum sort_type {
 	SORT_SYM_FROM,
 	SORT_SYM_TO,
 	SORT_MISPREDICT,
+	SORT_ABORT,
+	SORT_INTX,
 };
 
 /*
-- 
1.7.7.6



* [PATCH 12/12] perf, tools: Add abort_tx,no_tx,in_tx branch filter options to perf record -j v3
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (10 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 11/12] perf, tools: Support sorting by intx, abort branch flags v2 Andi Kleen
@ 2013-01-25 22:00 ` Andi Kleen
  2013-01-31 17:19 ` Basic perf PMU support for Haswell v1 Stephane Eranian
  12 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-25 22:00 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Make perf record -j aware of the new in_tx,no_tx,abort_tx branch qualifiers.

v2: ABORT -> ABORTTX
v3: Add more _
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-record.txt |    3 +++
 tools/perf/builtin-record.c              |    3 +++
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 938e890..f7d74b2 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -172,6 +172,9 @@ following filters are defined:
         - u:  only when the branch target is at the user level
         - k: only when the branch target is in the kernel
         - hv: only when the target is at the hypervisor level
+	- in_tx: only when the target is in a hardware transaction
+	- no_tx: only when the target is not in a hardware transaction
+	- abort_tx: only when the target is a hardware transaction abort
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2ac690c..4bcfccd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -671,6 +671,9 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
 	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
 	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORTTX),
+	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_INTX),
+	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NOTX),
 	BRANCH_END
 };
 
-- 
1.7.7.6



* Re: [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
  2013-01-25 22:00 ` [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
@ 2013-01-26 11:54   ` Ingo Molnar
  2013-01-26 21:00     ` Andi Kleen
  2013-01-28 16:52   ` Stephane Eranian
  1 sibling, 1 reply; 36+ messages in thread
From: Ingo Molnar @ 2013-01-26 11:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen


* Andi Kleen <andi@firstfloor.org> wrote:

> From: Andi Kleen <ak@linux.intel.com>
> 
> Implement the TSX transaction and checkpointed transaction 
> qualifiers for Haswell. This allows e.g. to profile the number 
> of cycles in transactions.

The changelog should explain why this is a basic feature that is 
a must-have to have basic perf support. (i.e. default 'perf 
record', 'perf report', 'perf stat' and 'perf top' works and is 
meaningful.)

Thanks,

	Ingo


* Re: [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
  2013-01-26 11:54   ` Ingo Molnar
@ 2013-01-26 21:00     ` Andi Kleen
  2013-01-27 13:14       ` Ingo Molnar
  0 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-26 21:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, linux-kernel, a.p.zijlstra, akpm, acme, eranian,
	jolsa, namhyung, Andi Kleen

On Sat, Jan 26, 2013 at 12:54:02PM +0100, Ingo Molnar wrote:
> 
> * Andi Kleen <andi@firstfloor.org> wrote:
> 
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > Implement the TSX transaction and checkpointed transaction 
> > qualifiers for Haswell. This allows e.g. to profile the number 
> > of cycles in transactions.
> 
> The changelog should explain why this is a basic feature that is 
> a must-have to have basic perf support. (i.e. default 'perf 
> record', 'perf report', 'perf stat' and 'perf top' works and is 
> meaningful.)

The patch is not needed for primitive cycle sampling only.

These qualifiers are a fundamental feature for any TSX tuning. If you don't
care about TSX it's not fundamental. TSX tuning relies heavily on the PMU,
so for any TSX work it's important. The 0/0 description has more
references on the topic.

It's not easy to use however with just this patch, the "perf stat -T" patch
in the advanced series makes it easy to use.

I will assume the description is sufficient and will not repost with
a new changelog, as "basicness" of the patch is not a useful property
to document in the long-term git history.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.


* Re: [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
  2013-01-26 21:00     ` Andi Kleen
@ 2013-01-27 13:14       ` Ingo Molnar
       [not found]         ` <20130128050234.GQ30577@one.firstfloor.org>
  0 siblings, 1 reply; 36+ messages in thread
From: Ingo Molnar @ 2013-01-27 13:14 UTC (permalink / raw)
  To: Andi Kleen
  Cc: linux-kernel, a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung,
	Andi Kleen


* Andi Kleen <andi@firstfloor.org> wrote:

> On Sat, Jan 26, 2013 at 12:54:02PM +0100, Ingo Molnar wrote:
> > 
> > * Andi Kleen <andi@firstfloor.org> wrote:
> > 
> > > From: Andi Kleen <ak@linux.intel.com>
> > > 
> > > Implement the TSX transaction and checkpointed transaction 
> > > qualifiers for Haswell. This allows e.g. to profile the number 
> > > of cycles in transactions.
> > 
> > The changelog should explain why this is a basic feature 
> > that is a must-have to have basic perf support. (i.e. 
> > default 'perf record', 'perf report', 'perf stat' and 'perf 
> > top' works and is meaningful.)
> 
> The patch is not needed for primitive cycle sampling only.

So please re-send a truly minimal hw-enabling series first, as 
requested - a minimal series that enables most of the everyday 
usecases.

> These qualifiers are a fundamental feature for any TSX tuning.

Any new, Haswell-specific profiling features should obviously be 
in a second (or third) series of patches.

Thanks,

	Ingo


* Re: [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
       [not found]         ` <20130128050234.GQ30577@one.firstfloor.org>
@ 2013-01-28 10:47           ` Ingo Molnar
  0 siblings, 0 replies; 36+ messages in thread
From: Ingo Molnar @ 2013-01-28 10:47 UTC (permalink / raw)
  To: Andi Kleen
  Cc: a.p.zijlstra, akpm, acme, eranian, jolsa, namhyung, Andi Kleen,
	Linus Torvalds, linux-kernel


* Andi Kleen <andi@firstfloor.org> wrote:

> [dropping linux-kernel]

(Re-added, because this concerns others as well.)

> > So please re-send a truly minimal hw-enabling series first, 
> > as requested - a minimal series that enables most of the 
> > everyday usecases.
> 
> I don't have much interest in arguing what is fundamental 
> and what is not. Variants of this patchkit have been used for 
> over a year to do Haswell work, and I have a reasonably large 
> user base for it both internal in Intel and with some other 
> Haswell users.
> 
> They need all these features. [...]

That argument is silly, dishonest and misleading: the majority 
of perf users don't *ever* specify a different profiling event 
from the default 'cycles' one, let alone do they specify CPU 
specific features - full stop.

They are using 'perf record/report', 'perf top', or use other 
tools like SysProf (that use perf 'cycles' events internally by 
default) and that's it.

A select few know about 'cycles:pp', perhaps. (In fact we'll 
make that the default in the future, to make profiling even 
easier by default.) Some do -g call-graph profiling.

Most developers that use profiling tools want to know where the 
silliest overhead in their app is and they don't care much about 
CPU specific events.

That group of users includes most kernel developers in fact: 
I've rarely seen people go beyond perf record / perf top - and 
that's *good* in a sense, because it means they get what they 
want from our primary source of profiling information already.

Things like cachemiss profiling or multi-event profiling are a 
distant second in terms of popularity. The use of CPU specific 
events is even rarer.

Any Haswell-specific features are a second or third order 
concern, at best, in terms of installed base. We *do* care about 
those usecases as well, but you are wasting precious 
hw-enablement time by insisting on more complex patches while 
stonewalling my request for the simplest ones - the minimal 
patches really belong upstream.

So please send a minimal series that serves the overwhelmingly 
common case, without any other distractions, and after that we 
can argue Haswell specific extensions to profiling.

This isn't such a difficult or in any way controversial concept 
really...

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 01/12] perf, x86: Add PEBSv2 record support
  2013-01-25 22:00 ` [PATCH 01/12] perf, x86: Add PEBSv2 record support Andi Kleen
@ 2013-01-28 13:15   ` Stephane Eranian
  2013-01-28 16:10     ` Andi Kleen
  2013-01-31 17:15   ` Stephane Eranian
  1 sibling, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 13:15 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add support for the v2 PEBS format. It has a superset of the v1 PEBS
> fields, but has a longer record so we need to adjust the code paths.
>
> The main advantage is the new "EventingRip" support which directly
> gives the instruction, not off-by-one instruction. So with precise == 2
> we use that directly and don't try to use LBRs and walking basic blocks.
> This lowers the overhead significantly.
>
> Some other features are added in later patches.
>
I think you should call this: PEBS v3 and not v2.
We already have 2 existing formats: core and nhm.
You're adding the hsw format.

So either you add it as struct pebs_record_hsw or you
clean up the naming of all the structs and call them:

struct pebs_record_v1 /* core */
struct pebs_record_v2 /* nhm, snb, ivb */
struct pebs_record_v3 /* hsw */

That would seem more consistent to me.

As for using precise=2 for eventingrip, I am fine with this.

> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event.c          |    2 +-
>  arch/x86/kernel/cpu/perf_event_intel_ds.c |  101 ++++++++++++++++++++++-------
>  2 files changed, 79 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 6774c17..c95290a 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -397,7 +397,7 @@ int x86_pmu_hw_config(struct perf_event *event)
>                  * check that PEBS LBR correction does not conflict with
>                  * whatever the user is asking with attr->branch_sample_type
>                  */
> -               if (event->attr.precise_ip > 1) {
> +               if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
>                         u64 *br_type = &event->attr.branch_sample_type;
>
>                         if (has_branch_stack(event)) {
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 826054a..9d0dae0 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -41,6 +41,12 @@ struct pebs_record_nhm {
>         u64 status, dla, dse, lat;
>  };
>
> +struct pebs_record_v2 {
> +       struct pebs_record_nhm nhm;
> +       u64 eventingrip;
> +       u64 tsx_tuning;
> +};
> +
>  void init_debug_store_on_cpu(int cpu)
>  {
>         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
> @@ -559,8 +565,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>  {
>         /*
>          * We cast to pebs_record_core since that is a subset of
> -        * both formats and we don't use the other fields in this
> -        * routine.
> +        * both formats.
>          */
>         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>         struct pebs_record_core *pebs = __pebs;
> @@ -588,7 +593,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>         regs.bp = pebs->bp;
>         regs.sp = pebs->sp;
>
> -       if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
> +       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
> +               regs.ip = ((struct pebs_record_v2 *)pebs)->eventingrip;
> +               regs.flags |= PERF_EFLAGS_EXACT;
> +       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
>                 regs.flags |= PERF_EFLAGS_EXACT;
>         else
>                 regs.flags &= ~PERF_EFLAGS_EXACT;
> @@ -641,35 +649,21 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
>         __intel_pmu_pebs_event(event, iregs, at);
>  }
>
> -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> +static void intel_pmu_drain_pebs_common(struct pt_regs *iregs, void *at,
> +                                       void *top)
>  {
>         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>         struct debug_store *ds = cpuc->ds;
> -       struct pebs_record_nhm *at, *top;
>         struct perf_event *event = NULL;
>         u64 status = 0;
> -       int bit, n;
> -
> -       if (!x86_pmu.pebs_active)
> -               return;
> -
> -       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> -       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
> +       int bit;
>
>         ds->pebs_index = ds->pebs_buffer_base;
>
> -       n = top - at;
> -       if (n <= 0)
> -               return;
> +       for ( ; at < top; at += x86_pmu.pebs_record_size) {
> +               struct pebs_record_nhm *p = at;
>
> -       /*
> -        * Should not happen, we program the threshold at 1 and do not
> -        * set a reset value.
> -        */
> -       WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
> -
> -       for ( ; at < top; at++) {
> -               for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
> +               for_each_set_bit(bit, (unsigned long *)&p->status, x86_pmu.max_pebs_events) {
>                         event = cpuc->events[bit];
>                         if (!test_bit(bit, cpuc->active_mask))
>                                 continue;
> @@ -692,6 +686,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>         }
>  }
>
> +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +       struct debug_store *ds = cpuc->ds;
> +       struct pebs_record_nhm *at, *top;
> +       int n;
> +
> +       if (!x86_pmu.pebs_active)
> +               return;
> +
> +       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> +       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
> +
> +       ds->pebs_index = ds->pebs_buffer_base;
> +
> +       n = top - at;
> +       if (n <= 0)
> +               return;
> +
> +       /*
> +        * Should not happen, we program the threshold at 1 and do not
> +        * set a reset value.
> +        */
> +       WARN_ONCE(n > x86_pmu.max_pebs_events,
> +                 "Unexpected number of pebs records %d\n", n);
> +
> +       return intel_pmu_drain_pebs_common(iregs, at, top);
> +}
> +
> +static void intel_pmu_drain_pebs_v2(struct pt_regs *iregs)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +       struct debug_store *ds = cpuc->ds;
> +       struct pebs_record_v2 *at, *top;
> +       int n;
> +
> +       if (!x86_pmu.pebs_active)
> +               return;
> +
> +       at  = (struct pebs_record_v2 *)(unsigned long)ds->pebs_buffer_base;
> +       top = (struct pebs_record_v2 *)(unsigned long)ds->pebs_index;
> +
> +       n = top - at;
> +       if (n <= 0)
> +               return;
> +       /*
> +        * Should not happen, we program the threshold at 1 and do not
> +        * set a reset value.
> +        */
> +       WARN_ONCE(n > x86_pmu.max_pebs_events,
> +                 "Unexpected number of pebs records %d\n", n);
> +
> +       return intel_pmu_drain_pebs_common(iregs, at, top);
> +}
> +
>  /*
>   * BTS, PEBS probe and setup
>   */
> @@ -723,6 +772,12 @@ void intel_ds_init(void)
>                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>                         break;
>
> +               case 2:
> +                       printk(KERN_CONT "PEBS fmt2%c, ", pebs_type);
> +                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_v2);
> +                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_v2;
> +                       break;
> +
>                 default:
>                         printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
>                         x86_pmu.pebs = 0;
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 02/12] perf, x86: Basic Haswell PMU support v2
  2013-01-25 22:00 ` [PATCH 02/12] perf, x86: Basic Haswell PMU support v2 Andi Kleen
@ 2013-01-28 15:34   ` Stephane Eranian
  2013-01-28 16:16     ` Andi Kleen
  0 siblings, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 15:34 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add basic Haswell PMU support.
>
> Similar to SandyBridge, but has a few new events. Further
> differences are handled in followon patches.
>
> There are some new counter flags that need to be prevented
> from being set on fixed counters.
>
> Contains fixes from Stephane Eranian
>
> v2: Folded TSX bits into standard FIXED_EVENT_CONSTRAINTS
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/include/asm/perf_event.h      |    3 +++
>  arch/x86/kernel/cpu/perf_event.h       |    5 ++++-
>  arch/x86/kernel/cpu/perf_event_intel.c |   29 +++++++++++++++++++++++++++++
>  3 files changed, 36 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 4fabcdf..4003bb6 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -29,6 +29,9 @@
>  #define ARCH_PERFMON_EVENTSEL_INV                      (1ULL << 23)
>  #define ARCH_PERFMON_EVENTSEL_CMASK                    0xFF000000ULL
>
> +#define HSW_INTX                                       (1ULL << 32)
> +#define HSW_INTX_CHECKPOINTED                          (1ULL << 33)
> +
>  #define AMD_PERFMON_EVENTSEL_GUESTONLY                 (1ULL << 40)
>  #define AMD_PERFMON_EVENTSEL_HOSTONLY                  (1ULL << 41)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 115c1ea..8941899 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -219,11 +219,14 @@ struct cpu_hw_events {
>   *  - inv
>   *  - edge
>   *  - cnt-mask
> + *  - intx
> + *  - intx_cp
>   *  The other filters are supported by fixed counters.
>   *  The any-thread option is supported starting with v3.
>   */
> +#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_INTX|HSW_INTX_CHECKPOINTED)
>  #define FIXED_EVENT_CONSTRAINT(c, n)   \
> -       EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
> +       EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
>
>  /*
>   * Constraint on the Event code + UMask
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 93b9e11..3a08534 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -133,6 +133,17 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
>         EVENT_EXTRA_END
>  };
>
> +static struct event_constraint intel_hsw_event_constraints[] =
> +{
> +       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> +       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> +       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
> +       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */

You're covering the entire event here, so comment should be: L1D_PEND_MISS.*

> +       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
> +       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> +       EVENT_CONSTRAINT_END

You are missing constraint on the following public events:
- CYCLE_ACTIVITY (0xa3)

> +};
> +
>  static u64 intel_pmu_event_map(int hw_event)
>  {
>         return intel_perfmon_event_map[hw_event];
> @@ -2107,6 +2118,24 @@ __init int intel_pmu_init(void)
>                 break;
>
>
> +       case 60: /* Haswell Client */
> +       case 70:
> +       case 71:
> +               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> +                      sizeof(hw_cache_event_ids));
> +
> +               intel_pmu_lbr_init_nhm();
> +
I suspect this one should be intel_pmu_lbr_init_snb(), otherwise
you inherit the broken filter workarounds.

> +               x86_pmu.event_constraints = intel_hsw_event_constraints;
> +
> +               x86_pmu.extra_regs = intel_snb_extra_regs;
> +               /* all extra regs are per-cpu when HT is on */
> +               x86_pmu.er_flags |= ERF_HAS_RSP_1;
> +               x86_pmu.er_flags |= ERF_NO_HT_SHARING;
> +
> +               pr_cont("Haswell events, ");
> +               break;
> +
>         default:
>                 switch (x86_pmu.version) {
>                 case 1:
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 03/12] perf, x86: Basic Haswell PEBS support v3
  2013-01-25 22:00 ` [PATCH 03/12] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
@ 2013-01-28 15:56   ` Stephane Eranian
  0 siblings, 0 replies; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 15:56 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add basic PEBS support for Haswell.
> The constraints are similar to SandyBridge with a few new events.
>
> v2: Readd missing pebs_aliases
> v3: Readd missing hunk. Fix some constraints.
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event.h          |    2 ++
>  arch/x86/kernel/cpu/perf_event_intel.c    |    6 ++++--
>  arch/x86/kernel/cpu/perf_event_intel_ds.c |   29 +++++++++++++++++++++++++++++
>  3 files changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 8941899..1567b0d 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -596,6 +596,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
>
>  extern struct event_constraint intel_ivb_pebs_event_constraints[];
>
> +extern struct event_constraint intel_hsw_pebs_event_constraints[];
> +
>  struct event_constraint *intel_pebs_constraints(struct perf_event *event);
>
>  void intel_pmu_pebs_enable(struct perf_event *event);
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 3a08534..634f639 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -826,7 +826,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
>                 return true;
>
>         /* implicit branch sampling to correct PEBS skid */
> -       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
> +       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
> +           x86_pmu.intel_cap.pebs_format < 2)
>                 return true;
>
>         return false;
> @@ -2127,8 +2128,9 @@ __init int intel_pmu_init(void)
>                 intel_pmu_lbr_init_nhm();
>
>                 x86_pmu.event_constraints = intel_hsw_event_constraints;
> -
> +               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
>                 x86_pmu.extra_regs = intel_snb_extra_regs;
> +               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
>                 /* all extra regs are per-cpu when HT is on */
>                 x86_pmu.er_flags |= ERF_HAS_RSP_1;
>                 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 9d0dae0..16d7c58 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -427,6 +427,35 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
>          EVENT_CONSTRAINT_END
>  };
>
> +struct event_constraint intel_hsw_pebs_event_constraints[] = {
> +       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> +       INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> +       INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> +       INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
> +       INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
> +       INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
> +       INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
> +       INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
> +       INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
> +       INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
> +       INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
> +       INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
> +       INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
> +       INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
> +       INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
> +       INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
> +       INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
> +       INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
> +       INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
> +       INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
> +       INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
> +       INTEL_UEVENT_CONSTRAINT(0x02d3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */

Should be 0x01d3 here.

> +       INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
> +       INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
> +
> +       EVENT_CONSTRAINT_END
> +};
> +
>  struct event_constraint *intel_pebs_constraints(struct perf_event *event)
>  {
>         struct event_constraint *c;
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 01/12] perf, x86: Add PEBSv2 record support
  2013-01-28 13:15   ` Stephane Eranian
@ 2013-01-28 16:10     ` Andi Kleen
  0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-28 16:10 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

> I think you should call this: PEBS v3 and not v2.

v2 is the version number in the enumeration register.
It would be confusing to use a different numbering than Intel does.
That's also the numbering that is printed in dmesg.
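
For reference, a rough sketch of where that number comes from - the PEBS
record format field (bits 11:8 of IA32_PERF_CAPABILITIES), as read during
intel_pmu_init(); shown here only for illustration:

	u64 capabilities;

	/* enumerate the PMU capabilities, including the PEBS record format */
	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
	x86_pmu.intel_cap.capabilities = capabilities;

	/*
	 * intel_ds_init() then switches on x86_pmu.intel_cap.pebs_format:
	 * 0 = core, 1 = nhm, 2 = the new Haswell format added in this patch.
	 */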

> We already have 2 existing formats: core and nhm.
> You're adding the hsw format.
> 
> So either you add it as struct pebs_record_hsw or you
> cleanup the naming of all the structs and call them:

core is v0.

> 
> struct pebs_record_v1 /* core */
> struct pebs_record_v2 /* nhm, snb, ivb */
> struct pebs_record_v3 /* hsw */
> 
> That would seem more consistent to me.

I personally don't see a lot of need for a rename, but I can send 
a follow-on patch to rename nhm to v1 and core to v0. I will do that
separately because people are already complaining that the patchkit
is too big, and it doesn't really affect the functionality.

-Andi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 02/12] perf, x86: Basic Haswell PMU support v2
  2013-01-28 15:34   ` Stephane Eranian
@ 2013-01-28 16:16     ` Andi Kleen
  0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-28 16:16 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

> > +static struct event_constraint intel_hsw_event_constraints[] =
> > +{
> > +       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> > +       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> > +       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
> > +       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
> 
> You're covering the entire event here, so comment should be: L1D_PEND_MISS.*

Ok.

> 
> > +       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
> > +       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> > +       EVENT_CONSTRAINT_END
> 
> You are missing constraint on the following public events:
> - CYCLE_ACTIVITY (0xa3)

AFAIK that's not a PEBS event.

> 
> > +};
> > +
> >  static u64 intel_pmu_event_map(int hw_event)
> >  {
> >         return intel_perfmon_event_map[hw_event];
> > @@ -2107,6 +2118,24 @@ __init int intel_pmu_init(void)
> >                 break;
> >
> >
> > +       case 60: /* Haswell Client */
> > +       case 70:
> > +       case 71:
> > +               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> > +                      sizeof(hw_cache_event_ids));
> > +
> > +               intel_pmu_lbr_init_nhm();
> > +
> I suspect this one should be intel_pmu_lbr_init_snb(), otherwise
> you inherit the broken filter workarounds.

Good point.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
  2013-01-25 22:00 ` [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
  2013-01-26 11:54   ` Ingo Molnar
@ 2013-01-28 16:52   ` Stephane Eranian
  2013-01-28 17:37     ` Andi Kleen
  1 sibling, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 16:52 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Implement the TSX transaction and checkpointed transaction qualifiers for
> Haswell. This allows e.g. to profile the number of cycles in transactions.
>
> The checkpointed qualifier requires forcing the event to
> counter 2, implement this with a custom constraint for Haswell.
>
> Also add sysfs format attributes for intx/intx_cp
>
> [Updated from earlier version that used generic attributes, now does
> raw + sysfs formats]
> v2: Moved bad hunk. Forbid some bad combinations.
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   61 ++++++++++++++++++++++++++++++++
>  1 files changed, 61 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 634f639..44e18c02 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -13,6 +13,7 @@
>  #include <linux/slab.h>
>  #include <linux/export.h>
>
> +#include <asm/cpufeature.h>
>  #include <asm/hardirq.h>
>  #include <asm/apic.h>
>
> @@ -1597,6 +1598,44 @@ static void core_pmu_enable_all(int added)
>         }
>  }
>
> +static int hsw_hw_config(struct perf_event *event)
> +{
> +       int ret = intel_pmu_hw_config(event);
> +
> +       if (ret)
> +               return ret;
> +       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
> +               return 0;
> +       event->hw.config |= event->attr.config & (HSW_INTX|HSW_INTX_CHECKPOINTED);
> +
> +       /*
White space damage here

> +        * INTX/INTX-CP do not play well with PEBS or ANY thread mode.
> +        */
> +       if ((event->hw.config & (HSW_INTX|HSW_INTX_CHECKPOINTED)) &&
> +            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
> +             event->attr.precise_ip > 0))
> +               return -EIO;

Shouldn't this be -EOPNOTSUPP instead to be more consistent with how
similar issues
are handled elsewhere in the code?

> +       return 0;
> +}
> +
> +static struct event_constraint counter2_constraint =

White space damage here too.

> +                       EVENT_CONSTRAINT(0, 0x4, 0);
> +
> +static struct event_constraint *
> +hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +       struct event_constraint *c = intel_get_event_constraints(cpuc, event);
> +
> +       /* Handle special quirk on intx_checkpointed only in counter 2 */
> +       if (event->hw.config & HSW_INTX_CHECKPOINTED) {
> +               if (c->idxmsk64 & (1U << 2))
> +                       return &counter2_constraint;
> +               return &emptyconstraint;
> +       }
> +
> +       return c;
> +}
> +
>  PMU_FORMAT_ATTR(event, "config:0-7"    );
>  PMU_FORMAT_ATTR(umask, "config:8-15"   );
>  PMU_FORMAT_ATTR(edge,  "config:18"     );
> @@ -1604,6 +1643,8 @@ PMU_FORMAT_ATTR(pc,       "config:19"     );
>  PMU_FORMAT_ATTR(any,   "config:21"     ); /* v3 + */
>  PMU_FORMAT_ATTR(inv,   "config:23"     );
>  PMU_FORMAT_ATTR(cmask, "config:24-31"  );
> +PMU_FORMAT_ATTR(intx,  "config:32"     );
> +PMU_FORMAT_ATTR(intx_cp,"config:33"    );
>
I have a problem with this for fixed counter only events:

$ perf stat -e cpu/event=0x00,umask=3,intx=1/,cycles noploop 1

This silently drops intx=1 for unhalted_ref_cycles. This event
only works on a fixed counter, so I am wondering what I am measuring
here. In general, I don't like it when settings are silently dropped, because
the user will be misled into thinking the modifier was applied when in
fact it was not.

The way it's handled with the other modifiers, such as inv, cmask
for this event, is that the event can never be scheduled, time_running=0.
For instance, on my Nehalem:

$ perf stat -v -e cpu/event=0x00,umask=3,inv/,cycles noploop 1
noploop for 1 seconds
cpu/event=0x00,umask=3,inv/: 0 1000362571 1000362571
cycles: 3075425423 1000362571 1000362571

 Performance counter stats for 'noploop 1':

                 0 cpu/event=0x00,umask=3,inv/
        3075425423 cycles                    #    0.000 GHz

       1.027822053 seconds time elapsed

For some reason, I noticed the results for this command are different
on Core:
$ perf stat -v -e cpu/event=0x00,umask=3,inv/,cycles noploop 1
noploop for 1 seconds
cpu/event=0x00,umask=3,inv/: 9561572340 1000036603 1000036603
cycles: 2390392858 1000036603 1000036603

 Performance counter stats for 'noploop 1':

        9561572340 cpu/event=0x00,umask=3,inv/
        2390392858 cycles                    #    0.000 GHz

       1.017143853 seconds time elapsed

Should not have worked. So there is another discrepancy
here that needs to be looked at. This is beyond the scope
of your patch.

>  static struct attribute *intel_arch_formats_attr[] = {
>         &format_attr_event.attr,
> @@ -1761,6 +1802,23 @@ static struct attribute *intel_arch3_formats_attr[] = {
>         NULL,
>  };
>
> +/* Arch3 + TSX support */
> +static struct attribute *intel_hsw_formats_attr[] __read_mostly = {
> +       &format_attr_event.attr,
> +       &format_attr_umask.attr,
> +       &format_attr_edge.attr,
> +       &format_attr_pc.attr,
> +       &format_attr_any.attr,
> +       &format_attr_inv.attr,
> +       &format_attr_cmask.attr,
> +       &format_attr_intx.attr,
> +       &format_attr_intx_cp.attr,
> +
> +       &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
> +       NULL,
> +};
> +
> +
>  static __initconst const struct x86_pmu intel_pmu = {
>         .name                   = "Intel",
>         .handle_irq             = intel_pmu_handle_irq,
> @@ -2135,6 +2193,9 @@ __init int intel_pmu_init(void)
>                 x86_pmu.er_flags |= ERF_HAS_RSP_1;
>                 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
>
> +               x86_pmu.hw_config = hsw_hw_config;
> +               x86_pmu.get_event_constraints = hsw_get_event_constraints;
> +               x86_pmu.format_attrs = intel_hsw_formats_attr;
>                 pr_cont("Haswell events, ");
>                 break;
>
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2
  2013-01-28 16:52   ` Stephane Eranian
@ 2013-01-28 17:37     ` Andi Kleen
  0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-28 17:37 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

> This silently drops intx=1 for unhalted_ref_cycles. This event
> only works on fixed counter. So I am wondering what I am measuring
> here. In general, I don't like when settings are silently dropped because
> the user will be misled into thinking the modifier was applied when in
> fact it was not.

Ok. I can look at it later, but I don't think it's a show stopper.

Error handling in general was never a strength of perf.

-Andi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 05/12] perf, x86: Support Haswell v4 LBR format
  2013-01-25 22:00 ` [PATCH 05/12] perf, x86: Support Haswell v4 LBR format Andi Kleen
@ 2013-01-28 21:47   ` Stephane Eranian
  2013-01-28 22:08     ` Andi Kleen
  0 siblings, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 21:47 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Haswell has two additional flags in the LBR 'from' field for TSX: intx and
> abort, implemented as a new v4 version of the LBR format.
>
> Handle these and adjust the sign extension code so it still extends correctly.
> The flags are exported in the LBR record similarly to the existing
> misprediction flag.
>
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event_intel_lbr.c |   18 +++++++++++++++---
>  include/linux/perf_event.h                 |    7 ++++++-
>  2 files changed, 21 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> index da02e9c..2af6695b 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -12,6 +12,7 @@ enum {
>         LBR_FORMAT_LIP          = 0x01,
>         LBR_FORMAT_EIP          = 0x02,
>         LBR_FORMAT_EIP_FLAGS    = 0x03,
> +       LBR_FORMAT_EIP_FLAGS2   = 0x04,
>  };
>
>  /*
> @@ -56,6 +57,8 @@ enum {
>          LBR_FAR)
>
>  #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
> +#define LBR_FROM_FLAG_INTX     (1ULL << 62)
> +#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
>
>  #define for_each_branch_sample_type(x) \
>         for ((x) = PERF_SAMPLE_BRANCH_USER; \
> @@ -270,21 +273,30 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
>
>         for (i = 0; i < x86_pmu.lbr_nr; i++) {
>                 unsigned long lbr_idx = (tos - i) & mask;
> -               u64 from, to, mis = 0, pred = 0;
> +               u64 from, to, mis = 0, pred = 0, intx = 0, abort = 0;
>
>                 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
>                 rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
>
> -               if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
> +               if (lbr_format == LBR_FORMAT_EIP_FLAGS ||
> +                   lbr_format == LBR_FORMAT_EIP_FLAGS2) {
>                         mis = !!(from & LBR_FROM_FLAG_MISPRED);
>                         pred = !mis;
> -                       from = (u64)((((s64)from) << 1) >> 1);
> +                       if (lbr_format == LBR_FORMAT_EIP_FLAGS)
> +                               from = (u64)((((s64)from) << 1) >> 1);
> +                       else if (lbr_format == LBR_FORMAT_EIP_FLAGS2) {
> +                               intx = !!(from & LBR_FROM_FLAG_INTX);
> +                               abort = !!(from & LBR_FROM_FLAG_ABORT);
> +                               from = (u64)((((s64)from) << 3) >> 3);
> +                       }
>                 }
>
Wouldn't all that be more readable with a switch-case, especially given
that lbr_format could be extended?
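
Something along these lines, just to illustrate the structure (untested
sketch, same semantics as the if/else chain above):

	switch (lbr_format) {
	case LBR_FORMAT_EIP_FLAGS:
		mis  = !!(from & LBR_FROM_FLAG_MISPRED);
		pred = !mis;
		from = (u64)((((s64)from) << 1) >> 1);
		break;
	case LBR_FORMAT_EIP_FLAGS2:
		mis   = !!(from & LBR_FROM_FLAG_MISPRED);
		pred  = !mis;
		intx  = !!(from & LBR_FROM_FLAG_INTX);
		abort = !!(from & LBR_FROM_FLAG_ABORT);
		from  = (u64)((((s64)from) << 3) >> 3);
		break;
	default:
		break;
	}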


>                 cpuc->lbr_entries[i].from       = from;
>                 cpuc->lbr_entries[i].to         = to;
>                 cpuc->lbr_entries[i].mispred    = mis;
>                 cpuc->lbr_entries[i].predicted  = pred;
> +               cpuc->lbr_entries[i].intx       = intx;
> +               cpuc->lbr_entries[i].abort      = abort;
>                 cpuc->lbr_entries[i].reserved   = 0;
>         }
>         cpuc->lbr_stack.nr = i;
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 6bfb2faa..91052e1 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -74,13 +74,18 @@ struct perf_raw_record {
>   *
>   * support for mispred, predicted is optional. In case it
>   * is not supported mispred = predicted = 0.
> + *
> + *     intx: running in a hardware transaction
> + *     abort: aborting a hardware transaction
>   */
>  struct perf_branch_entry {
>         __u64   from;
>         __u64   to;
>         __u64   mispred:1,  /* target mispredicted */
>                 predicted:1,/* target predicted */
> -               reserved:62;
> +               intx:1,     /* in transaction */
> +               abort:1,    /* transaction abort */
> +               reserved:60;
>  };
>
>  /*
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 05/12] perf, x86: Support Haswell v4 LBR format
  2013-01-28 21:47   ` Stephane Eranian
@ 2013-01-28 22:08     ` Andi Kleen
  2013-01-28 22:20       ` Stephane Eranian
  0 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-28 22:08 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

> > -               if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
> > +               if (lbr_format == LBR_FORMAT_EIP_FLAGS ||
> > +                   lbr_format == LBR_FORMAT_EIP_FLAGS2) {
> >                         mis = !!(from & LBR_FROM_FLAG_MISPRED);
> >                         pred = !mis;
> > -                       from = (u64)((((s64)from) << 1) >> 1);
> > +                       if (lbr_format == LBR_FORMAT_EIP_FLAGS)
> > +                               from = (u64)((((s64)from) << 1) >> 1);
> > +                       else if (lbr_format == LBR_FORMAT_EIP_FLAGS2) {
> > +                               intx = !!(from & LBR_FROM_FLAG_INTX);
> > +                               abort = !!(from & LBR_FROM_FLAG_ABORT);
> > +                               from = (u64)((((s64)from) << 3) >> 3);
> > +                       }
> >                 }
> >
> Wouldn't all that be more readable with a switch-case, especially given
> that lbr_format could be extended.

The current version works for me.

-Andi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 05/12] perf, x86: Support Haswell v4 LBR format
  2013-01-28 22:08     ` Andi Kleen
@ 2013-01-28 22:20       ` Stephane Eranian
  0 siblings, 0 replies; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 22:20 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Mon, Jan 28, 2013 at 11:08 PM, Andi Kleen <andi@firstfloor.org> wrote:
>> > -               if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
>> > +               if (lbr_format == LBR_FORMAT_EIP_FLAGS ||
>> > +                   lbr_format == LBR_FORMAT_EIP_FLAGS2) {
>> >                         mis = !!(from & LBR_FROM_FLAG_MISPRED);
>> >                         pred = !mis;
>> > -                       from = (u64)((((s64)from) << 1) >> 1);
>> > +                       if (lbr_format == LBR_FORMAT_EIP_FLAGS)
>> > +                               from = (u64)((((s64)from) << 1) >> 1);
>> > +                       else if (lbr_format == LBR_FORMAT_EIP_FLAGS2) {
>> > +                               intx = !!(from & LBR_FROM_FLAG_INTX);
>> > +                               abort = !!(from & LBR_FROM_FLAG_ABORT);
>> > +                               from = (u64)((((s64)from) << 3) >> 3);
>> > +                       }
>> >                 }
>> >
>> Wouldn't all that be more readable with a switch-case, especially given
>> that lbr_format could be extended.
>
> The current version works for me.

I know it works. I am questioning the style: hard to read.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-25 22:00 ` [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
@ 2013-01-28 22:32   ` Stephane Eranian
  2013-01-28 23:16     ` Andi Kleen
  0 siblings, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-28 22:32 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> With checkpointed counters there can be a situation where the counter
> is overflowing, aborts the transaction, is set back to a non-overflowing
> checkpoint, and causes an interrupt. The interrupt doesn't see the overflow
> because it has been checkpointed.  This is then a spurious PMI, typically with an
> ugly NMI message.  It can also lead to excessive aborts.
>
> Avoid this problem by:
> - Using the full counter width for counting counters (previous patch)
> - Forbid sampling for checkpointed counters. It's not too useful anyways,
> checkpointing is mainly for counting.
> - On a PMI always set back checkpointed counters to zero.
>
> v2: Add unlikely. Add comment
> v3: Allow large sampling periods with CP for KVM
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   34 ++++++++++++++++++++++++++++++++
>  1 files changed, 34 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index bc21bce..9b4dda5 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1079,6 +1079,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
>  int intel_pmu_save_and_restart(struct perf_event *event)
>  {
>         x86_perf_event_update(event);
> +       /*
> +        * For a checkpointed counter always reset back to 0.  This
> +        * avoids a situation where the counter overflows, aborts the
> +        * transaction and is then set back to shortly before the
> +        * overflow, and overflows and aborts again.
> +        */
> +       if (unlikely(event->hw.config & HSW_INTX_CHECKPOINTED)) {
> +               /* No race with NMIs because the counter should not be armed */
> +               wrmsrl(event->hw.event_base, 0);
> +               local64_set(&event->hw.prev_count, 0);
> +       }
>         return x86_perf_event_set_period(event);
>  }
>
> @@ -1162,6 +1173,15 @@ again:
>                 x86_pmu.drain_pebs(regs);
>         }
>
> +       /*
> +        * To avoid spurious interrupts with perf stat always reset checkpointed
> +        * counters.
> +        *
> +        * XXX move somewhere else.
> +        */
> +       if (cpuc->events[2] && (cpuc->events[2]->hw.config & HSW_INTX_CHECKPOINTED))
> +               status |= (1ULL << 2);
> +
I don't really buy this workaround. You are assuming you're always
measuring the INTX_CHECKPOINTED event by itself. So what if you get into
the handler because of a PMI due to an overflow of another counter which
is active at the same time as counter2? You're going to artificially
add an overflow to counter2. Unless you're enforcing only counter2 in use.

I understand what you are trying to do, but it looks to me like something
is missing in HW. The counter is reinstated to its state before the
critical section, but the PMI cannot be cancelled and there is no state
left behind to tell what to do with it.

Also, I think the code would gain in readability if you were to define
an inline function:

static inline bool is_event_intx_cp(struct perf_event *event)
{
   return event && (event->hw.config & HSW_INTX_CHECKPOINTED);
}


>         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
>                 struct perf_event *event = cpuc->events[bit];
>
> @@ -1615,6 +1635,20 @@ static int hsw_hw_config(struct perf_event *event)
>              ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
>               event->attr.precise_ip > 0))
>                 return -EIO;
> +       if (event->hw.config & HSW_INTX_CHECKPOINTED) {
> +               /*
> +                * Sampling of checkpointed events can cause situations where
> +                * the CPU constantly aborts because of a overflow, which is
> +                * then checkpointed back and ignored. Forbid checkpointing
> +                * for sampling.
> +                *
> +                * But still allow a long sampling period, so that perf stat
> +                * from KVM works.
> +                */

What does perf stat have to do with sample_period?

> +               if (event->attr.sample_period > 0 &&
> +                   event->attr.sample_period < 0x7fffffff)
> +                       return -EIO;
> +       }
Same comment about -EIO vs. -EOPNOTSUPP. sample_period is u64,
so it's always >= 0. Where does this 31-bit limit come from? Experimentation?
It could be written:
      if (event->attr.sample_period && event->attr.sample_period < 0x7fffffff)

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-28 22:32   ` Stephane Eranian
@ 2013-01-28 23:16     ` Andi Kleen
  2013-01-29  0:30       ` Stephane Eranian
  0 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-28 23:16 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

> I don't buy really this workaround. You are assuming you're always
> measuring INTC_CHECKPOINTED
> event by itself. 

There's no such assumption.

> So what if you get into the handler because of an PMI
> due to an overflow
> of another counter which is active at the same time as counter2?
> You're going to artificially
> add an overflow to counter2. Unless you're enforcing only counter2 in use.

All the code does is always check the counter. There's no
"overflow added". For counting it may be set back and accumulated 
a bit earlier than normal, but that's no problem. This will only
happen for a checkpointed counter 2, not for anything else.

> The counter is reinstated to its state before the critical section but
> the PMI cannot be
> cancelled and there is no state left behind to tell what to do with it.

The PMI is effectively spurious, but we use it to set the counter back.
I don't know what you mean by "cancel" - it already happened, of course.


> static inline bool is_event_intx_cp(struct perf_event *event)
> {
>    return event && (event->hw.config & HSW_INTX_CHECKPOINTED);
> }

They both look the same to me.
> 
> 
> >         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
> >                 struct perf_event *event = cpuc->events[bit];
> >
> > @@ -1615,6 +1635,20 @@ static int hsw_hw_config(struct perf_event *event)
> >              ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
> >               event->attr.precise_ip > 0))
> >                 return -EIO;
> > +       if (event->hw.config & HSW_INTX_CHECKPOINTED) {
> > +               /*
> > +                * Sampling of checkpointed events can cause situations where
> > +                * the CPU constantly aborts because of a overflow, which is
> > +                * then checkpointed back and ignored. Forbid checkpointing
> > +                * for sampling.
> > +                *
> > +                * But still allow a long sampling period, so that perf stat
> > +                * from KVM works.
> > +                */
> 
> What has perf stat have to do with sample_period?

It always uses a period to accumulate in a larger counter as you probably know.
Also with the other code we only allow checkpoint with stat.


> 
> > +               if (event->attr.sample_period > 0 &&
> > +                   event->attr.sample_period < 0x7fffffff)
> > +                       return -EIO;
> > +       }
> same comment about -EIO vs. EOPNOTSUPP. sample_period is u64
> so, it's always >= 0. Where does this 31-bit limit come from? 

That's what perf stat uses when running in the KVM guest.

> Experimentation?

The code does > 0, not >= 0

> Could be written:
>       if (event->attr.sample_period && event->attr.sample_period < 0x7fffffff)

That's 100% equivalent to what I wrote.

I can change the error value.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-28 23:16     ` Andi Kleen
@ 2013-01-29  0:30       ` Stephane Eranian
  2013-01-29  1:00         ` Andi Kleen
  0 siblings, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-29  0:30 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Tue, Jan 29, 2013 at 12:16 AM, Andi Kleen <andi@firstfloor.org> wrote:
>> I don't buy really this workaround. You are assuming you're always
>> measuring INTC_CHECKPOINTED
>> event by itself.
>
> There's no such assumption.
>
>> So what if you get into the handler because of an PMI
>> due to an overflow
>> of another counter which is active at the same time as counter2?
>> You're going to artificially
>> add an overflow to counter2. Unless you're enforcing only counter2 in use.
>
> All the code does it to always check the counter. There's no
> "overflow added". For counting it may be set back and accumulated
> a bit earlier than normal, but that's no problem. This will only
> happen for a checkpointed counter 2, not for anything else.
>
Ok, you're right. I misunderstood the point of the check. Yes, it systematically
adds INTX_CP to the list of events to check. That does not mean it will detect
an overflow.

>> The counter is reinstated to its state before the critical section but
>> the PMI cannot be
>> cancelled and there is no state left behind to tell what to do with it.
>
> The PMI is effectively spurious, but we use it to set back. Don't know
> what you mean with "cancel". It already happened of course.
>
But when you do this, it seems you're making INTX_CP events unusable
for sampling, because you're resetting their value under the covers.
So what happens when you sample, especially with a fixed period?

>
>> static inline bool is_event_intx_cp(struct perf_event *event)
>> {
>>    return event && (event->hw.config & HSW_INTX_CHECKPOINTED);
>> }
>
> They both look the same to me.
>>
I think you understand what I meant by this. You substitute all the
long checks with the inline. It does not change anything in the code; it
makes it easier to read and avoids long lines.
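
With that, the PMI-handler check quoted above would read, for example
(sketch only):

	if (is_event_intx_cp(cpuc->events[2]))
		status |= (1ULL << 2);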

>>
>> >         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
>> >                 struct perf_event *event = cpuc->events[bit];
>> >
>> > @@ -1615,6 +1635,20 @@ static int hsw_hw_config(struct perf_event *event)
>> >              ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
>> >               event->attr.precise_ip > 0))
>> >                 return -EIO;
>> > +       if (event->hw.config & HSW_INTX_CHECKPOINTED) {
>> > +               /*
>> > +                * Sampling of checkpointed events can cause situations where
>> > +                * the CPU constantly aborts because of a overflow, which is
>> > +                * then checkpointed back and ignored. Forbid checkpointing
>> > +                * for sampling.
>> > +                *
>> > +                * But still allow a long sampling period, so that perf stat
>> > +                * from KVM works.
>> > +                */
>>
>> What has perf stat have to do with sample_period?
>
> It always uses a period to accumulate in a larger counter as you probably know.
> Also with the other code we only allow checkpoint with stat.
>
Yes, I know.

>
>>
>> > +               if (event->attr.sample_period > 0 &&
>> > +                   event->attr.sample_period < 0x7fffffff)
>> > +                       return -EIO;
>> > +       }
Explain the 0x7fffffff to me? Is that the max period set by default when you
just count?


>> same comment about -EIO vs. EOPNOTSUPP. sample_period is u64
>> so, it's always >= 0. Where does this 31-bit limit come from?
>
> That's what perf stat uses when running in the KVM guest.
>
>> Experimentation?
>
> The code does > 0, not >= 0
>
>> Could be written:
>>       if (event->attr.sample_period && event->attr.sample_period < 0x7fffffff)
>
> That's 100% equivalent to what I wrote.
>
I know.
Usually when I see x > 0, I interpret it to mean the field could be negative.
That's what I was trying to say. However, here we know it cannot be. No
big deal.

> I can change the error value.

Ok.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-29  0:30       ` Stephane Eranian
@ 2013-01-29  1:00         ` Andi Kleen
  2013-01-30  8:51           ` Stephane Eranian
  0 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2013-01-29  1:00 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Tue, Jan 29, 2013 at 01:30:19AM +0100, Stephane Eranian wrote:
> >> The counter is reinstated to its state before the critical section but
> >> the PMI cannot be
> >> cancelled and there is no state left behind to tell what to do with it.
> >
> > The PMI is effectively spurious, but we use it to set back. Don't know
> > what you mean with "cancel". It already happened of course.
> >
> But when you do this, it seems you making INT_CP events unusable
> for sampling, because you're resetting their value under the cover.
> So what happens when you sample, especially with a fixed period?

Sampling is forbidden for checkpointed events, the setup code
enforces that. It's unlikely to be useful anyways.

The main use case for checkpointing is perf stat -T and related
counting usages.
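
For example, counting cycles, transactional cycles and checkpointed
transactional cycles would look roughly like this (hypothetical command
line, relying on the intx/intx_cp sysfs format attributes added earlier
in this series; event 0x3c is unhalted core cycles):

$ perf stat -e cycles \
            -e cpu/event=0x3c,intx=1/ \
            -e cpu/event=0x3c,intx=1,intx_cp=1/ ./workload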

> >>
> >> > +               if (event->attr.sample_period > 0 &&
> >> > +                   event->attr.sample_period < 0x7fffffff)
> >> > +                       return -EIO;
> >> > +       }
> Explain the 0x7fffffff to me? Is that the max period set by default when you
> just count?

Originally I had just > 0, but then I found that perf stat from the
guest doesn't work anymore because it sets a very high overflow value
to accumulate counters.

The 0x7fffffff is a somewhat arbitrary threshold to detect this case.


-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-29  1:00         ` Andi Kleen
@ 2013-01-30  8:51           ` Stephane Eranian
  2013-01-30 20:58             ` Andi Kleen
  0 siblings, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-30  8:51 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Tue, Jan 29, 2013 at 2:00 AM, Andi Kleen <andi@firstfloor.org> wrote:
>
> On Tue, Jan 29, 2013 at 01:30:19AM +0100, Stephane Eranian wrote:
> > >> The counter is reinstated to its state before the critical section but
> > >> the PMI cannot be
> > >> cancelled and there is no state left behind to tell what to do with it.
> > >
> > > The PMI is effectively spurious, but we use it to set back. Don't know
> > > what you mean with "cancel". It already happened of course.
> > >
> > But when you do this, it seems you making INT_CP events unusable
> > for sampling, because you're resetting their value under the cover.
> > So what happens when you sample, especially with a fixed period?
>
> Sampling is forbidden for checkpointed events, the setup code
> enforces that. It's unlikely to be useful anyways.
>
> The main use case for checkpointing is perf stat -T and related
> counting usages.
>
> > >>
> > >> > +               if (event->attr.sample_period > 0 &&
> > >> > +                   event->attr.sample_period < 0x7fffffff)
> > >> > +                       return -EIO;
> > >> > +       }
> > Explain the 0x7fffffff to me? Is that the max period set by default when you
> > just count?
>
> Originally I had just > 0, but then I found that perf stat from the
> guest doesn't work anymore because it sets an very high overflow
> to accumulate counters.
>
> The 0x7fffffff is a somewhat arbitary threshold to detect this case.
>
When I come in with the default frequency mode, sample_period
is a frequency and not a period anymore. So I could do:

$ perf record -F 100 -e instructions:intx_cp ....

sample_period = 100; freq=1;

That's a very low sampling rate, yet I think it would be rejected by your code.
But I guess in frequency mode, there is no guarantee on the period. The kernel
may lower the period to 1 in an attempt to achieve the desired rate for the tick
period. And that may cause the spurious interrupts.

But if I come in with frequency 0x7fffffff+1, then that's a very high
frequency, and thus a small period, and I would pass the test. So I think
you need to reinforce the test for freq=1.
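
e.g. something like this on top of your hsw_hw_config() hunk (rough
sketch only):

	if (event->hw.config & HSW_INTX_CHECKPOINTED) {
		if (event->attr.freq ||
		    (event->attr.sample_period &&
		     event->attr.sample_period < 0x7fffffff))
			return -EOPNOTSUPP;
	}

so that frequency mode is rejected as well.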

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3
  2013-01-30  8:51           ` Stephane Eranian
@ 2013-01-30 20:58             ` Andi Kleen
  0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-30 20:58 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

> That's a very low sampling rate, yet I think it would be rejected by your code.

You mean allowed? 

> But if I come in with frequency 0x7fffffff+1, then that's a very high
> frequency, thus
> small period, I would pass the test. So I think you need to reinforce the test
> for freq=1.

I'm aware that there are some configs that can slip through, but there's
no other choice if we still allow guest perf stat. The cutoff is
somewhat arbitrary.

It's not a correctness problem; it's just that things can be unexpectedly
slower and you may see unexpected NMI messages.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 01/12] perf, x86: Add PEBSv2 record support
  2013-01-25 22:00 ` [PATCH 01/12] perf, x86: Add PEBSv2 record support Andi Kleen
  2013-01-28 13:15   ` Stephane Eranian
@ 2013-01-31 17:15   ` Stephane Eranian
  1 sibling, 0 replies; 36+ messages in thread
From: Stephane Eranian @ 2013-01-31 17:15 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Andi Kleen

On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add support for the v2 PEBS format. It has a superset of the v1 PEBS
> fields, but has a longer record so we need to adjust the code paths.
>
> The main advantage is the new "EventingRip" support which directly
> gives the instruction, not off-by-one instruction. So with precise == 2
> we use that directly and don't try to use LBRs and walking basic blocks.
> This lowers the overhead significantly.
>
> Some other features are added in later patches.
>
> Signed-off-by: Andi Kleen <ak@linux.intel.com>

Signed-off-by: Stephane Eranian <eranian@google.com>

> ---
>  arch/x86/kernel/cpu/perf_event.c          |    2 +-
>  arch/x86/kernel/cpu/perf_event_intel_ds.c |  101 ++++++++++++++++++++++-------
>  2 files changed, 79 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 6774c17..c95290a 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -397,7 +397,7 @@ int x86_pmu_hw_config(struct perf_event *event)
>                  * check that PEBS LBR correction does not conflict with
>                  * whatever the user is asking with attr->branch_sample_type
>                  */
> -               if (event->attr.precise_ip > 1) {
> +               if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
>                         u64 *br_type = &event->attr.branch_sample_type;
>
>                         if (has_branch_stack(event)) {
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 826054a..9d0dae0 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -41,6 +41,12 @@ struct pebs_record_nhm {
>         u64 status, dla, dse, lat;
>  };
>
> +struct pebs_record_v2 {
> +       struct pebs_record_nhm nhm;
> +       u64 eventingrip;
> +       u64 tsx_tuning;
> +};
> +
>  void init_debug_store_on_cpu(int cpu)
>  {
>         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
> @@ -559,8 +565,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>  {
>         /*
>          * We cast to pebs_record_core since that is a subset of
> -        * both formats and we don't use the other fields in this
> -        * routine.
> +        * both formats.
>          */
>         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>         struct pebs_record_core *pebs = __pebs;
> @@ -588,7 +593,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>         regs.bp = pebs->bp;
>         regs.sp = pebs->sp;
>
> -       if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
> +       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
> +               regs.ip = ((struct pebs_record_v2 *)pebs)->eventingrip;
> +               regs.flags |= PERF_EFLAGS_EXACT;
> +       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
>                 regs.flags |= PERF_EFLAGS_EXACT;
>         else
>                 regs.flags &= ~PERF_EFLAGS_EXACT;
> @@ -641,35 +649,21 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
>         __intel_pmu_pebs_event(event, iregs, at);
>  }
>
> -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> +static void intel_pmu_drain_pebs_common(struct pt_regs *iregs, void *at,
> +                                       void *top)
>  {
>         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>         struct debug_store *ds = cpuc->ds;
> -       struct pebs_record_nhm *at, *top;
>         struct perf_event *event = NULL;
>         u64 status = 0;
> -       int bit, n;
> -
> -       if (!x86_pmu.pebs_active)
> -               return;
> -
> -       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> -       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
> +       int bit;
>
>         ds->pebs_index = ds->pebs_buffer_base;
>
> -       n = top - at;
> -       if (n <= 0)
> -               return;
> +       for ( ; at < top; at += x86_pmu.pebs_record_size) {
> +               struct pebs_record_nhm *p = at;
>
> -       /*
> -        * Should not happen, we program the threshold at 1 and do not
> -        * set a reset value.
> -        */
> -       WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
> -
> -       for ( ; at < top; at++) {
> -               for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
> +               for_each_set_bit(bit, (unsigned long *)&p->status, x86_pmu.max_pebs_events) {
>                         event = cpuc->events[bit];
>                         if (!test_bit(bit, cpuc->active_mask))
>                                 continue;
> @@ -692,6 +686,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>         }
>  }
>
> +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +       struct debug_store *ds = cpuc->ds;
> +       struct pebs_record_nhm *at, *top;
> +       int n;
> +
> +       if (!x86_pmu.pebs_active)
> +               return;
> +
> +       at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> +       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
> +
> +       ds->pebs_index = ds->pebs_buffer_base;
> +
> +       n = top - at;
> +       if (n <= 0)
> +               return;
> +
> +       /*
> +        * Should not happen, we program the threshold at 1 and do not
> +        * set a reset value.
> +        */
> +       WARN_ONCE(n > x86_pmu.max_pebs_events,
> +                 "Unexpected number of pebs records %d\n", n);
> +
> +       return intel_pmu_drain_pebs_common(iregs, at, top);
> +}
> +
> +static void intel_pmu_drain_pebs_v2(struct pt_regs *iregs)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +       struct debug_store *ds = cpuc->ds;
> +       struct pebs_record_v2 *at, *top;
> +       int n;
> +
> +       if (!x86_pmu.pebs_active)
> +               return;
> +
> +       at  = (struct pebs_record_v2 *)(unsigned long)ds->pebs_buffer_base;
> +       top = (struct pebs_record_v2 *)(unsigned long)ds->pebs_index;
> +
> +       n = top - at;
> +       if (n <= 0)
> +               return;
> +       /*
> +        * Should not happen, we program the threshold at 1 and do not
> +        * set a reset value.
> +        */
> +       WARN_ONCE(n > x86_pmu.max_pebs_events,
> +                 "Unexpected number of pebs records %d\n", n);
> +
> +       return intel_pmu_drain_pebs_common(iregs, at, top);
> +}
> +
>  /*
>   * BTS, PEBS probe and setup
>   */
> @@ -723,6 +772,12 @@ void intel_ds_init(void)
>                         x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>                         break;
>
> +               case 2:
> +                       printk(KERN_CONT "PEBS fmt2%c, ", pebs_type);
> +                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_v2);
> +                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_v2;
> +                       break;
> +
>                 default:
>                         printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
>                         x86_pmu.pebs = 0;
> --
> 1.7.7.6
>
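
As a usage note, not part of the patch itself: the precise == 2 path
added here corresponds to the usual ":pp" modifier in the perf tools,
e.g. something like

  $ perf record -e cycles:pp ./workload

(./workload is just a placeholder). On pebs_format >= 2 hardware the
sampled IP then comes straight from EventingRip instead of going
through the LBR-based fixup.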

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Basic perf PMU support for Haswell v1
  2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
                   ` (11 preceding siblings ...)
  2013-01-25 22:00 ` [PATCH 12/12] perf, tools: Add abort_tx,no_tx,in_tx branch filter options to perf record -j v3 Andi Kleen
@ 2013-01-31 17:19 ` Stephane Eranian
  2013-01-31 17:47   ` Andi Kleen
  12 siblings, 1 reply; 36+ messages in thread
From: Stephane Eranian @ 2013-01-31 17:19 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim

Andi,

Are you going to post a new version based on my feedback, or will you stay
with what you posted on 1/25?


On Fri, Jan 25, 2013 at 11:00 PM, Andi Kleen <andi@firstfloor.org> wrote:
> This is based on v7 of the full Haswell PMU support, but
> ported to the latest perf/core and stripped down to the "basic support"
> as requested.
>
> I decided to include LBRs in the basic support. These are 4 patches
> self contained at the end, so could be also handled as a separate
> unit if that is preferable.
>
> Contains support for:
> - Basic Haswell PMU and PEBS support
> - Late unmasking of the PMI
> - Support for wide counters
> - New TSX counter flags, exported as sysfs attributes
> - Support for checkpointed counters.
> - New LBR flags, exported as new flags with tools support
>
> Available from
> git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc hsw/pmu4-basics
>
> For more details on the Haswell PMU please see the SDM. For more details on TSX
> please see http://halobates.de/adding-lock-elision-to-linux.pdf
>
> -Andi

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Basic perf PMU support for Haswell v1
  2013-01-31 17:19 ` Basic perf PMU support for Haswell v1 Stephane Eranian
@ 2013-01-31 17:47   ` Andi Kleen
  0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2013-01-31 17:47 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, Ingo Molnar, LKML, Peter Zijlstra, Andrew Morton,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim

On Thu, Jan 31, 2013 at 06:19:01PM +0100, Stephane Eranian wrote:
> Andi,
> 
> Are you going to post a new version based on my feedback, or will you stay
> with what you posted on 1/25?

I'm posting a new version today; I've already added all the changes.

-Andi

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2013-01-31 17:47 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-25 22:00 Basic perf PMU support for Haswell v1 Andi Kleen
2013-01-25 22:00 ` [PATCH 01/12] perf, x86: Add PEBSv2 record support Andi Kleen
2013-01-28 13:15   ` Stephane Eranian
2013-01-28 16:10     ` Andi Kleen
2013-01-31 17:15   ` Stephane Eranian
2013-01-25 22:00 ` [PATCH 02/12] perf, x86: Basic Haswell PMU support v2 Andi Kleen
2013-01-28 15:34   ` Stephane Eranian
2013-01-28 16:16     ` Andi Kleen
2013-01-25 22:00 ` [PATCH 03/12] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
2013-01-28 15:56   ` Stephane Eranian
2013-01-25 22:00 ` [PATCH 04/12] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
2013-01-26 11:54   ` Ingo Molnar
2013-01-26 21:00     ` Andi Kleen
2013-01-27 13:14       ` Ingo Molnar
     [not found]         ` <20130128050234.GQ30577@one.firstfloor.org>
2013-01-28 10:47           ` Ingo Molnar
2013-01-28 16:52   ` Stephane Eranian
2013-01-28 17:37     ` Andi Kleen
2013-01-25 22:00 ` [PATCH 05/12] perf, x86: Support Haswell v4 LBR format Andi Kleen
2013-01-28 21:47   ` Stephane Eranian
2013-01-28 22:08     ` Andi Kleen
2013-01-28 22:20       ` Stephane Eranian
2013-01-25 22:00 ` [PATCH 06/12] perf, x86: Support full width counting Andi Kleen
2013-01-25 22:00 ` [PATCH 07/12] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
2013-01-28 22:32   ` Stephane Eranian
2013-01-28 23:16     ` Andi Kleen
2013-01-29  0:30       ` Stephane Eranian
2013-01-29  1:00         ` Andi Kleen
2013-01-30  8:51           ` Stephane Eranian
2013-01-30 20:58             ` Andi Kleen
2013-01-25 22:00 ` [PATCH 08/12] perf, x86: Move NMI clearing to end of PMI handler after the counter registers are reset Andi Kleen
2013-01-25 22:00 ` [PATCH 09/12] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
2013-01-25 22:00 ` [PATCH 10/12] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2 Andi Kleen
2013-01-25 22:00 ` [PATCH 11/12] perf, tools: Support sorting by intx, abort branch flags v2 Andi Kleen
2013-01-25 22:00 ` [PATCH 12/12] perf, tools: Add abort_tx,no_tx,in_tx branch filter options to perf record -j v3 Andi Kleen
2013-01-31 17:19 ` Basic perf PMU support for Haswell v1 Stephane Eranian
2013-01-31 17:47   ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).