* (no subject)
@ 2015-05-28  4:13 Andi Kleen
  2015-05-28  4:13 ` [PATCH 1/5] x86, perf: Allow time stamp for free running PEBSv3 Andi Kleen
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Andi Kleen @ 2015-05-28  4:13 UTC (permalink / raw)
  To: peterz; +Cc: acme, linux-kernel, jolsa, eranian

[Repost; I forgot to copy linux-kernel earlier. Apologies if you
see it twice.]

Skylake moved to 32 Last Branch Records, up from the previous 16.
The current call-stack LBR implementation reads all LBRs and
also saves/restores them on context switch. This patchkit
adds optimizations that avoid most of the extra cost of the
larger number of LBRs for call stacks, unless a call nesting
deeper than 16 is actually needed. It applies on top of the
earlier Skylake code. Some of the optimizations also benefit
earlier CPUs, such as Haswell.

Note: one patch is for the perf user-space tool; the rest are kernel patches.

-Andi



* [PATCH 1/5] x86, perf: Allow time stamp for free running PEBSv3
  2015-05-28  4:13 Andi Kleen
@ 2015-05-28  4:13 ` Andi Kleen
  2015-08-04  8:56   ` [tip:perf/core] perf/x86/intel/lbr: " tip-bot for Andi Kleen
  2015-05-28  4:13 ` [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles Andi Kleen
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2015-05-28  4:13 UTC (permalink / raw)
  To: peterz; +Cc: acme, linux-kernel, jolsa, eranian, Andi Kleen, kan.liang

From: Andi Kleen <ak@linux.intel.com>

With PEBSv3 the PEBS record contains a time stamp. That means we can allow
free-running PEBS without a PMI even if the user program requested a time stamp.
This avoids the need to use -T to get free running PEBS, and also avoids
any problems with mis-identifying MMAPs later.

Move the free_running_flags state into a variable in x86_pmu and use it.
This only works when no explicit clock_id is set.
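
In intel_pmu_hw_config() the qualification check then effectively becomes
the following (a simplified sketch of the hunk below, with the nesting
flattened):

	if (event->attr.precise_ip && !event->attr.freq &&
	    !(event->attr.sample_type & ~intel_pmu_free_running_flags(event)))
		event->hw.flags |= PERF_X86_EVENT_FREERUNNING;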

Cc: kan.liang@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.h          |  1 +
 arch/x86/kernel/cpu/perf_event_intel.c    | 15 ++++++++++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d1388fe..2860b89 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -589,6 +589,7 @@ struct x86_pmu {
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
 	int 		max_pebs_events;
+	unsigned long	free_running_flags;
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 626e7db..7ab1ba1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2507,6 +2507,15 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
 	}
 }
 
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+	unsigned long flags = x86_pmu.free_running_flags;
+
+	if (event->attr.use_clockid)
+		flags &= ~PERF_SAMPLE_TIME;
+	return flags;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -2517,7 +2526,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (event->attr.precise_ip) {
 		if (!event->attr.freq) {
 			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
-			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+			if (!(event->attr.sample_type &
+			      ~intel_pmu_free_running_flags(event)))
 				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
 		}
 		if (x86_pmu.pebs_aliases)
@@ -2954,6 +2964,8 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
+	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
+
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32-bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -2992,6 +3004,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
+	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index f7f28c3..22f236b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1295,6 +1295,7 @@ void __init intel_ds_init(void)
 			x86_pmu.pebs_record_size =
 						sizeof(struct pebs_record_skl);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
 			break;
 
 		default:
-- 
2.1.0



* [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles
  2015-05-28  4:13 Andi Kleen
  2015-05-28  4:13 ` [PATCH 1/5] x86, perf: Allow time stamp for free running PEBSv3 Andi Kleen
@ 2015-05-28  4:13 ` Andi Kleen
  2015-06-15 10:48   ` Peter Zijlstra
  2015-05-28  4:13 ` [PATCH 3/5] perf, tools: Disable branch flags/cycles for lbr call graph Andi Kleen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2015-05-28  4:13 UTC (permalink / raw)
  To: peterz; +Cc: acme, linux-kernel, jolsa, eranian, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

With LBRv5 reading the extra LBR flags like mispredict, TSX, cycles
is not free anymore, as it has moved to a separate MSR.

For callstack mode we don't need any of this information, so we can
avoid the unnecessary MSR read. Add flags to the perf interface
with which perf record can request not collecting this information.
I added sample_type flags for CYCLES and FLAGS. It's a bit unusual for
sample_type bits to be negative (disable) rather than positive (enable), but
since the legacy ABI reported the flags, we need some form of explicit
disabling to avoid breaking the ABI. In theory it would be possible
to make CYCLES opt-in (as it's not deployed yet), but I also made it
opt-out to be symmetric with FLAGS.

Once we have the flags, the x86 perf code can keep track of whether any
users need them. If no one needs them, the information is not
collected.
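
For example, a tool that wants LBR call-stack data but neither the branch
flags nor the cycle counts could set up the event roughly like this (a
sketch only, using the new sample_type bits defined below):

	struct perf_event_attr attr = { 0 };

	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN |
			   PERF_SAMPLE_BRANCH_STACK |
			   PERF_SAMPLE_BRANCH_NO_FLAGS |
			   PERF_SAMPLE_BRANCH_NO_CYCLES;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_CALL_STACK;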

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.h           |  2 ++
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 49 ++++++++++++++++++++++--------
 include/uapi/linux/perf_event.h            |  2 ++
 3 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 2860b89..c83bf07 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -201,6 +201,8 @@ struct cpu_hw_events {
 	 * Intel LBR bits
 	 */
 	int				lbr_users;
+	int				lbr_flags_users;
+	int				lbr_cycles_users;
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 1fd8b5a..5de2048 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -340,6 +340,10 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	}
 
 	cpuc->lbr_users++;
+	if (!(event->attr.sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS))
+		cpuc->lbr_flags_users++;
+	if (!(event->attr.sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES))
+		cpuc->lbr_cycles_users++;
 	perf_sched_cb_inc(event->ctx->pmu);
 }
 
@@ -358,7 +362,14 @@ void intel_pmu_lbr_disable(struct perf_event *event)
 	}
 
 	cpuc->lbr_users--;
-	WARN_ON_ONCE(cpuc->lbr_users < 0);
+	if (!(event->attr.sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS))
+		cpuc->lbr_flags_users--;
+	if (!(event->attr.sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES))
+		cpuc->lbr_cycles_users--;
+
+	WARN_ON_ONCE(cpuc->lbr_users < 0 ||
+		     cpuc->lbr_flags_users < 0 ||
+		     cpuc->lbr_cycles_users < 0);
 	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (cpuc->enabled && !cpuc->lbr_users) {
@@ -416,7 +427,9 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
  * is the same as the linear address, allowing us to merge the LIP and EIP
  * LBR formats.
  */
-static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc,
+				  bool need_flags,
+				  bool need_cycles)
 {
 	unsigned long mask = x86_pmu.lbr_nr - 1;
 	int lbr_format = x86_pmu.intel_cap.lbr_format;
@@ -434,24 +447,32 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
-		if (lbr_format == LBR_FORMAT_INFO) {
+		if (lbr_format == LBR_FORMAT_INFO &&
+		    (need_flags || need_cycles)) {
 			u64 info;
 
 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
-			mis = !!(info & LBR_INFO_MISPRED);
-			pred = !mis;
-			in_tx = !!(info & LBR_INFO_IN_TX);
-			abort = !!(info & LBR_INFO_ABORT);
-			cycles = (info & LBR_INFO_CYCLES);
+			if (need_flags) {
+				mis = !!(info & LBR_INFO_MISPRED);
+				pred = !mis;
+				in_tx = !!(info & LBR_INFO_IN_TX);
+				abort = !!(info & LBR_INFO_ABORT);
+			}
+			if (need_cycles)
+				cycles = (info & LBR_INFO_CYCLES);
 		}
 		if (lbr_flags & LBR_EIP_FLAGS) {
-			mis = !!(from & LBR_FROM_FLAG_MISPRED);
-			pred = !mis;
+			if (need_flags) {
+				mis = !!(from & LBR_FROM_FLAG_MISPRED);
+				pred = !mis;
+			}
 			skip = 1;
 		}
 		if (lbr_flags & LBR_TSX) {
-			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
-			abort = !!(from & LBR_FROM_FLAG_ABORT);
+			if (need_flags) {
+				in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+				abort = !!(from & LBR_FROM_FLAG_ABORT);
+			}
 			skip = 3;
 		}
 		from = (u64)((((s64)from) << skip) >> skip);
@@ -490,7 +511,9 @@ void intel_pmu_lbr_read(void)
 	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
 		intel_pmu_lbr_read_32(cpuc);
 	else
-		intel_pmu_lbr_read_64(cpuc);
+		intel_pmu_lbr_read_64(cpuc,
+				      cpuc->lbr_flags_users > 0,
+				      cpuc->lbr_cycles_users > 0);
 
 	intel_pmu_lbr_filter(cpuc);
 }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1b8bd4a..8dd5765 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -138,6 +138,8 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_BRANCH_NO_FLAGS		= 1U << 19,
+	PERF_SAMPLE_BRANCH_NO_CYCLES		= 1U << 20,
 
 	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
 };
-- 
2.1.0



* [PATCH 3/5] perf, tools: Disable branch flags/cycles for lbr call graph
  2015-05-28  4:13 Andi Kleen
  2015-05-28  4:13 ` [PATCH 1/5] x86, perf: Allow time stamp for free running PEBSv3 Andi Kleen
  2015-05-28  4:13 ` [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles Andi Kleen
@ 2015-05-28  4:13 ` Andi Kleen
  2015-05-28  4:13 ` [PATCH 4/5] x86, perf: Use correct index to save/restore LBR_INFO with callstack Andi Kleen
  2015-05-28  4:13 ` [PATCH 5/5] x86, perf: Limit LBR accesses to TOS in callstack mode Andi Kleen
  4 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2015-05-28  4:13 UTC (permalink / raw)
  To: peterz; +Cc: acme, linux-kernel, jolsa, eranian, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Automatically disable collecting branch flags and cycles with
--call-graph lbr. This avoids a number of extra MSR
reads in the PMI handler on Skylake.

When the kernel doesn't support the new flags, they are automatically
cleared in the fallback code.
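
The idea of the fallback is roughly the following (a sketch only; the
actual change below clears the bits in evsel's generic fallback path
rather than checking errno at this point):

	fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
	if (fd < 0 && errno == EINVAL) {
		/* Older kernel: retry without the new, unknown sample_type bits */
		attr.sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
				      PERF_SAMPLE_BRANCH_NO_CYCLES);
		fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
	}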

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/util/evsel.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c886b9f..4d8a86c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -557,6 +557,8 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
 					   "Falling back to framepointers.\n");
 			} else {
 				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+				perf_evsel__set_sample_bit(evsel, BRANCH_NO_CYCLES);
+				perf_evsel__set_sample_bit(evsel, BRANCH_NO_FLAGS);
 				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
 							PERF_SAMPLE_BRANCH_CALL_STACK;
 			}
@@ -1167,6 +1169,8 @@ fallback_missing_features:
 		evsel->attr.mmap2 = 0;
 	if (perf_missing_features.exclude_guest)
 		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+	evsel->attr.sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS|
+				     PERF_SAMPLE_BRANCH_NO_CYCLES);
 retry_sample_id:
 	if (perf_missing_features.sample_id_all)
 		evsel->attr.sample_id_all = 0;
-- 
2.1.0



* [PATCH 4/5] x86, perf: Use correct index to save/restore LBR_INFO with callstack
  2015-05-28  4:13 Andi Kleen
                   ` (2 preceding siblings ...)
  2015-05-28  4:13 ` [PATCH 3/5] perf, tools: Disable branch flags/cycles for lbr call graph Andi Kleen
@ 2015-05-28  4:13 ` Andi Kleen
  2015-08-04  8:59   ` [tip:perf/core] perf/x86/intel/lbr: Use correct index to save/ restore LBR_INFO with call stack tip-bot for Andi Kleen
  2015-05-28  4:13 ` [PATCH 5/5] x86, perf: Limit LBR accesses to TOS in callstack mode Andi Kleen
  4 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2015-05-28  4:13 UTC (permalink / raw)
  To: peterz; +Cc: acme, linux-kernel, jolsa, eranian, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Use the correct index to save/restore the LBR_INFO_x MSRs in
callstack mode. This is more of a cleanup: even with the wrong
index the registers were still saved/restored correctly, because
save and restore used the same (wrong) index consistently, and
LBR call-graph mode in perf tools does not really need anything
from LBR_INFO anyway. But it is still better to use the right index.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 5de2048..fa5c731 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -243,7 +243,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			wrmsrl(MSR_LBR_INFO_0 + i, task_ctx->lbr_info[i]);
+			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->lbr_stack_state = LBR_NONE;
 }
@@ -266,7 +266,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			rdmsrl(MSR_LBR_INFO_0 + i, task_ctx->lbr_info[i]);
+			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->lbr_stack_state = LBR_VALID;
 }
-- 
2.1.0



* [PATCH 5/5] x86, perf: Limit LBR accesses to TOS in callstack mode
  2015-05-28  4:13 Andi Kleen
                   ` (3 preceding siblings ...)
  2015-05-28  4:13 ` [PATCH 4/5] x86, perf: Use correct index to save/restore LBR_INFO with callstack Andi Kleen
@ 2015-05-28  4:13 ` Andi Kleen
  2015-08-04  8:59   ` [tip:perf/core] perf/x86/intel/lbr: " tip-bot for Andi Kleen
  4 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2015-05-28  4:13 UTC (permalink / raw)
  To: peterz; +Cc: acme, linux-kernel, jolsa, eranian, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

In callstack mode the LBR is not a ring buffer, but a stack that grows up
and down. This means that in this case we don't need to access all LBRs, only
the ones up to the TOS. Do this optimization for the normal LBR read path and
the context-switch save/restore code. For save/restore it can be done
unconditionally, as that code only runs when call-stack mode is active.

This recovers some of the cost of going to 32 LBRs on Skylake.
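
All three loops (read, save, restore) then end up with the same shape;
a condensed sketch of the hunks below:

	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel->config & LBR_CALL_STACK)	/* always true for save/restore */
		num = tos;

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		/* touch only this entry's LBR MSRs */
		...
	}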

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index fa5c731..8e1740e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -238,7 +238,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
@@ -261,7 +261,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
@@ -436,8 +436,12 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc,
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
+	int num = x86_pmu.lbr_nr;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+		num = tos;
+
+	for (i = 0; i < num; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
 		int skip = 0;
-- 
2.1.0



* Re: [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles
  2015-05-28  4:13 ` [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles Andi Kleen
@ 2015-06-15 10:48   ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2015-06-15 10:48 UTC (permalink / raw)
  To: Andi Kleen; +Cc: acme, linux-kernel, jolsa, eranian, Andi Kleen

On Wed, May 27, 2015 at 09:13:15PM -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> With LBRv5 reading the extra LBR flags like mispredict, TSX, cycles
> is not free anymore, as it has moved to a separate MSR.
> 
> For callstack mode we don't need any of this information, so we can
> avoid the unnecessary MSR read. Add flags to the perf interface
> with which perf record can request not collecting this information.
> I added sample_type flags for CYCLES and FLAGS. It's a bit unusual for
> sample_type bits to be negative (disable) rather than positive (enable), but
> since the legacy ABI reported the flags, we need some form of explicit
> disabling to avoid breaking the ABI. In theory it would be possible
> to make CYCLES opt-in (as it's not deployed yet), but I also made it
> opt-out to be symmetric with FLAGS.
> 
> Once we have the flags, the x86 perf code can keep track of whether any
> users need them. If no one needs them, the information is not
> collected.

> diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> index 1fd8b5a..5de2048 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -340,6 +340,10 @@ void intel_pmu_lbr_enable(struct perf_event *event)
>  	}
>  
>  	cpuc->lbr_users++;
> +	if (!(event->attr.sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS))
> +		cpuc->lbr_flags_users++;
> +	if (!(event->attr.sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES))
> +		cpuc->lbr_cycles_users++;
>  	perf_sched_cb_inc(event->ctx->pmu);
>  }

This patch seems to add an unfortunate number of branches. And while I
appreciate that it'll all be cheaper than the SKL MSR read, it does add
overhead to the older chips.

Also, I think it's broken. We can now have two events with the 'same'
LBR config but with different flags/cycles settings.
__intel_shared_reg_get_constraints() will find a match, but the enable
code above will disable flags/cycles for both of them.

> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 1b8bd4a..8dd5765 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -138,6 +138,8 @@ enum perf_event_sample_format {
>  	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
>  	PERF_SAMPLE_TRANSACTION			= 1U << 17,
>  	PERF_SAMPLE_REGS_INTR			= 1U << 18,
> +	PERF_SAMPLE_BRANCH_NO_FLAGS		= 1U << 19,
> +	PERF_SAMPLE_BRANCH_NO_CYCLES		= 1U << 20,
>  
>  	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
>  };

Should this not be part of perf_branch_sample_type instead? That seems
to otherwise specify all the branch stack details.


* [tip:perf/core] perf/x86/intel/lbr: Allow time stamp for free running PEBSv3
  2015-05-28  4:13 ` [PATCH 1/5] x86, perf: Allow time stamp for free running PEBSv3 Andi Kleen
@ 2015-08-04  8:56   ` tip-bot for Andi Kleen
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot for Andi Kleen @ 2015-08-04  8:56 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: tglx, linux-kernel, peterz, mingo, ak, hpa, torvalds

Commit-ID:  a7b58d211ba18c9175b139e18b68c86a6bcc3c3f
Gitweb:     http://git.kernel.org/tip/a7b58d211ba18c9175b139e18b68c86a6bcc3c3f
Author:     Andi Kleen <ak@linux.intel.com>
AuthorDate: Wed, 27 May 2015 21:13:14 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 4 Aug 2015 10:16:56 +0200

perf/x86/intel/lbr: Allow time stamp for free running PEBSv3

With PEBSv3 the PEBS record contains a time stamp. That means we can allow
free-running PEBS without a PMI even if the user program requested a time stamp.
This avoids the need to use -T to get free running PEBS, and also avoids
any problems with mis-identifying MMAPs later.

Move the free_running_flags state into a variable in x86_pmu and use it.
This only works when no explicit clock_id is set.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: eranian@google.com
Cc: jolsa@redhat.com
Cc: kan.liang@intel.com
Link: http://lkml.kernel.org/r/1432786398-23861-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h          |  1 +
 arch/x86/kernel/cpu/perf_event_intel.c    | 15 ++++++++++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3474cf2..7378b10 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -594,6 +594,7 @@ struct x86_pmu {
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
 	int 		max_pebs_events;
+	unsigned long	free_running_flags;
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 71815cf..cb112bf 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2257,6 +2257,15 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
 	}
 }
 
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+	unsigned long flags = x86_pmu.free_running_flags;
+
+	if (event->attr.use_clockid)
+		flags &= ~PERF_SAMPLE_TIME;
+	return flags;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -2267,7 +2276,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (event->attr.precise_ip) {
 		if (!event->attr.freq) {
 			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
-			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+			if (!(event->attr.sample_type &
+			      ~intel_pmu_free_running_flags(event)))
 				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
 		}
 		if (x86_pmu.pebs_aliases)
@@ -2689,6 +2699,8 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
+	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
+
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32-bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -2727,6 +2739,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
+	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 410270a..03773c2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1273,6 +1273,7 @@ void __init intel_ds_init(void)
 			x86_pmu.pebs_record_size =
 						sizeof(struct pebs_record_skl);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
 			break;
 
 		default:


* [tip:perf/core] perf/x86/intel/lbr: Use correct index to save/ restore LBR_INFO with call stack
  2015-05-28  4:13 ` [PATCH 4/5] x86, perf: Use correct index to save/restore LBR_INFO with callstack Andi Kleen
@ 2015-08-04  8:59   ` tip-bot for Andi Kleen
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot for Andi Kleen @ 2015-08-04  8:59 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: tglx, ak, mingo, hpa, peterz, torvalds, linux-kernel

Commit-ID:  e0573364b8c5b17401569ef581f1625803210f4d
Gitweb:     http://git.kernel.org/tip/e0573364b8c5b17401569ef581f1625803210f4d
Author:     Andi Kleen <ak@linux.intel.com>
AuthorDate: Wed, 27 May 2015 21:13:17 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 4 Aug 2015 10:16:59 +0200

perf/x86/intel/lbr: Use correct index to save/restore LBR_INFO with call stack

Use the correct index to save/restore the LBR_INFO_x MSRs in
callstack mode. This is more of a cleanup: even with the wrong
index the registers were still saved/restored correctly, because
save and restore used the same (wrong) index consistently, and
LBR call-graph mode in perf tools does not really need anything
from LBR_INFO anyway. But it is still better to use the right index.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: eranian@google.com
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1432786398-23861-5-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index b432c47..a5bc424 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -245,7 +245,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			wrmsrl(MSR_LBR_INFO_0 + i, task_ctx->lbr_info[i]);
+			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->lbr_stack_state = LBR_NONE;
 }
@@ -268,7 +268,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-			rdmsrl(MSR_LBR_INFO_0 + i, task_ctx->lbr_info[i]);
+			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
 	task_ctx->lbr_stack_state = LBR_VALID;
 }


* [tip:perf/core] perf/x86/intel/lbr: Limit LBR accesses to TOS in callstack mode
  2015-05-28  4:13 ` [PATCH 5/5] x86, perf: Limit LBR accesses to TOS in callstack mode Andi Kleen
@ 2015-08-04  8:59   ` tip-bot for Andi Kleen
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot for Andi Kleen @ 2015-08-04  8:59 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, tglx, peterz, torvalds, mingo, ak, hpa

Commit-ID:  90405aa02247c1a6313c33e2253f9fd2299ae60b
Gitweb:     http://git.kernel.org/tip/90405aa02247c1a6313c33e2253f9fd2299ae60b
Author:     Andi Kleen <ak@linux.intel.com>
AuthorDate: Wed, 27 May 2015 21:13:18 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 4 Aug 2015 10:16:59 +0200

perf/x86/intel/lbr: Limit LBR accesses to TOS in callstack mode

In callstack mode the LBR is not a ring buffer, but a stack that grows up
and down. This means that in this case we don't need to access all LBRs, only
the ones up to the TOS. Do this optimization for the normal LBR read path and
the context-switch save/restore code. For save/restore it can be done
unconditionally, as that code only runs when call-stack mode is active.

This recovers some of the cost of going to 32 LBRs on Skylake.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: eranian@google.com
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1432786398-23861-6-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index a5bc424..b2c9475 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -240,7 +240,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
@@ -263,7 +263,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
@@ -425,8 +425,12 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
+	int num = x86_pmu.lbr_nr;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+		num = tos;
+
+	for (i = 0; i < num; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
 		int skip = 0;


* [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles
  2015-10-20 18:46 [PATCH 1/5] x86, perf: Fix LBR call stack save/restore Andi Kleen
@ 2015-10-20 18:46 ` Andi Kleen
  0 siblings, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2015-10-20 18:46 UTC (permalink / raw)
  To: peterz; +Cc: acme, jolsa, linux-kernel, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

With LBRv5 reading the extra LBR flags like mispredict, TSX, cycles
is not free anymore, as it has moved to a separate MSR.

For callstack mode we don't need any of this information, so we can
avoid the unnecessary MSR read. Add flags to the perf interface
with which perf record can request not collecting this information.

I added branch_sample_type flags for CYCLES and FLAGS. It's a bit unusual for
branch_sample_type bits to be negative (disable) rather than positive (enable),
but since the legacy ABI reported the flags, we need some form of explicit
disabling to avoid breaking the ABI.

Once we have the flags, the x86 perf code can keep track of whether any
users need them. If no one needs them, the information is not
collected.

This cuts down the cost of LBR call stacks on Skylake significantly.
When profiling a kernel build with LBR call stacks, the average run time of
the PMI handler drops by 43%.

v2: Reworked based on Peter's feedback.
Moved the NO_FLAGS/NO_CYCLES bits to branch_sample_type.
Removed the extra checks for missing info, so that the flags are filled in
when there is no lbr_info, or when only one of the two new bits is set, to
reduce the number of branches.
Instead of counters, the information about skipping LBR_INFO is now
kept as an extra register bit that is filtered out before writing
the register to hardware.
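
Condensed, the flow with the software-only bit looks like this (a sketch;
see the hunks below for the real code):

	#define LBR_NO_INFO	(1ULL << 63)	/* Linux-only bit, never written to the MSR */

	/* intel_pmu_setup_hw_lbr_filter(): remember the decision in the extra reg config */
	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
		reg->config |= LBR_NO_INFO;

	/* __intel_pmu_lbr_enable(): strip the software bit before the MSR write */
	lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;

	/* intel_pmu_lbr_read_64(): skip the LBR_INFO rdmsr if nobody wants it */
	need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);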

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 19 +++++++++++++++++--
 include/uapi/linux/perf_event.h            |  6 ++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 0e4ea00..60e71b7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -42,6 +42,13 @@ static enum {
 #define LBR_FAR_BIT		8 /* do not capture far branches */
 #define LBR_CALL_STACK_BIT	9 /* enable call stack */
 
+/*
+ * Following bit only exists in Linux; we mask it out before writing it to
+ * the actual MSR. But it helps the constraint perf code to understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */
+
 #define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
 #define LBR_USER	(1 << LBR_USER_BIT)
 #define LBR_JCC		(1 << LBR_JCC_BIT)
@@ -52,6 +59,7 @@ static enum {
 #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
 #define LBR_FAR		(1 << LBR_FAR_BIT)
 #define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
@@ -152,7 +160,7 @@ static void __intel_pmu_lbr_enable(bool pmi)
 	 * did not change.
 	 */
 	if (cpuc->lbr_sel)
-		lbr_select = cpuc->lbr_sel->config;
+		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
 	if (!pmi)
 		wrmsrl(MSR_LBR_SELECT, lbr_select);
 
@@ -422,6 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
  */
 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
+	bool need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
 	unsigned long mask = x86_pmu.lbr_nr - 1;
 	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
@@ -442,7 +451,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
-		if (lbr_format == LBR_FORMAT_INFO) {
+		if (lbr_format == LBR_FORMAT_INFO && need_info) {
 			u64 info;
 
 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
@@ -588,6 +597,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 		if (v != LBR_IGN)
 			mask |= v;
 	}
+
 	reg = &event->hw.branch_reg;
 	reg->idx = EXTRA_REG_LBR;
 
@@ -598,6 +608,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 	 */
 	reg->config = mask ^ x86_pmu.lbr_sel_mask;
 
+	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+		reg->config |= LBR_NO_INFO;
+
 	return 0;
 }
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145..a8ffc76 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -169,6 +169,9 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
 	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */
 
+	PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT	= 13, /* no flags */
+	PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT	= 14, /* no cycles */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -189,6 +192,9 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
 	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 
+	PERF_SAMPLE_BRANCH_NO_FLAGS	= 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+	PERF_SAMPLE_BRANCH_NO_CYCLES	= 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
-- 
2.4.3


