* [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
@ 2012-06-06  0:56 Andi Kleen
  2012-06-06  0:56 ` [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events Andi Kleen
                   ` (4 more replies)
  0 siblings, 5 replies; 34+ messages in thread
From: Andi Kleen @ 2012-06-06  0:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian, a.p.zijlstra, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

cycles:p uses a special cycles encoding by default. However, that is not
architectural, so it can only be used when the CPU is known
(it already caused problems on Sandy Bridge). It may or may not work
on future CPUs.

So make it opt-in only. Right now I enabled it on Core2, Nehalem and Westmere,
but not on Sandy Bridge or Atom.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.h       |    1 +
 arch/x86/kernel/cpu/perf_event_intel.c |    6 +++++-
 2 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..cdddcef 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -355,6 +355,7 @@ struct x86_pmu {
 	 */
 	u64			intel_ctrl;
 	union perf_capabilities intel_cap;
+	bool			pebs_cycles;
 
 	/*
 	 * Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..2e40391 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1308,7 +1308,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		return ret;
 
 	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c &&
+            x86_pmu.pebs_cycles) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1772,6 +1773,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_core2_event_constraints;
 		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
+		x86_pmu.pebs_cycles = true;
 		pr_cont("Core2 events, ");
 		break;
 
@@ -1799,6 +1801,7 @@ __init int intel_pmu_init(void)
 
 		x86_add_quirk(intel_nehalem_quirk);
 
+		x86_pmu.pebs_cycles = true;
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1836,6 +1839,7 @@ __init int intel_pmu_init(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		x86_pmu.pebs_cycles = true;
 		pr_cont("Westmere events, ");
 		break;
 
-- 
1.7.7.6



* [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events
  2012-06-06  0:56 [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Andi Kleen
@ 2012-06-06  0:56 ` Andi Kleen
  2012-06-06 15:00   ` Peter Zijlstra
  2012-06-06 16:17   ` [tip:perf/core] perf/x86: Don' t " tip-bot for Andi Kleen
  2012-06-06  0:56 ` [PATCH 3/5] perf, x86: Check LBR format capability Andi Kleen
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 34+ messages in thread
From: Andi Kleen @ 2012-06-06  0:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian, a.p.zijlstra, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

On Sandy Bridge in non-HT mode there are 8 counters available. Since every
counter can write a PEBS record, assuming a maximum of 4 is incorrect. Use
the reported counter number -- with an upper limit for a static array -- instead.

Also I made the warning messages a bit more informative.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.h          |    3 ++-
 arch/x86/kernel/cpu/perf_event_intel.c    |    2 ++
 arch/x86/kernel/cpu/perf_event_intel_ds.c |    8 ++++----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index cdddcef..43cfed2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -57,7 +57,7 @@ struct amd_nb {
 };
 
 /* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		4
+#define MAX_PEBS_EVENTS		8
 
 /*
  * A debug store configuration.
@@ -365,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	int 		max_pebs_events;
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2e40391..71b8de5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1732,6 +1732,8 @@ __init int intel_pmu_init(void)
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
+	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc2..0042942 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -627,7 +627,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > 1);
+	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
 	at += n - 1;
 
 	__intel_pmu_pebs_event(event, iregs, at);
@@ -658,10 +658,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
 
 	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -677,7 +677,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			break;
 		}
 
-		if (!event || bit >= MAX_PEBS_EVENTS)
+		if (!event || bit >= x86_pmu.max_pebs_events)
 			continue;
 
 		__intel_pmu_pebs_event(event, iregs, at);
-- 
1.7.7.6



* [PATCH 3/5] perf, x86: Check LBR format capability
  2012-06-06  0:56 [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Andi Kleen
  2012-06-06  0:56 ` [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events Andi Kleen
@ 2012-06-06  0:56 ` Andi Kleen
  2012-06-06  4:29   ` Andi Kleen
  2012-06-06 10:40   ` Peter Zijlstra
  2012-06-06  0:56 ` [PATCH 4/5] x86: Add rdpmcl() Andi Kleen
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 34+ messages in thread
From: Andi Kleen @ 2012-06-06  0:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian, a.p.zijlstra, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Double-check the CPU has an LBR format we support before using it.

Also I made the init functions __init while I was at it.
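
For reference, the lbr_format field this checks comes from the
IA32_PERF_CAPABILITIES MSR, which intel_pmu_init() reads for v2+
PMUs -- roughly (a sketch from memory of the current code, details
may differ):

	if (version > 1) {
		u64 capabilities;

		/* lbr_format is bits 5:0 of IA32_PERF_CAPABILITIES */
		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
		x86_pmu.intel_cap.capabilities = capabilities;
	}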

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |   28 ++++++++++++++++++++++++----
 1 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 520b426..4176b09 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,7 @@ enum {
 	LBR_FORMAT_LIP		= 0x01,
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
+	LBR_FORMAT_MAX		= LBR_FORMAT_EIP_FLAGS
 };
 
 /*
@@ -622,9 +623,20 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
 	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
 };
 
+static bool lbr_common_init(void)
+{
+	if (x86_pmu.intel_cap.lbr_format > LBR_FORMAT_MAX) {
+		pr_cont("LBR has unknown format, ");
+		return false;
+	}
+}
+
 /* core */
-void intel_pmu_lbr_init_core(void)
+__init void intel_pmu_lbr_init_core(void)
 {
+	if (!lbr_common_init())
+		return;
+
 	x86_pmu.lbr_nr     = 4;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
@@ -638,8 +650,11 @@ void intel_pmu_lbr_init_core(void)
 }
 
 /* nehalem/westmere */
-void intel_pmu_lbr_init_nhm(void)
+__init void intel_pmu_lbr_init_nhm(void)
 {
+	if (!lbr_common_init())
+		return;
+
 	x86_pmu.lbr_nr     = 16;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
@@ -659,8 +674,11 @@ void intel_pmu_lbr_init_nhm(void)
 }
 
 /* sandy bridge */
-void intel_pmu_lbr_init_snb(void)
+__init void intel_pmu_lbr_init_snb(void)
 {
+	if (!lbr_common_init())
+		return;
+
 	x86_pmu.lbr_nr	 = 16;
 	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
@@ -679,7 +697,7 @@ void intel_pmu_lbr_init_snb(void)
 }
 
 /* atom */
-void intel_pmu_lbr_init_atom(void)
+__init void intel_pmu_lbr_init_atom(void)
 {
 	/*
 	 * only models starting at stepping 10 seems
@@ -690,6 +708,8 @@ void intel_pmu_lbr_init_atom(void)
 		pr_cont("LBR disabled due to erratum");
 		return;
 	}
+	if (!lbr_common_init())
+		return;
 
 	x86_pmu.lbr_nr	   = 8;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
-- 
1.7.7.6



* [PATCH 4/5] x86: Add rdpmcl()
  2012-06-06  0:56 [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Andi Kleen
  2012-06-06  0:56 ` [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events Andi Kleen
  2012-06-06  0:56 ` [PATCH 3/5] perf, x86: Check LBR format capability Andi Kleen
@ 2012-06-06  0:56 ` Andi Kleen
  2012-06-06 16:16   ` [tip:perf/core] " tip-bot for Andi Kleen
  2012-06-06  0:56 ` [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters Andi Kleen
  2012-06-06 10:39 ` [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Peter Zijlstra
  4 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06  0:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian, a.p.zijlstra, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Add a version of rdpmc() that directly reads into a u64.
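
A minimal usage sketch (the hwc->idx counter index is borrowed from
the perf code in the next patch; any PMC index valid for RDPMC works):

	u64 new_raw_count;

	/* read general-purpose counter hwc->idx in one shot */
	rdpmcl(hwc->idx, new_raw_count);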

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/include/asm/msr.h      |    2 ++
 arch/x86/include/asm/paravirt.h |    2 ++
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95..e489c14 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -237,6 +237,8 @@ do {							\
 	(high) = (u32)(_l >> 32);			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+
 #define rdtscp(low, high, aux)					\
 do {                                                            \
 	unsigned long long _val = native_read_tscp(&(aux));     \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf..14ce05d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -252,6 +252,8 @@ do {						\
 	high = _l >> 32;			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
+
 static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
-- 
1.7.7.6



* [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06  0:56 [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Andi Kleen
                   ` (2 preceding siblings ...)
  2012-06-06  0:56 ` [PATCH 4/5] x86: Add rdpmcl() Andi Kleen
@ 2012-06-06  0:56 ` Andi Kleen
  2012-06-06 10:46   ` Peter Zijlstra
  2012-06-06 10:39 ` [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Peter Zijlstra
  4 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06  0:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian, a.p.zijlstra, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

RDPMC is much faster than RDMSR for reading performance counters,
since it's not serializing.  Use it if possible in the perf handler.

Only tested on Sandy Bridge, so I only enabled it there so far.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c       |   20 +++++++++++++++++---
 arch/x86/kernel/cpu/perf_event.h       |    2 ++
 arch/x86/kernel/cpu/perf_event_intel.c |    1 +
 include/linux/perf_event.h             |    1 +
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..7d12888 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -86,7 +86,14 @@ u64 x86_perf_event_update(struct perf_event *event)
 	 */
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	rdmsrl(hwc->event_base, new_raw_count);
+	
+	/* 
+	 * Prefer RDPMC when available since it's faster.
+	 */
+	if (hwc->read_event_base)
+		rdpmcl(hwc->read_event_base, new_raw_count);
+	else
+		rdmsrl(hwc->event_base, new_raw_count);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
@@ -819,20 +826,27 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 				struct cpu_hw_events *cpuc, int i)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	int index;
 
 	hwc->idx = cpuc->assign[i];
 	hwc->last_cpu = smp_processor_id();
 	hwc->last_tag = ++cpuc->tags[i];
 
+	hwc->event_base	= 0;
+	hwc->read_event_base = 0;
 	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
 		hwc->config_base = 0;
-		hwc->event_base	= 0;
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
+		index = hwc->idx - X86_PMC_IDX_FIXED;
+		if (x86_pmu.prefer_rdpmc)
+			hwc->read_event_base = 0x40000000 + index;
+	       	hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + index;
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
+		if (x86_pmu.prefer_rdpmc)
+			hwc->read_event_base = hwc->idx;
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 43cfed2..75fe10a 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -385,6 +385,8 @@ struct x86_pmu {
 	 * Intel host/guest support (KVM)
 	 */
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+
+	bool		prefer_rdpmc;
 };
 
 #define x86_add_quirk(func_)						\
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 71b8de5..8a8eda2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1868,6 +1868,7 @@ __init int intel_pmu_init(void)
 			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
 
 		pr_cont("SandyBridge events, ");
+		x86_pmu.prefer_rdpmc = true;
 		break;
 
 	default:
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f325786..0627736 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -677,6 +677,7 @@ struct hw_perf_event {
 			u64		last_tag;
 			unsigned long	config_base;
 			unsigned long	event_base;
+			unsigned long	read_event_base;
 			int		idx;
 			int		last_cpu;
 
-- 
1.7.7.6



* Re: [PATCH 3/5] perf, x86: Check LBR format capability
  2012-06-06  0:56 ` [PATCH 3/5] perf, x86: Check LBR format capability Andi Kleen
@ 2012-06-06  4:29   ` Andi Kleen
  2012-06-06 10:40   ` Peter Zijlstra
  1 sibling, 0 replies; 34+ messages in thread
From: Andi Kleen @ 2012-06-06  4:29 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian, a.p.zijlstra

Andi Kleen <andi@firstfloor.org> writes:

> From: Andi Kleen <ak@linux.intel.com>
>
> Double-check the CPU has an LBR format we support before using it.

Sorry, this was an outdated version with a missing return. Here's
the correct one.

---

From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 29 May 2012 20:00:05 -0700
Subject: [PATCH] perf, x86: Check LBR format capability

Double-check the CPU has an LBR format we support before using it.

Also I made the init functions __init while I was at it.

Signed-off-by: Andi Kleen <ak@linux.intel.com>

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 520b426..753eed9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,7 @@ enum {
 	LBR_FORMAT_LIP		= 0x01,
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
+	LBR_FORMAT_MAX		= LBR_FORMAT_EIP_FLAGS
 };
 
 /*
@@ -622,9 +623,21 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
 	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
 };
 
+static bool lbr_common_init(void)
+{
+	if (x86_pmu.intel_cap.lbr_format > LBR_FORMAT_MAX) {
+		pr_cont("LBR has unknown format, ");
+		return false;
+	}
+	return true;
+}
+
 /* core */
-void intel_pmu_lbr_init_core(void)
+__init void intel_pmu_lbr_init_core(void)
 {
+	if (!lbr_common_init())
+		return;
+
 	x86_pmu.lbr_nr     = 4;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
@@ -638,8 +651,11 @@ void intel_pmu_lbr_init_core(void)
 }
 
 /* nehalem/westmere */
-void intel_pmu_lbr_init_nhm(void)
+__init void intel_pmu_lbr_init_nhm(void)
 {
+	if (!lbr_common_init())
+		return;
+
 	x86_pmu.lbr_nr     = 16;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
@@ -659,8 +675,11 @@ void intel_pmu_lbr_init_nhm(void)
 }
 
 /* sandy bridge */
-void intel_pmu_lbr_init_snb(void)
+__init void intel_pmu_lbr_init_snb(void)
 {
+	if (!lbr_common_init())
+		return;
+
 	x86_pmu.lbr_nr	 = 16;
 	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
 	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
@@ -679,7 +698,7 @@ void intel_pmu_lbr_init_snb(void)
 }
 
 /* atom */
-void intel_pmu_lbr_init_atom(void)
+__init void intel_pmu_lbr_init_atom(void)
 {
 	/*
 	 * only models starting at stepping 10 seems
@@ -690,6 +709,8 @@ void intel_pmu_lbr_init_atom(void)
 		pr_cont("LBR disabled due to erratum");
 		return;
 	}
+	if (!lbr_common_init())
+		return;
 
 	x86_pmu.lbr_nr	   = 8;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;



-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06  0:56 [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Andi Kleen
                   ` (3 preceding siblings ...)
  2012-06-06  0:56 ` [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters Andi Kleen
@ 2012-06-06 10:39 ` Peter Zijlstra
  2012-06-06 14:12   ` Andi Kleen
  4 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 10:39 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, eranian, Andi Kleen

On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> cycles:p uses a special cycles encoding by default. However, that is not
> architectural, so it can only be used when the CPU is known
> (it already caused problems on Sandy Bridge). It may or may not work
> on future CPUs.
> 
> So make it opt-in only. Right now I enabled it on Core2, Nehalem and Westmere,
> but not on Sandy Bridge or Atom.

No. Also, c0 is a PEBS capable event on Atom just fine.


* Re: [PATCH 3/5] perf, x86: Check LBR format capability
  2012-06-06  0:56 ` [PATCH 3/5] perf, x86: Check LBR format capability Andi Kleen
  2012-06-06  4:29   ` Andi Kleen
@ 2012-06-06 10:40   ` Peter Zijlstra
  2012-06-06 14:14     ` Andi Kleen
  1 sibling, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 10:40 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, eranian, Andi Kleen

On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> Double-check the CPU has an LBR format we support before using it.
> 
> Also I made the init functions __init while I was at it.

Why? Its all after a model test anyway.


* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06  0:56 ` [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters Andi Kleen
@ 2012-06-06 10:46   ` Peter Zijlstra
  2012-06-06 14:16     ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 10:46 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, eranian, Andi Kleen

On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> RDPMC is much faster than RDMSR for reading performance counters,
> since it's not serializing.  Use it if possible in the perf handler.
> 
> Only tested on Sandy Bridge, so I only enabled it there so far.

That's just stupid.. I took Vince's patch from a while back.


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 10:39 ` [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural Peter Zijlstra
@ 2012-06-06 14:12   ` Andi Kleen
  2012-06-06 14:14     ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:12 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 12:39:48PM +0200, Peter Zijlstra wrote:
> On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > cycles:p uses a special cycles encoding by default. However, that is not
> > architectural, so it can only be used when the CPU is known
> > (it already caused problems on Sandy Bridge). It may or may not work
> > on future CPUs.
> > 
> > So make it opt-in only. Right now I enabled it on Core2, Nehalem and Westmere,
> > but not on Sandy Bridge or Atom.
> 
> No. 

What do you mean? Are you claiming it's architectural?

> Also, c0 is a PEBS capable event on Atom just fine.

Ok, I can enable it there.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 3/5] perf, x86: Check LBR format capability
  2012-06-06 10:40   ` Peter Zijlstra
@ 2012-06-06 14:14     ` Andi Kleen
  2012-06-06 14:22       ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:14 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 12:40:24PM +0200, Peter Zijlstra wrote:
> On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > Double-check the CPU has an LBR format we support before using it.
> > 
> > Also I made the init functions __init while I was at it.
> 
> Why? Its all after a model test anyway.

Because the spec says we should check it.

It could catch bad VMs and other inconsistencies and it's the right thing
to do.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:12   ` Andi Kleen
@ 2012-06-06 14:14     ` Peter Zijlstra
  2012-06-06 14:23       ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:14 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:12 -0700, Andi Kleen wrote:
> On Wed, Jun 06, 2012 at 12:39:48PM +0200, Peter Zijlstra wrote:
> > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > > From: Andi Kleen <ak@linux.intel.com>
> > > 
> > > cycles:p uses a special cycles encoding by default. However, that is not
> > > architectural, so it can only be used when the CPU is known
> > > (it already caused problems on Sandy Bridge). It may or may not work
> > > on future CPUs.
> > > 
> > > So make it opt-in only. Right now I enabled it on Core2, Nehalem and Westmere,
> > > but not on Sandy Bridge or Atom.
> > 
> > No. 
> 
> What do you mean? Are you claiming it's architectural?

I mean this patch is shite.

You don't disable it because it doesn't work someplace; you fix it.

But don't bother, its already done.


* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06 10:46   ` Peter Zijlstra
@ 2012-06-06 14:16     ` Andi Kleen
  2012-06-06 14:21       ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:16 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 12:46:19PM +0200, Peter Zijlstra wrote:
> On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > RDPMC is much faster than RDMSR for reading performance counters,
> > since it's not serializing.  Use it if possible in the perf handler.
> > 
> > Only tested on Sandy Bridge, so I only enabled it there so far.
> 
> That's just stupid.. I took Vince's patch from a while back.

What do you mean? It's significantly faster to read the counters this
way, because it avoids serialization and other overhead.

Vince's patch only enabled it for user space, I believe. This is for lowering
the kernel PMI handler overhead.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06 14:16     ` Andi Kleen
@ 2012-06-06 14:21       ` Peter Zijlstra
  2012-06-06 14:33         ` Stephane Eranian
  2012-06-06 14:41         ` Andi Kleen
  0 siblings, 2 replies; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:16 -0700, Andi Kleen wrote:
> On Wed, Jun 06, 2012 at 12:46:19PM +0200, Peter Zijlstra wrote:
> > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > > From: Andi Kleen <ak@linux.intel.com>
> > > 
> > > RDPMC is much faster than RDMSR for reading performance counters,
> > > since it's not serializing.  Use it if possible in the perf handler.
> > > 
> > > Only tested on Sandy Bridge, so I only enabled it there so far.
> > 
> > That's just stupid.. I took Vince's patch from a while back.
> 
> What do you mean? It's significantly faster to read the counters this
> way, because it avoids serialization and other overhead. 

What I'm saying is that you're only enabling it for SNB and being too lazy to
test anything else. Nor do I think it's worth the conditional; all chips
we have PMU support for have the RDPMC instruction.

> Vince's patch only enabled it for user space, I believe. This is for lowering
> the kernel PMI handler overhead.

No, his patch did the kernel thing. Furthermore he actually tested it on
a bunch of machines.


* Re: [PATCH 3/5] perf, x86: Check LBR format capability
  2012-06-06 14:14     ` Andi Kleen
@ 2012-06-06 14:22       ` Peter Zijlstra
  2012-06-06 14:37         ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:22 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:14 -0700, Andi Kleen wrote:
> On Wed, Jun 06, 2012 at 12:40:24PM +0200, Peter Zijlstra wrote:
> > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > > From: Andi Kleen <ak@linux.intel.com>
> > > 
> > > Double-check the CPU has an LBR format we support before using it.
> > > 
> > > Also I made the init functions __init while I was at it.
> > 
> > Why? Its all after a model test anyway.
> 
> Because the spec says we should check it.

The spec says lots of things.. but being that LBR is very much model
specific I really can't be bothered.

> It could catch bad VMs and other inconsistencies and it's the right thing
> to do.

That might be a reason, but does it? And why wasn't that in the
changelog?


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:14     ` Peter Zijlstra
@ 2012-06-06 14:23       ` Andi Kleen
  2012-06-06 14:28         ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:23 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 04:14:21PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 07:12 -0700, Andi Kleen wrote:
> > On Wed, Jun 06, 2012 at 12:39:48PM +0200, Peter Zijlstra wrote:
> > > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > > > From: Andi Kleen <ak@linux.intel.com>
> > > > 
> > > > cycles:p uses a special cycles encoding by default. However, that is not
> > > > architectural, so it can only be used when the CPU is known
> > > > (it already caused problems on Sandy Bridge). It may or may not work
> > > > on future CPUs.
> > > > 
> > > > So make it opt-in only. Right now I enabled it on Core2, Nehalem and Westmere,
> > > > but not on Sandy Bridge or Atom.
> > > 
> > > No. 
> > 
> > What do you mean? Are you claiming it's architectural?
> 
> I mean this patch is shite.
> 
> You don't disable it because it doesn't work someplace; you fix it.

I disable it because it's non-architectural, like the description
says.

This code is not behind model numbers. You cannot do random model-specific
hacks like this without a model number check. And you already got burned
for it. It may well break again in the future because there's no
guarantee of these things working.

Arch perfmon just means you can use the parts guaranteed in
arch perfmon and nothing more.

Now, exactly which model numbers it should be enabled on can be debated.
I was very conservative, but the set could likely be larger.

But it's very clear this cannot be done without a model check.
I don't understand how you can even argue against that.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:23       ` Andi Kleen
@ 2012-06-06 14:28         ` Peter Zijlstra
  2012-06-06 14:35           ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:28 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:23 -0700, Andi Kleen wrote:
> 
> But it's very clear this cannot be done without a model check.
> I don't understand how you can even argue against that. 

Who said I fixed it without a model check?

But simply disabling it for a model isn't how you do things. 


* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06 14:21       ` Peter Zijlstra
@ 2012-06-06 14:33         ` Stephane Eranian
  2012-06-06 14:38           ` Peter Zijlstra
  2012-06-06 14:41         ` Andi Kleen
  1 sibling, 1 reply; 34+ messages in thread
From: Stephane Eranian @ 2012-06-06 14:33 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, Andi Kleen, linux-kernel

On Wed, Jun 6, 2012 at 4:21 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Wed, 2012-06-06 at 07:16 -0700, Andi Kleen wrote:
>> On Wed, Jun 06, 2012 at 12:46:19PM +0200, Peter Zijlstra wrote:
>> > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
>> > > From: Andi Kleen <ak@linux.intel.com>
>> > >
>> > > RDPMC is much faster than RDMSR for reading performance counters,
>> > > since it's not serializing.  Use it if possible in the perf handler.
>> > >
>> > > Only tested on Sandy Bridge, so I only enabled it there so far.
>> >
>> > That's just stupid.. I took Vince's patch from a while back.
>>
>> What do you mean? It's significantly faster to read the counters this
>> way, because it avoids serialization and other overhead.
>
> What I'm saying is that you're only enabling it for SNB and being too lazy to
> test anything else. Nor do I think it's worth the conditional; all chips
> we have PMU support for have the RDPMC instruction.
>
>> Vince's patch only enabled it for user space, I believe. This is for lowering
>> the kernel PMI handler overhead.
>
> No, his patch did the kernel thing. Furthermore he actually tested it on
> a bunch of machines.

Yes, his patch did, but somehow I don't see this code in tip-x86.
The thing that I would worry about between rdmsrl() and rdpmc()
is what happens to the upper bits. rdpmc() returns bits [N-1:0] of
the N-bit counters. N is 48 (or 40) nowadays. When you read 64 bits'
worth, what do you get in bits [63:N]? Are those sign-extended or
zero-extended? Is that the same behavior across all Intel and AMD
processors? With perf_events, I think the (N-1)th bit is always set.


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:28         ` Peter Zijlstra
@ 2012-06-06 14:35           ` Andi Kleen
  2012-06-06 14:42             ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:35 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 04:28:30PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 07:23 -0700, Andi Kleen wrote:
> > 
> > But it's very clear this cannot be done without a model check.
> > I don't understand how you can even argue against that. 
> 
> Who said I fixed it without a model check?

Do you mean the explicit PEBS disable?
That's only for one model, but it did not address the underlying
problem that Linux did non architecturally guaranteed things
just based on ArchPerfmon.

The PEBS disable still is needed even with my patch of course.

> 
> But simply disabling it for a model isn't how you do things. 

Do you want it enabled per model?  I can turn the flag around.
Anything else?

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 3/5] perf, x86: Check LBR format capability
  2012-06-06 14:22       ` Peter Zijlstra
@ 2012-06-06 14:37         ` Andi Kleen
  0 siblings, 0 replies; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:37 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 04:22:36PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 07:14 -0700, Andi Kleen wrote:
> > On Wed, Jun 06, 2012 at 12:40:24PM +0200, Peter Zijlstra wrote:
> > > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > > > From: Andi Kleen <ak@linux.intel.com>
> > > > 
> > > > Double-check the CPU has an LBR format we support before using it.
> > > > 
> > > > Also I made the init functions __init while I was at it.
> > > 
> > > Why? Its all after a model test anyway.
> > 
> > Because the spec says we should check it.
> 
> The spec says lots of things.. but being that LBR is very much model
> specific I really can't be bothered.

Well, you don't need to be; I did it already :-)

> 
> > It could catch bad VMs and other inconsistencies and it's the right thing
> > to do.
> 
> That might be a reason, but does it?, and why wasn't that in the
> changelog?

No, it will not catch KVM because zero is a valid version :-/
That would still need the MISC_ENABLE re-read, I think.

Still, it's the right thing to do, I believe.

-andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06 14:33         ` Stephane Eranian
@ 2012-06-06 14:38           ` Peter Zijlstra
  0 siblings, 0 replies; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:38 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Andi Kleen, Andi Kleen, linux-kernel

On Wed, 2012-06-06 at 16:33 +0200, Stephane Eranian wrote:
> Yes, his patch did, but somehow I don't see this code in tip-x86.
> The thing that I would worry about between rdmsrl() and rdpmc()
> is what happens to the upper bits. rdpmc() returns bits [N-1:0] of
> the N-bit counters. N is 48 (or 40) nowadays. When you read 64 bits'
> worth, what do you get in bits [63:N]? Are those sign-extended or
> zero-extended? Is that the same behavior across all Intel and AMD
> processors? With perf_events, I think the (N-1)th bit is always set.
> 
Queued his patch after I saw Andi's trainwreck -- had totally forgotten
about it :/

For the kernel it doesn't matter; we manually sign-extend for however
many bits the counter has.
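
Roughly the existing x86_perf_event_update() logic (a sketch from
memory; cntval_bits is the counter width reported via CPUID):

	int shift = 64 - x86_pmu.cntval_bits;
	s64 delta;

	/*
	 * Shift the partial-width values up to bit 63 and back down:
	 * the arithmetic right shift sign-extends them, so whatever
	 * the CPU put in bits [63:N] of the raw read never matters.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);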


* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06 14:21       ` Peter Zijlstra
  2012-06-06 14:33         ` Stephane Eranian
@ 2012-06-06 14:41         ` Andi Kleen
  2012-06-06 14:45           ` Peter Zijlstra
  1 sibling, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:41 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 04:21:12PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 07:16 -0700, Andi Kleen wrote:
> > On Wed, Jun 06, 2012 at 12:46:19PM +0200, Peter Zijlstra wrote:
> > > On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > > > From: Andi Kleen <ak@linux.intel.com>
> > > > 
> > > > RDPMC is much faster than RDMSR for reading performance counters,
> > > > since it's not serializing.  Use it if possible in the perf handler.
> > > > 
> > > > Only tested on Sandy Bridge, so I only enabled it there so far.
> > > 
> > > That's just stupid.. I took Vince's patch from a while back.
> > 
> > What do you mean? It's significantly faster to read the counters this
> > way, because it avoids serialization and other overhead. 
> 
> What I'm saying is that you're only enabling it for SNB and being too lazy to
> test anything else. Nor do I think it's worth the conditional; all chips
> we have PMU support for have the RDPMC instruction.

The reason I avoided it on everything is that some old chips (NetBurst'ish,
I think) were only able to read the low 32 bits through RDPMC.

If you prefer, I can enable it on all the P6 cores?  I cannot test them
all, however.

> 
> > Vince's patch only enabled it for user space I believe, This is for lowering
> > the kernel PMI handler overhead.
> 
> No, his patch did the kernel thing. Furthermore he actually tested it on
> a bunch of machines.

Ok. I wasn't aware of that. But it's not merged; what happened to it?

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:35           ` Andi Kleen
@ 2012-06-06 14:42             ` Peter Zijlstra
  2012-06-06 14:49               ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:42 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:35 -0700, Andi Kleen wrote:
> > But simply disabling it for a model isn't how you do things. 
> 
> Do you want it enabled per model?  I can turn the flag around.
> Anything else? 

The below is a refresh from a patch I did about a year ago when we ran
into the SNB trainwreck. I never merged it because SNB.. but IVB seems
to work.


---
Subject: perf, x86: Implement cycles:p for SNB/IVB
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 05 Jun 2012 10:26:43 +0200

Now that there's finally a chip with working PEBS (IvyBridge), we can
implement cycles:p for SNB/IVB.

Cc: Stephane Eranian <eranian@google.com>
Requested-and-tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins
---
 arch/x86/kernel/cpu/perf_event.h       |    1 
 arch/x86/kernel/cpu/perf_event_intel.c |   50 +++++++++++++++++++++++++++------
 2 files changed, 43 insertions(+), 8 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -365,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1336,15 +1336,9 @@ static void intel_put_event_constraints(
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1365,10 +1359,48 @@ static int intel_pmu_hw_config(struct pe
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1643,6 +1675,7 @@ static __initconst const struct x86_pmu
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1885,6 +1918,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;



* Re: [PATCH 5/5] perf, x86: Prefer RDPMC over RDMSR for reading counters
  2012-06-06 14:41         ` Andi Kleen
@ 2012-06-06 14:45           ` Peter Zijlstra
  0 siblings, 0 replies; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:45 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:41 -0700, Andi Kleen wrote:

> The reason I avoided it on everything is that some old chips (NetBurst'ish,
> I think) were only able to read the low 32 bits through RDPMC.

Can you verify that? Surely Intel still has one of those space heaters
lying about. If so, we need to make the P4 driver do something else.

> Ok. I wasn't aware of that. But it's not merged; what happened to it?

I forgot about it and queued it instead of your patch, so it should be
'out' there soon.



* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:42             ` Peter Zijlstra
@ 2012-06-06 14:49               ` Andi Kleen
  2012-06-06 14:53                 ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 14:49 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 04:42:06PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 07:35 -0700, Andi Kleen wrote:
> > > But simply disabling it for a model isn't how you do things. 
> > 
> > Do you want it enabled per model?  I can turn the flag around.
> > Anything else? 
> 
> The below is a refresh from a patch I did about a year ago when we ran
> into the SNB trainwreck. I never merged it because SNB.. but IVB seems
> to work.

Hmm, your line numbers do not fully match mine.

Is this for the intel_pmu or for the core_pmu?

> +     .pebs_aliases           = intel_pebs_aliases_core2,

In any case, since the code later does

        version = eax.split.version_id;
        if (version < 2)
                x86_pmu = core_pmu;
        else
                x86_pmu = intel_pmu;

so core_pmu would be active even without a model check, which would
still be wrong.

So your patch would fix the problem iff you only fill in the pebs_aliases
in the model number switch for known Core model numbers.

If you fix that, it would be a valid replacement for mine, I believe.

-Andi


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:49               ` Andi Kleen
@ 2012-06-06 14:53                 ` Peter Zijlstra
  2012-06-06 16:08                   ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 14:53 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 07:49 -0700, Andi Kleen wrote:

> Is this for the intel_pmu or for the core_pmu?
> 
> > +     .pebs_aliases           = intel_pebs_aliases_core2,

intel_pmu; we don't actually support PEBS on first-gen Core, I think.

> In any case since the code later does
> 
>         version = eax.split.version_id;
>         if (version < 2)
>                 x86_pmu = core_pmu;
>         else
>                 x86_pmu = intel_pmu;
> 
> so core_pmu would be active even without a model check, which would
> still be wrong.

By not setting pebs_aliases for core_pmu,

> So your patch would fix the problem iff you only fill in the pebs_aliases
> in the model number switch for known Core model numbers.

+       if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+               x86_pmu.pebs_aliases(event);

Never happens, and we're happy again.


* Re: [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events
  2012-06-06  0:56 ` [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events Andi Kleen
@ 2012-06-06 15:00   ` Peter Zijlstra
  2012-06-06 16:10     ` Andi Kleen
  2012-06-06 16:17   ` [tip:perf/core] perf/x86: Don' t " tip-bot for Andi Kleen
  1 sibling, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 15:00 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, eranian, Andi Kleen

On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> On Sandy Bridge in non-HT mode there are 8 counters available. Since every
> counter can write a PEBS record, assuming a maximum of 4 is incorrect. Use
> the reported counter number -- with an upper limit for a static array -- instead.


While I queued this patch, it's effectively a NOP since all SNB PEBS
constraints are still on 4 counters.


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 14:53                 ` Peter Zijlstra
@ 2012-06-06 16:08                   ` Andi Kleen
  2012-06-06 17:10                     ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 16:08 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 04:53:05PM +0200, Peter Zijlstra wrote:
> 
> > So your patch would fix the problem iff you only fill in the pebs_aliases
> > in the model number switch for known Core model numbers.
> 
> +       if (event->attr.precise_ip && x86_pmu.pebs_aliases)
> +               x86_pmu.pebs_aliases(event);
> 
> Never happens, and we're happy again.

How about newer unknown Intel CPUs with arch_perfmon >= 3? Those would get 
intel_pmu, correct?  There's no guarantee that they support the trick.

-Andi


-- 
ak@linux.intel.com -- Speaking for myself only.


* Re: [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events
  2012-06-06 15:00   ` Peter Zijlstra
@ 2012-06-06 16:10     ` Andi Kleen
  2012-06-06 17:25       ` Peter Zijlstra
  0 siblings, 1 reply; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 16:10 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, linux-kernel, eranian, Andi Kleen

On Wed, Jun 06, 2012 at 05:00:03PM +0200, Peter Zijlstra wrote:
> On Tue, 2012-06-05 at 17:56 -0700, Andi Kleen wrote:
> > On Sandy Bridge in non-HT mode there are 8 counters available. Since every
> > counter can write a PEBS record, assuming a maximum of 4 is incorrect. Use
> > the reported counter number -- with an upper limit for a static array -- instead.
> 
> 
> While I queued this patch, it's effectively a NOP since all SNB PEBS
> constraints are still on 4 counters.

Yes, we need to update the constraints too. I think Stephane was looking at that.
Perhaps two sets, depending on the counter count in CPUID?
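
For reference, the counter count such a split would key off is already
read from CPUID leaf 0xA in intel_pmu_init() -- roughly (a sketch from
the current code):

	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	union cpuid10_edx edx;
	unsigned int unused;

	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
	x86_pmu.num_counters = eax.split.num_counters;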

FWIW I saw the WARN_ON triggering.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.


* [tip:perf/core] x86: Add rdpmcl()
  2012-06-06  0:56 ` [PATCH 4/5] x86: Add rdpmcl() Andi Kleen
@ 2012-06-06 16:16   ` tip-bot for Andi Kleen
  0 siblings, 0 replies; 34+ messages in thread
From: tip-bot for Andi Kleen @ 2012-06-06 16:16 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, a.p.zijlstra, ak, tglx

Commit-ID:  1ff4d58a192aea7f245981e2579765f961f6eb9c
Gitweb:     http://git.kernel.org/tip/1ff4d58a192aea7f245981e2579765f961f6eb9c
Author:     Andi Kleen <ak@linux.intel.com>
AuthorDate: Tue, 5 Jun 2012 17:56:50 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 6 Jun 2012 17:23:27 +0200

x86: Add rdpmcl()

Add a version of rdpmc() that directly reads into a u64.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338944211-28275-4-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/msr.h      |    2 ++
 arch/x86/include/asm/paravirt.h |    2 ++
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95..e489c14 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -237,6 +237,8 @@ do {							\
 	(high) = (u32)(_l >> 32);			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+
 #define rdtscp(low, high, aux)					\
 do {                                                            \
 	unsigned long long _val = native_read_tscp(&(aux));     \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf..14ce05d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -252,6 +252,8 @@ do {						\
 	high = _l >> 32;			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
+
 static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);


* [tip:perf/core] perf/x86: Don' t assume there can be only 4 PEBS events
  2012-06-06  0:56 ` [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events Andi Kleen
  2012-06-06 15:00   ` Peter Zijlstra
@ 2012-06-06 16:17   ` tip-bot for Andi Kleen
  1 sibling, 0 replies; 34+ messages in thread
From: tip-bot for Andi Kleen @ 2012-06-06 16:17 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, a.p.zijlstra, ak, tglx

Commit-ID:  70ab7003dec58afeae7f5d681dfa309b3a259f03
Gitweb:     http://git.kernel.org/tip/70ab7003dec58afeae7f5d681dfa309b3a259f03
Author:     Andi Kleen <ak@linux.intel.com>
AuthorDate: Tue, 5 Jun 2012 17:56:48 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 6 Jun 2012 17:23:40 +0200

perf/x86: Don't assume there can be only 4 PEBS events

On Sandy Bridge in non-HT mode there are 8 counters available.
Since every counter can write a PEBS record, assuming a maximum
of 4 is incorrect. Use the reported counter number -- with an
upper limit for a static array -- instead.

Also I made the warning messages a bit more informative.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338944211-28275-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h          |    3 ++-
 arch/x86/kernel/cpu/perf_event_intel.c    |    2 ++
 arch/x86/kernel/cpu/perf_event_intel_ds.c |    8 ++++----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 23b5710..3df3de9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -69,7 +69,7 @@ struct amd_nb {
 };
 
 /* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		4
+#define MAX_PEBS_EVENTS		8
 
 /*
  * A debug store configuration.
@@ -378,6 +378,7 @@ struct x86_pmu {
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
+	int 		max_pebs_events;
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 187c294..e23e71f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1800,6 +1800,8 @@ __init int intel_pmu_init(void)
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
+	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 35e2192..026373e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > 1);
+	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
 	at += n - 1;
 
 	__intel_pmu_pebs_event(event, iregs, at);
@@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
 
 	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			break;
 		}
 
-		if (!event || bit >= MAX_PEBS_EVENTS)
+		if (!event || bit >= x86_pmu.max_pebs_events)
 			continue;
 
 		__intel_pmu_pebs_event(event, iregs, at);


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 16:08                   ` Andi Kleen
@ 2012-06-06 17:10                     ` Peter Zijlstra
  2012-06-06 17:48                       ` Andi Kleen
  0 siblings, 1 reply; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 17:10 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andi Kleen, linux-kernel, eranian

On Wed, 2012-06-06 at 18:08 +0200, Andi Kleen wrote:
> How about newer unknown Intel CPUs with arch_perfmon >= 3? Those would get 
> intel_pmu, correct?  There's no guarantee that they support the trick.

But they will have no PEBS constraints set, so they effectively don't have
PEBS.


* Re: [PATCH 2/5] perf, x86: Don't assume there can be only 4 PEBS events
  2012-06-06 16:10     ` Andi Kleen
@ 2012-06-06 17:25       ` Peter Zijlstra
  0 siblings, 0 replies; 34+ messages in thread
From: Peter Zijlstra @ 2012-06-06 17:25 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, eranian, Andi Kleen

On Wed, 2012-06-06 at 18:10 +0200, Andi Kleen wrote:
> FWIW I saw the WARN_ON triggering.

What hardware and what workload?


* Re: [PATCH 1/5] perf, x86: Don't assume the alternative cycles encoding is architectural
  2012-06-06 17:10                     ` Peter Zijlstra
@ 2012-06-06 17:48                       ` Andi Kleen
  0 siblings, 0 replies; 34+ messages in thread
From: Andi Kleen @ 2012-06-06 17:48 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, Andi Kleen, linux-kernel, eranian

On Wed, Jun 06, 2012 at 07:10:45PM +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 18:08 +0200, Andi Kleen wrote:
> > How about newer unknown Intel CPUs with arch_perfmon >= 3? Those would get 
> > intel_pmu, correct?  There's no guarantee that they support the trick.
> 
> But they will have no PEBS constraints set, so they effectively don't have
> PEBS.

Ok. I see you want to do it the tricky way instead of the obvious one.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

