* [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
@ 2010-03-29 16:36 Robert Richter
  2010-03-29 16:36 ` [PATCH 1/3] perf/core, x86: undo some *_counter* -> *_event* renames Robert Richter
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Robert Richter @ 2010-03-29 16:36 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML

This patch set unifies performance counter bit masks for x86. All masks
are almost the same for all x86 models and thus can use the same macro
definitions in arch/x86/include/asm/perf_event.h. It removes duplicate
code. There is also a patch that reverts some changes of the big
perf_counter -> perf_event rename.
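
As a rough sketch of the unification idea (the helper below is made up
for illustration only; X86_RAW_EVENT_MASK and AMD64_RAW_EVENT_MASK are
the macros introduced in patch 3/3):

static u64 sketch_raw_event(u64 hw_event, int is_amd)
{
	/*
	 * With the eventsel bit fields defined once in
	 * <asm/perf_event.h>, each vendor's raw-event filter reduces to
	 * picking a mask; AMD64 merely adds event-select bits 35:32 on
	 * top of the common layout.
	 */
	u64 mask = is_amd ? AMD64_RAW_EVENT_MASK : X86_RAW_EVENT_MASK;

	return hw_event & mask;
}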

-Robert




* [PATCH 1/3] perf/core, x86: undo some *_counter* -> *_event* renames
  2010-03-29 16:36 [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Robert Richter
@ 2010-03-29 16:36 ` Robert Richter
  2010-04-02 19:09   ` [tip:perf/core] perf, x86: Undo " tip-bot for Robert Richter
  2010-03-29 16:36 ` [PATCH 2/3] perf/core, x86: removing p6_pmu_raw_event() Robert Richter
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 29+ messages in thread
From: Robert Richter @ 2010-03-29 16:36 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter

The big rename

 cdd6c48 perf: Do the big rename: Performance Counters -> Performance Events

accidentally renamed some members of structs that were named after
registers in the spec. To avoid confusion, this patch reverts some
changes. The related specs are MSR descriptions in AMD's BKDGs and the
ARCHITECTURAL PERFORMANCE MONITORING section in the Intel 64 and IA-32
Architectures Software Developer's Manuals.

This patch does:

 $ sed -i -e 's:num_events:num_counters:g' \
   arch/x86/include/asm/perf_event.h \
   arch/x86/kernel/cpu/perf_event_amd.c \
   arch/x86/kernel/cpu/perf_event.c \
   arch/x86/kernel/cpu/perf_event_intel.c \
   arch/x86/kernel/cpu/perf_event_p6.c \
   arch/x86/kernel/cpu/perf_event_p4.c \
   arch/x86/oprofile/op_model_ppro.c

 $ sed -i -e 's:event_bits:cntval_bits:g' -e 's:event_mask:cntval_mask:g' \
   arch/x86/kernel/cpu/perf_event_amd.c \
   arch/x86/kernel/cpu/perf_event.c \
   arch/x86/kernel/cpu/perf_event_intel.c \
   arch/x86/kernel/cpu/perf_event_p6.c \
   arch/x86/kernel/cpu/perf_event_p4.c

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h      |    4 +-
 arch/x86/kernel/cpu/perf_event.c       |   74 ++++++++++++++++----------------
 arch/x86/kernel/cpu/perf_event_amd.c   |   12 +++---
 arch/x86/kernel/cpu/perf_event_intel.c |   14 +++---
 arch/x86/kernel/cpu/perf_event_p4.c    |   14 +++---
 arch/x86/kernel/cpu/perf_event_p6.c    |    6 +-
 arch/x86/oprofile/op_model_ppro.c      |    4 +-
 7 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 124dddd..987bf67 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -67,7 +67,7 @@
 union cpuid10_eax {
 	struct {
 		unsigned int version_id:8;
-		unsigned int num_events:8;
+		unsigned int num_counters:8;
 		unsigned int bit_width:8;
 		unsigned int mask_length:8;
 	} split;
@@ -76,7 +76,7 @@ union cpuid10_eax {
 
 union cpuid10_edx {
 	struct {
-		unsigned int num_events_fixed:4;
+		unsigned int num_counters_fixed:4;
 		unsigned int reserved:28;
 	} split;
 	unsigned int full;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6f66d4a..f2fc2d8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -194,10 +194,10 @@ struct x86_pmu {
 	u64		(*event_map)(int);
 	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_events;
-	int		num_events_fixed;
-	int		event_bits;
-	u64		event_mask;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
 	int		apic;
 	u64		max_period;
 	struct event_constraint *
@@ -267,7 +267,7 @@ static u64
 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int shift = 64 - x86_pmu.event_bits;
+	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
 	s64 delta;
@@ -319,12 +319,12 @@ static bool reserve_pmc_hardware(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
@@ -335,7 +335,7 @@ eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_events;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -351,7 +351,7 @@ static void release_pmc_hardware(void)
 {
 	int i;
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
@@ -546,7 +546,7 @@ static void x86_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -581,7 +581,7 @@ static void x86_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
@@ -656,14 +656,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 * assign events to counters starting with most
 	 * constrained events.
 	 */
-	wmax = x86_pmu.num_events;
+	wmax = x86_pmu.num_counters;
 
 	/*
 	 * when fixed event counters are present,
 	 * wmax is incremented by 1 to account
 	 * for one more choice
 	 */
-	if (x86_pmu.num_events_fixed)
+	if (x86_pmu.num_counters_fixed)
 		wmax++;
 
 	for (w = 1, num = n; num && w <= wmax; w++) {
@@ -713,7 +713,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	struct perf_event *event;
 	int n, max_count;
 
-	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 
 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -903,7 +903,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
 	wrmsrl(hwc->event_base + idx,
-			(u64)(-left) & x86_pmu.event_mask);
+			(u64)(-left) & x86_pmu.cntval_mask);
 
 	perf_event_update_userpage(event);
 
@@ -986,7 +986,7 @@ void perf_event_print_debug(void)
 	unsigned long flags;
 	int cpu, idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
@@ -1010,7 +1010,7 @@ void perf_event_print_debug(void)
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
 
@@ -1023,7 +1023,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1088,7 +1088,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1096,7 +1096,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		hwc = &event->hw;
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -1398,46 +1398,46 @@ void __init init_hw_perf_events(void)
 	if (x86_pmu.quirks)
 		x86_pmu.quirks();
 
-	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1;
-	perf_max_events = x86_pmu.num_events;
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	perf_max_events = x86_pmu.num_counters;
 
-	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 	}
 
 	x86_pmu.intel_ctrl |=
-		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
-				   0, x86_pmu.num_events);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+				   0, x86_pmu.num_counters);
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if (c->cmask != INTEL_ARCH_FIXED_MASK)
 				continue;
 
-			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
-			c->weight += x86_pmu.num_events;
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
 		}
 	}
 
 	pr_info("... version:                %d\n",     x86_pmu.version);
-	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
-	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
-	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
+	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
+	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
+	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
 	perf_cpu_notifier(x86_pmu_notifier);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 358a8e3..9d363ce 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -158,7 +158,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * be removed on one CPU at a time AND PMU is disabled
 	 * when we come here
 	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (nb->owners[i] == event) {
 			cmpxchg(nb->owners+i, event, NULL);
 			break;
@@ -208,7 +208,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct amd_nb *nb = cpuc->amd_nb;
 	struct perf_event *old = NULL;
-	int max = x86_pmu.num_events;
+	int max = x86_pmu.num_counters;
 	int i, j, k = -1;
 
 	/*
@@ -286,7 +286,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 	/*
 	 * initialize all possible NB constraints
 	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
@@ -370,9 +370,9 @@ static __initconst struct x86_pmu amd_pmu = {
 	.event_map		= amd_pmu_event_map,
 	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
+	.num_counters		= 4,
+	.cntval_bits		= 48,
+	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 676aac2..0d68531 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -653,18 +653,18 @@ static void intel_pmu_reset(void)
 	unsigned long flags;
 	int idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
 
 	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
 		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
 	}
 	if (ds)
@@ -901,16 +901,16 @@ static __init int intel_pmu_init(void)
 		x86_pmu = intel_pmu;
 
 	x86_pmu.version			= version;
-	x86_pmu.num_events		= eax.split.num_events;
-	x86_pmu.event_bits		= eax.split.bit_width;
-	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.cntval_bits		= eax.split.bit_width;
+	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
 
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
 	 */
 	if (version > 1)
-		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
 	/*
 	 * v2 and above have a perf capabilities MSR
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 0d1be36..4139100 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -483,7 +483,7 @@ static void p4_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -540,7 +540,7 @@ static void p4_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -562,7 +562,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -579,7 +579,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		p4_pmu_clear_cccr_ovf(hwc);
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -794,10 +794,10 @@ static __initconst struct x86_pmu p4_pmu = {
 	 * though leave it restricted at moment assuming
 	 * HT is on
 	 */
-	.num_events		= ARCH_P4_MAX_CCCR,
+	.num_counters		= ARCH_P4_MAX_CCCR,
 	.apic			= 1,
-	.event_bits		= 40,
-	.event_mask		= (1ULL << 40) - 1,
+	.cntval_bits		= 40,
+	.cntval_mask		= (1ULL << 40) - 1,
 	.max_period		= (1ULL << 39) - 1,
 	.hw_config		= p4_hw_config,
 	.schedule_events	= p4_pmu_schedule_events,
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 877182c..b26fbc7 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -119,7 +119,7 @@ static __initconst struct x86_pmu p6_pmu = {
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,
 	.version		= 0,
-	.num_events		= 2,
+	.num_counters		= 2,
 	/*
 	 * Events have 40 bits implemented. However they are designed such
 	 * that bits [32-39] are sign extensions of bit 31. As such the
@@ -127,8 +127,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	 *
 	 * See IA-32 Intel Architecture Software developer manual Vol 3B
 	 */
-	.event_bits		= 32,
-	.event_mask		= (1ULL << 32) - 1,
+	.cntval_bits		= 32,
+	.cntval_mask		= (1ULL << 32) - 1,
 	.get_event_constraints	= x86_get_event_constraints,
 	.event_constraints	= p6_event_constraints,
 };
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2bf90fa..c8abc4d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -239,11 +239,11 @@ static void arch_perfmon_setup_counters(void)
 	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
 		current_cpu_data.x86_model == 15) {
 		eax.split.version_id = 2;
-		eax.split.num_events = 2;
+		eax.split.num_counters = 2;
 		eax.split.bit_width = 40;
 	}
 
-	num_counters = eax.split.num_events;
+	num_counters = eax.split.num_counters;
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
-- 
1.7.0.3




* [PATCH 2/3] perf/core, x86: removing p6_pmu_raw_event()
  2010-03-29 16:36 [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Robert Richter
  2010-03-29 16:36 ` [PATCH 1/3] perf/core, x86: undo some *_counter* -> *_event* renames Robert Richter
@ 2010-03-29 16:36 ` Robert Richter
  2010-03-29 16:36 ` [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks Robert Richter
  2010-03-30 10:11 ` [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Stephane Eranian
  3 siblings, 0 replies; 29+ messages in thread
From: Robert Richter @ 2010-03-29 16:36 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter

The function is the same as intel_pmu_raw_event(). This patch removes
the duplicate code.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/kernel/cpu/perf_event.c       |    2 +-
 arch/x86/kernel/cpu/perf_event_intel.c |    2 ++
 arch/x86/kernel/cpu/perf_event_p6.c    |   20 +-------------------
 3 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2fc2d8..81218d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1320,11 +1320,11 @@ undo:
 }
 
 #include "perf_event_amd.c"
-#include "perf_event_p6.c"
 #include "perf_event_p4.c"
 #include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
+#include "perf_event_p6.c"
 
 static int __cpuinit
 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0d68531..6c5d83f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -842,6 +842,8 @@ static __initconst struct x86_pmu intel_pmu = {
 	.cpu_dying		= intel_pmu_cpu_dying,
 };
 
+static __init int p6_pmu_init(void);
+
 static void intel_clovertown_quirks(void)
 {
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index b26fbc7..8cdd054 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event)
  */
 #define P6_NOP_EVENT			0x0000002EULL
 
-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define P6_EVNTSEL_MASK			\
-	(P6_EVNTSEL_EVENT_MASK |	\
-	 P6_EVNTSEL_UNIT_MASK  |	\
-	 P6_EVNTSEL_EDGE_MASK  |	\
-	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_REG_MASK)
-
-	return hw_event & P6_EVNTSEL_MASK;
-}
-
 static struct event_constraint p6_event_constraints[] =
 {
 	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
@@ -114,7 +96,7 @@ static __initconst struct x86_pmu p6_pmu = {
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,
-	.raw_event		= p6_pmu_raw_event,
+	.raw_event		= intel_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,
-- 
1.7.0.3




* [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks
  2010-03-29 16:36 [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Robert Richter
  2010-03-29 16:36 ` [PATCH 1/3] perf/core, x86: undo some *_counter* -> *_event* renames Robert Richter
  2010-03-29 16:36 ` [PATCH 2/3] perf/core, x86: removing p6_pmu_raw_event() Robert Richter
@ 2010-03-29 16:36 ` Robert Richter
  2010-03-29 16:48   ` Peter Zijlstra
  2010-03-30 10:11 ` [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Stephane Eranian
  3 siblings, 1 reply; 29+ messages in thread
From: Robert Richter @ 2010-03-29 16:36 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML, Robert Richter

ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This
patch adds macros for the bit masks and removes local defines.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h      |   58 ++++++++++++++------------------
 arch/x86/kernel/cpu/perf_event.c       |   14 ++++++--
 arch/x86/kernel/cpu/perf_event_amd.c   |   15 +--------
 arch/x86/kernel/cpu/perf_event_intel.c |   15 +--------
 4 files changed, 38 insertions(+), 64 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 987bf67..f6d43db 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,39 +18,31 @@
 #define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
 
-#define ARCH_PERFMON_EVENTSEL_ENABLE			  (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_ANY			  (1 << 21)
-#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
-
-/*
- * Includes eventsel and unit mask as well:
- */
-
-
-#define INTEL_ARCH_EVTSEL_MASK		0x000000FFULL
-#define INTEL_ARCH_UNIT_MASK		0x0000FF00ULL
-#define INTEL_ARCH_EDGE_MASK		0x00040000ULL
-#define INTEL_ARCH_INV_MASK		0x00800000ULL
-#define INTEL_ARCH_CNT_MASK		0xFF000000ULL
-#define INTEL_ARCH_EVENT_MASK	(INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
-
-/*
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define INTEL_ARCH_FIXED_MASK \
-	(INTEL_ARCH_CNT_MASK| \
-	 INTEL_ARCH_INV_MASK| \
-	 INTEL_ARCH_EDGE_MASK|\
-	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+#define ARCH_PERFMON_EVENTSEL_EVENT			0x000000FFULL
+#define ARCH_PERFMON_EVENTSEL_UMASK			0x0000FF00ULL
+#define ARCH_PERFMON_EVENTSEL_USR			(1ULL << 16)
+#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_EDGE			(1ULL << 18)
+#define ARCH_PERFMON_EVENTSEL_INT			(1ULL << 20)
+#define ARCH_PERFMON_EVENTSEL_ANY			(1ULL << 21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
+#define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
+#define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
+
+#define AMD64_EVENTSEL_EVENT	\
+	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
+#define INTEL_ARCH_EVENT_MASK	\
+	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
+
+#define X86_RAW_EVENT_MASK		\
+	(ARCH_PERFMON_EVENTSEL_EVENT |	\
+	 ARCH_PERFMON_EVENTSEL_UMASK |	\
+	 ARCH_PERFMON_EVENTSEL_EDGE  |	\
+	 ARCH_PERFMON_EVENTSEL_INV   |	\
+	 ARCH_PERFMON_EVENTSEL_CMASK)
+#define AMD64_RAW_EVENT_MASK		\
+	(X86_RAW_EVENT_MASK          |  \
+	 AMD64_EVENTSEL_EVENT)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 81218d0..ec25568 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -142,13 +142,21 @@ struct cpu_hw_events {
  * Constraint on the Event code.
  */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
 /*
  * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
  */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
 
 /*
  * Constraint on the Event code + UMask
@@ -1424,7 +1432,7 @@ void __init init_hw_perf_events(void)
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK)
 				continue;
 
 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 9d363ce..30e799a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -113,20 +113,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_REG_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
+	return hw_event & AMD64_RAW_EVENT_MASK;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6c5d83f..2f13e7a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -454,20 +454,7 @@ static __initconst u64 atom_hw_cache_event_ids
 
 static u64 intel_pmu_raw_event(u64 hw_event)
 {
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(INTEL_ARCH_EVTSEL_MASK |	\
-	 INTEL_ARCH_UNIT_MASK   |	\
-	 INTEL_ARCH_EDGE_MASK   |	\
-	 INTEL_ARCH_INV_MASK    |	\
-	 INTEL_ARCH_CNT_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
+	return hw_event & X86_RAW_EVENT_MASK;
 }
 
 static void intel_pmu_disable_all(void)
-- 
1.7.0.3




* Re: [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks
  2010-03-29 16:36 ` [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks Robert Richter
@ 2010-03-29 16:48   ` Peter Zijlstra
  2010-03-29 17:01     ` Robert Richter
  2010-03-30  9:28     ` Robert Richter
  0 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2010-03-29 16:48 UTC (permalink / raw)
  To: Robert Richter; +Cc: Ingo Molnar, Stephane Eranian, LKML

On Mon, 2010-03-29 at 18:36 +0200, Robert Richter wrote:
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -454,20 +454,7 @@ static __initconst u64 atom_hw_cache_event_ids
>  
>  static u64 intel_pmu_raw_event(u64 hw_event)
>  {
> -#define CORE_EVNTSEL_EVENT_MASK                0x000000FFULL
> -#define CORE_EVNTSEL_UNIT_MASK         0x0000FF00ULL
> -#define CORE_EVNTSEL_EDGE_MASK         0x00040000ULL
> -#define CORE_EVNTSEL_INV_MASK          0x00800000ULL
> -#define CORE_EVNTSEL_REG_MASK          0xFF000000ULL
> -
> -#define CORE_EVNTSEL_MASK              \
> -       (INTEL_ARCH_EVTSEL_MASK |       \
> -        INTEL_ARCH_UNIT_MASK   |       \
> -        INTEL_ARCH_EDGE_MASK   |       \
> -        INTEL_ARCH_INV_MASK    |       \
> -        INTEL_ARCH_CNT_MASK)
> -
> -       return hw_event & CORE_EVNTSEL_MASK;
> +       return hw_event & X86_RAW_EVENT_MASK;
>  } 

Could you fold this with your 2/3 and create x86_pmu_raw_event() which
lives in arch/x86/kernel/cpu/perf_event.c, that's more consistent wrt
the X86_RAW_EVENT_MASK name and that way you don't need to re-order the
#include ""s either.



* Re: [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks
  2010-03-29 16:48   ` Peter Zijlstra
@ 2010-03-29 17:01     ` Robert Richter
  2010-03-30  9:28     ` Robert Richter
  1 sibling, 0 replies; 29+ messages in thread
From: Robert Richter @ 2010-03-29 17:01 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML

On 29.03.10 18:48:58, Peter Zijlstra wrote:
> On Mon, 2010-03-29 at 18:36 +0200, Robert Richter wrote:
> > +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> > @@ -454,20 +454,7 @@ static __initconst u64 atom_hw_cache_event_ids
> >  
> >  static u64 intel_pmu_raw_event(u64 hw_event)
> >  {
> > -#define CORE_EVNTSEL_EVENT_MASK                0x000000FFULL
> > -#define CORE_EVNTSEL_UNIT_MASK         0x0000FF00ULL
> > -#define CORE_EVNTSEL_EDGE_MASK         0x00040000ULL
> > -#define CORE_EVNTSEL_INV_MASK          0x00800000ULL
> > -#define CORE_EVNTSEL_REG_MASK          0xFF000000ULL
> > -
> > -#define CORE_EVNTSEL_MASK              \
> > -       (INTEL_ARCH_EVTSEL_MASK |       \
> > -        INTEL_ARCH_UNIT_MASK   |       \
> > -        INTEL_ARCH_EDGE_MASK   |       \
> > -        INTEL_ARCH_INV_MASK    |       \
> > -        INTEL_ARCH_CNT_MASK)
> > -
> > -       return hw_event & CORE_EVNTSEL_MASK;
> > +       return hw_event & X86_RAW_EVENT_MASK;
> >  } 
> 
> Could you fold this with your 2/3 and create x86_pmu_raw_event() which
> lives in arch/x86/kernel/cpu/perf_event.c, that's more consistent wrt
> the X86_RAW_EVENT_MASK name and that way you don't need to re-order the
> #include ""s either.

Ah, yes. Will do this and resend the patches.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com



* Re: [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks
  2010-03-29 16:48   ` Peter Zijlstra
  2010-03-29 17:01     ` Robert Richter
@ 2010-03-30  9:28     ` Robert Richter
  2010-04-02 19:09       ` [tip:perf/core] perf, " tip-bot for Robert Richter
  1 sibling, 1 reply; 29+ messages in thread
From: Robert Richter @ 2010-03-30  9:28 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, LKML

On 29.03.10 18:48:58, Peter Zijlstra wrote:
> Could you fold this with your 2/3 and create x86_pmu_raw_event() which
> lives in arch/x86/kernel/cpu/perf_event.c, that's more consistent wrt
> the X86_RAW_EVENT_MASK name and that way you don't need to re-order the
> #include ""s either.

The patch below replaces patches 2 and 3.

-Robert

---

From 2d77650a4dc5ded763dc3c120381bdbe5a0be911 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 17 Mar 2010 12:32:37 +0100
Subject: [PATCH] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks

ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This
patch adds macros for the bit masks and removes local defines. The
function intel_pmu_raw_event() becomes x86_pmu_raw_event(), which is
generic across x86 models and is also used for p6. Duplicate code is
removed.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h      |   58 ++++++++++++++------------------
 arch/x86/kernel/cpu/perf_event.c       |   19 +++++++++--
 arch/x86/kernel/cpu/perf_event_amd.c   |   15 +--------
 arch/x86/kernel/cpu/perf_event_intel.c |   22 +-----------
 arch/x86/kernel/cpu/perf_event_p6.c    |   20 +----------
 5 files changed, 45 insertions(+), 89 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 987bf67..f6d43db 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,39 +18,31 @@
 #define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
 
-#define ARCH_PERFMON_EVENTSEL_ENABLE			  (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_ANY			  (1 << 21)
-#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
-
-/*
- * Includes eventsel and unit mask as well:
- */
-
-
-#define INTEL_ARCH_EVTSEL_MASK		0x000000FFULL
-#define INTEL_ARCH_UNIT_MASK		0x0000FF00ULL
-#define INTEL_ARCH_EDGE_MASK		0x00040000ULL
-#define INTEL_ARCH_INV_MASK		0x00800000ULL
-#define INTEL_ARCH_CNT_MASK		0xFF000000ULL
-#define INTEL_ARCH_EVENT_MASK	(INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
-
-/*
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define INTEL_ARCH_FIXED_MASK \
-	(INTEL_ARCH_CNT_MASK| \
-	 INTEL_ARCH_INV_MASK| \
-	 INTEL_ARCH_EDGE_MASK|\
-	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+#define ARCH_PERFMON_EVENTSEL_EVENT			0x000000FFULL
+#define ARCH_PERFMON_EVENTSEL_UMASK			0x0000FF00ULL
+#define ARCH_PERFMON_EVENTSEL_USR			(1ULL << 16)
+#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_EDGE			(1ULL << 18)
+#define ARCH_PERFMON_EVENTSEL_INT			(1ULL << 20)
+#define ARCH_PERFMON_EVENTSEL_ANY			(1ULL << 21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
+#define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
+#define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
+
+#define AMD64_EVENTSEL_EVENT	\
+	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
+#define INTEL_ARCH_EVENT_MASK	\
+	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
+
+#define X86_RAW_EVENT_MASK		\
+	(ARCH_PERFMON_EVENTSEL_EVENT |	\
+	 ARCH_PERFMON_EVENTSEL_UMASK |	\
+	 ARCH_PERFMON_EVENTSEL_EDGE  |	\
+	 ARCH_PERFMON_EVENTSEL_INV   |	\
+	 ARCH_PERFMON_EVENTSEL_CMASK)
+#define AMD64_RAW_EVENT_MASK		\
+	(X86_RAW_EVENT_MASK          |  \
+	 AMD64_EVENTSEL_EVENT)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2fc2d8..4e8d233 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -142,13 +142,21 @@ struct cpu_hw_events {
  * Constraint on the Event code.
  */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
 /*
  * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
  */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
 
 /*
  * Constraint on the Event code + UMask
@@ -436,6 +444,11 @@ static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc
 	return 0;
 }
 
+static u64 x86_pmu_raw_event(u64 hw_event)
+{
+	return hw_event & X86_RAW_EVENT_MASK;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -1424,7 +1437,7 @@ void __init init_hw_perf_events(void)
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK)
 				continue;
 
 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 9d363ce..30e799a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -113,20 +113,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_REG_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
+	return hw_event & AMD64_RAW_EVENT_MASK;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0d68531..b65cfc3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -452,24 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids
  },
 };
 
-static u64 intel_pmu_raw_event(u64 hw_event)
-{
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(INTEL_ARCH_EVTSEL_MASK |	\
-	 INTEL_ARCH_UNIT_MASK   |	\
-	 INTEL_ARCH_EDGE_MASK   |	\
-	 INTEL_ARCH_INV_MASK    |	\
-	 INTEL_ARCH_CNT_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
-}
-
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -788,7 +770,7 @@ static __initconst struct x86_pmu core_pmu = {
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
+	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
@@ -827,7 +809,7 @@ static __initconst struct x86_pmu intel_pmu = {
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
+	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index b26fbc7..03c139a 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event)
  */
 #define P6_NOP_EVENT			0x0000002EULL
 
-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define P6_EVNTSEL_MASK			\
-	(P6_EVNTSEL_EVENT_MASK |	\
-	 P6_EVNTSEL_UNIT_MASK  |	\
-	 P6_EVNTSEL_EDGE_MASK  |	\
-	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_REG_MASK)
-
-	return hw_event & P6_EVNTSEL_MASK;
-}
-
 static struct event_constraint p6_event_constraints[] =
 {
 	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
@@ -114,7 +96,7 @@ static __initconst struct x86_pmu p6_pmu = {
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,
-	.raw_event		= p6_pmu_raw_event,
+	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,
-- 
1.7.0.3



-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com



* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-29 16:36 [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Robert Richter
                   ` (2 preceding siblings ...)
  2010-03-29 16:36 ` [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks Robert Richter
@ 2010-03-30 10:11 ` Stephane Eranian
  2010-03-30 13:41   ` Robert Richter
  3 siblings, 1 reply; 29+ messages in thread
From: Stephane Eranian @ 2010-03-30 10:11 UTC (permalink / raw)
  To: Robert Richter; +Cc: Peter Zijlstra, Ingo Molnar, LKML

On Mon, Mar 29, 2010 at 6:36 PM, Robert Richter <robert.richter@amd.com> wrote:
> This patch set unifies performance counter bit masks for x86. All masks
> are almost the same for all x86 models and thus can use the same macro
> definitions in arch/x86/include/asm/perf_event.h. It removes duplicate
> code. There is also a patch that reverts some changes of the big
> perf_counter -> perf_event rename.
>

But there are still fields which are unique to each vendor:
- GUEST vs. HOST on AMD
- ANY_THREAD on Intel.

For instance, I noticed that in

arch/x86/kernel/cpu/perf_event.c:__hw_perf_event_init():

      if (attr->type == PERF_TYPE_RAW) {
                hwc->config |= x86_pmu.raw_event(attr->config);
                if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
                    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
                        return -EACCES;
                return 0;
        }

Assumes ANY also exists on AMD processors. That is not the case.
This check needs to be moved into an Intel specific function.


* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 10:11 ` [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Stephane Eranian
@ 2010-03-30 13:41   ` Robert Richter
  2010-03-30 13:53     ` Stephane Eranian
  0 siblings, 1 reply; 29+ messages in thread
From: Robert Richter @ 2010-03-30 13:41 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, LKML

On 30.03.10 12:11:46, Stephane Eranian wrote:
> On Mon, Mar 29, 2010 at 6:36 PM, Robert Richter <robert.richter@amd.com> wrote:
> > This patch set unifies performance counter bit masks for x86. All masks
> > are almost the same for all x86 models and thus can use the same macro
> > definitions in arch/x86/include/asm/perf_event.h. It removes duplicate
> > code. There is also a patch that reverts some changes of the big
> > perf_counter -> perf_event rename.
> >
> 
> But there are still fields which are unique to each vendor:
> - GUEST vs. HOST on AMD
> - ANY_THREAD on Intel.
> 
> For instance, I noticed that in
> 
> arch/x86/kernel/cpu/perf_event.c:__hw_perf_event_init():
> 
>       if (attr->type == PERF_TYPE_RAW) {
>                 hwc->config |= x86_pmu.raw_event(attr->config);
>                 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
>                     perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
>                         return -EACCES;
>                 return 0;
>         }
> 
> Assumes ANY also exists on AMD processors. That is not the case.
> This check needs to be moved into an Intel specific function.

Generally, ARCH_PERFMON_EVENTSEL_* refers to:

 Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume
 3B: System Programming Guide, Part 2
 30.2 ARCHITECTURAL PERFORMANCE MONITORING
 Appendix A: Performance-Monitoring Events
 Appendix B: Model-Specific Registers (MSRs) 

and AMD64_EVENTSEL_* to:

 AMD64 Architecture Programmer's Manual Volume 2: System Programming
 13.3.1 Performance Counters

X86_* is generic.

If a feature is available from both vendors, the names shouldn't be
changed. Instead the first introduced mask should be used (at least
this is my suggestion).

So, there are some ARCH_PERFMON_EVENTSEL_* masks that are Intel only,
which is true for ARCH_PERFMON_EVENTSEL_ANY. And indeed, the code
should be checked for this. ARCH_PERFMON_EVENTSEL_ANY is always
cleared on AMD cpus, so this code is ok. Actually the bit is cleared
for *all* cpus in x86_pmu_raw_event(), the code was and is broken for
this.
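
To make that concrete, a small standalone demonstration (userspace C,
not kernel code; the mask values are copied from the
ARCH_PERFMON_EVENTSEL_* definitions in patch 3/3):

#include <stdio.h>
#include <stdint.h>

#define ARCH_PERFMON_EVENTSEL_EVENT	0x000000FFULL
#define ARCH_PERFMON_EVENTSEL_UMASK	0x0000FF00ULL
#define ARCH_PERFMON_EVENTSEL_EDGE	(1ULL << 18)
#define ARCH_PERFMON_EVENTSEL_ANY	(1ULL << 21)
#define ARCH_PERFMON_EVENTSEL_INV	(1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK	0xFF000000ULL

/* note: bit 21 (ANY) is not part of the raw event mask */
#define X86_RAW_EVENT_MASK		\
	(ARCH_PERFMON_EVENTSEL_EVENT |	\
	 ARCH_PERFMON_EVENTSEL_UMASK |	\
	 ARCH_PERFMON_EVENTSEL_EDGE  |	\
	 ARCH_PERFMON_EVENTSEL_INV   |	\
	 ARCH_PERFMON_EVENTSEL_CMASK)

int main(void)
{
	/* raw config: event 0x3c (unhalted core cycles) with ANY set */
	uint64_t config = 0x3c | ARCH_PERFMON_EVENTSEL_ANY;

	/* what x86_pmu_raw_event() does with it */
	config &= X86_RAW_EVENT_MASK;

	/* prints 0: the ANY/paranoia check afterwards can never fire */
	printf("ANY still set: %d\n",
	       !!(config & ARCH_PERFMON_EVENTSEL_ANY));
	return 0;
}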

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com



* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 13:41   ` Robert Richter
@ 2010-03-30 13:53     ` Stephane Eranian
  2010-03-30 15:00       ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Stephane Eranian @ 2010-03-30 13:53 UTC (permalink / raw)
  To: Robert Richter; +Cc: Peter Zijlstra, Ingo Molnar, LKML

On Tue, Mar 30, 2010 at 3:41 PM, Robert Richter <robert.richter@amd.com> wrote:
> On 30.03.10 12:11:46, Stephane Eranian wrote:
>> On Mon, Mar 29, 2010 at 6:36 PM, Robert Richter <robert.richter@amd.com> wrote:
>> > This patch set unifies performance counter bit masks for x86. All masks
>> > are almost the same for all x86 models and thus can use the same macro
>> > definitions in arch/x86/include/asm/perf_event.h. It removes duplicate
>> > code. There is also a patch that reverts some changes of the big
>> > perf_counter -> perf_event rename.
>> >
>>
>> But there are still fields which are unique to each vendor:
>> - GUEST vs. HOST on AMD
>> - ANY_THREAD on Intel.
>>
>> For instance, I noticed that in
>>
>> arch/x86/kernel/cpu/perf_event.c:__hw_perf_event_init():
>>
>>       if (attr->type == PERF_TYPE_RAW) {
>>                 hwc->config |= x86_pmu.raw_event(attr->config);
>>                 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
>>                     perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
>>                         return -EACCES;
>>                 return 0;
>>         }
>>
>> Assumes ANY also exists on AMD processors. That is not the case.
>> This check needs to be moved into an Intel specific function.
>
> Generally, ARCH_PERFMON_EVENTSEL_* refers to:
>
>  Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume
>  3B: System Programming Guide, Part 2
>  30.2 ARCHITECTURAL PERFORMANCE MONITORING
>  Appendix A: Performance-Monitoring Events
>  Appendix B: Model-Specific Registers (MSRs)
>
> and AMD64_EVENTSEL_* to:
>
>  AMD64 Architecture Programmer's Manual Volume 2: System Programming
>  13.3.1 Performance Counters
>
> X86_* is generic.
>
> If a feature is available from both vendors, the names shouldn't be
> changed. Instead the first introduced mask should be used (at least
> this is my suggestion).
>
> So, there are some ARCH_PERFMON_EVENTSEL_* masks that are Intel only,
> which is true for ARCH_PERFMON_EVENTSEL_ANY. And indeed, the code
> should be checked for this. ARCH_PERFMON_EVENTSEL_ANY is always
> cleared on AMD cpus, so this code is ok. Actually the bit is cleared

Until AMD uses that bit too and you won't notice this test. This is a security
check specific to Intel and it should be in an Intel-specific function.

> for *all* cpus in x86_pmu_raw_event(), the code was and is broken for
> this.
>
Yes, needs to be authorized for any perfmon v3 and later revisions.
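
For illustration, a sketch of such a gate (hypothetical placement in an
Intel-specific config path; attr/hwc as in x86_hw_config(), and
x86_pmu.version holding the architectural perfmon revision):

	/* sketch only: honour ANY on arch perfmon v3+ only, and keep
	 * the existing paranoia check for it */
	if (attr->config & ARCH_PERFMON_EVENTSEL_ANY) {
		if (x86_pmu.version < 3)
			return -EINVAL;
		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
			return -EACCES;
		hwc->config |= ARCH_PERFMON_EVENTSEL_ANY;
	}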

> -Robert
>
> --
> Advanced Micro Devices, Inc.
> Operating System Research Center
> email: robert.richter@amd.com
>
>


* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 13:53     ` Stephane Eranian
@ 2010-03-30 15:00       ` Peter Zijlstra
  2010-03-30 15:11         ` Cyrill Gorcunov
  2010-03-30 15:59         ` Robert Richter
  0 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2010-03-30 15:00 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Robert Richter, Ingo Molnar, LKML, Cyrill Gorcunov

On Tue, 2010-03-30 at 15:53 +0200, Stephane Eranian wrote:
> > So, there are some ARCH_PERFMON_EVENTSEL_* masks that are Intel only,
> > which is true for ARCH_PERFMON_EVENTSEL_ANY. And indeed, the code
> > should be checked for this. ARCH_PERFMON_EVENTSEL_ANY is always
> > cleared on AMD cpus, so this code is ok. Actually the bit is cleared
> 
> Until AMD uses that bit too and you won't notice this test. This is a security
> check specific to Intel and it should be in an Intel-specific function.
> 
> > for *all* cpus in x86_pmu_raw_event(), the code was and is broken for
> > this.
> >
> Yes, needs to be authorized for any perfmon v3 and later revisions.

So how about something like this on top of Robert's patches?

---
 arch/x86/kernel/cpu/perf_event.c       |   16 ++++++----------
 arch/x86/kernel/cpu/perf_event_amd.c   |    5 +++--
 arch/x86/kernel/cpu/perf_event_intel.c |   15 ++++++++++++++-
 arch/x86/kernel/cpu/perf_event_p4.c    |    5 +++--
 4 files changed, 26 insertions(+), 15 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -201,7 +201,7 @@ struct x86_pmu {
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
+	int		(*raw_event)(struct perf_event *event);
 	int		max_events;
 	int		num_counters;
 	int		num_counters_fixed;
@@ -445,9 +445,10 @@ static int x86_hw_config(struct perf_eve
 	return 0;
 }
 
-static u64 x86_pmu_raw_event(u64 hw_event)
+static int x86_pmu_raw_event(struct perf_event *event)
 {
-	return hw_event & X86_RAW_EVENT_MASK;
+	event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+	return 0;
 }
 
 /*
@@ -511,13 +512,8 @@ static int __hw_perf_event_init(struct p
 	/*
 	 * Raw hw_event type provide the config in the hw_event structure
 	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
-		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		return 0;
-	}
+	if (attr->type == PERF_TYPE_RAW)
+		return x86_pmu.raw_event(event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_amd.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_amd.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_amd.c
@@ -111,9 +111,10 @@ static u64 amd_pmu_event_map(int hw_even
 	return amd_perfmon_event_map[hw_event];
 }
 
-static u64 amd_pmu_raw_event(u64 hw_event)
+static int amd_pmu_raw_event(struct perf_event *event)
 {
-	return hw_event & AMD64_RAW_EVENT_MASK;
+	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+	return 0;
 }
 
 /*
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -758,6 +758,19 @@ intel_get_event_constraints(struct cpu_h
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static int intel_pmu_raw_event(struct perf_event *event)
+{
+	int ret = x86_pmu_raw_event(event);
+	if (ret)
+		return ret;
+
+	if ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) &&
+			perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	return 0;
+}
+
 static __initconst struct x86_pmu core_pmu = {
 	.name			= "core",
 	.handle_irq		= x86_pmu_handle_irq,
@@ -809,7 +822,7 @@ static __initconst struct x86_pmu intel_
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
+	.raw_event		= intel_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_p4.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_p4.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_p4.c
@@ -425,11 +425,12 @@ static u64 p4_pmu_event_map(int hw_event
  * on HT machine but allow HT-compatible specifics to be
  * passed on)
  */
-static u64 p4_pmu_raw_event(u64 hw_event)
+static int p4_pmu_raw_event(struct perf_event *event)
 {
-	return hw_event &
+	event->hw.config |= event->attr.config &
 		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
 		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
+	return 0;
 }
 
 static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)




* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 15:00       ` Peter Zijlstra
@ 2010-03-30 15:11         ` Cyrill Gorcunov
  2010-03-30 15:31           ` Stephane Eranian
  2010-03-30 15:59         ` Robert Richter
  1 sibling, 1 reply; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-30 15:11 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Stephane Eranian, Robert Richter, Ingo Molnar, LKML

On Tue, Mar 30, 2010 at 05:00:55PM +0200, Peter Zijlstra wrote:
> On Tue, 2010-03-30 at 15:53 +0200, Stephane Eranian wrote:
> > > So, there are some ARCH_PERFMON_EVENTSEL_* masks that are Intel only,
> > > which is true for ARCH_PERFMON_EVENTSEL_ANY. And indeed, the code
> > > should be checked for this. ARCH_PERFMON_EVENTSEL_ANY is always
> > > cleared on AMD cpus, so this code is ok. Actually the bit is cleared
> > 
> > Until AMD uses that bit too and you won't notice this test. This is a security
> > check specific to Intel and it should be in an Intel-specific function.
> > 
> > > for *all* cpus in x86_pmu_raw_event(), the code was and is broken for
> > > this.
> > >
> > Yes, needs to be authorized for any perfmon v3 and later revisions.
> 
> So how about something like this on top of Robert's patches?
> 
[...]

Looks good for me! Thanks Peter!

	-- Cyrill


* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 15:11         ` Cyrill Gorcunov
@ 2010-03-30 15:31           ` Stephane Eranian
  0 siblings, 0 replies; 29+ messages in thread
From: Stephane Eranian @ 2010-03-30 15:31 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: Peter Zijlstra, Robert Richter, Ingo Molnar, LKML

On Tue, Mar 30, 2010 at 5:11 PM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> On Tue, Mar 30, 2010 at 05:00:55PM +0200, Peter Zijlstra wrote:
>> On Tue, 2010-03-30 at 15:53 +0200, Stephane Eranian wrote:
>> > > So, there are some ARCH_PERFMON_EVENTSEL_* masks that are Intel only,
>> > > which is true for ARCH_PERFMON_EVENTSEL_ANY. And indeed, the code
>> > > should be checked for this. ARCH_PERFMON_EVENTSEL_ANY is always
>> > > cleared on AMD cpus, so this code is ok. Actually the bit is cleared
>> >
>> > Until AMD uses that bit too and you won't notice this test. This is a security
>> > check specific to Intel and it should be in an Intel-specific function.
>> >
>> > > for *all* cpus in x86_pmu_raw_event(), the code was and is broken for
>> > > this.
>> > >
>> > Yes, needs to be authorized for any perfmon v3 and later revisions.
>>
>> So how about something like this on top of Robert's patches?
>>
> [...]
>
> Looks good for me! Thanks Peter!
>
Looks better. Thanks.

Acked-by: Stephane Eranian <eranian@google.com>


* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 15:00       ` Peter Zijlstra
  2010-03-30 15:11         ` Cyrill Gorcunov
@ 2010-03-30 15:59         ` Robert Richter
  2010-03-30 16:55           ` Peter Zijlstra
  1 sibling, 1 reply; 29+ messages in thread
From: Robert Richter @ 2010-03-30 15:59 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Stephane Eranian, Ingo Molnar, LKML, Cyrill Gorcunov

On 30.03.10 17:00:55, Peter Zijlstra wrote:
> On Tue, 2010-03-30 at 15:53 +0200, Stephane Eranian wrote:
> > > So, there are some ARCH_PERFMON_EVENTSEL_* masks that are Intel only,
> > > which is true for ARCH_PERFMON_EVENTSEL_ANY. And indeed, the code
> > > should be checked for this. ARCH_PERFMON_EVENTSEL_ANY is always
> > > cleared on AMD cpus, so this code is ok. Actually the bit is cleared
> > 
> > Until AMD uses that bit too and you won't notice this test. This is a security
> > check specific to Intel and it should be in an Intel-specific function.
> > 
> > > for *all* cpus in x86_pmu_raw_event(), the code was and is broken for
> > > this.
> > >
> > Yes, needs to be authorized for any perfmon v3 and later revisions.
> 
> So how about something like this on top of Robert's patches?
> 
> ---
>  arch/x86/kernel/cpu/perf_event.c       |   16 ++++++----------
>  arch/x86/kernel/cpu/perf_event_amd.c   |    5 +++--
>  arch/x86/kernel/cpu/perf_event_intel.c |   15 ++++++++++++++-
>  arch/x86/kernel/cpu/perf_event_p4.c    |    5 +++--
>  4 files changed, 26 insertions(+), 15 deletions(-)
> 
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -201,7 +201,7 @@ struct x86_pmu {
>  	unsigned	eventsel;
>  	unsigned	perfctr;
>  	u64		(*event_map)(int);
> -	u64		(*raw_event)(u64);
> +	int		(*raw_event)(struct perf_event *event);
>  	int		max_events;
>  	int		num_counters;
>  	int		num_counters_fixed;
> @@ -445,9 +445,10 @@ static int x86_hw_config(struct perf_eve
>  	return 0;
>  }
>  
> -static u64 x86_pmu_raw_event(u64 hw_event)
> +static int x86_pmu_raw_event(struct perf_event *event)
>  {
> -	return hw_event & X86_RAW_EVENT_MASK;
> +	event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

This still clears the ANY bit. See below.

> +	return 0;
>  }
>  
>  /*
> @@ -511,13 +512,8 @@ static int __hw_perf_event_init(struct p
>  	/*
>  	 * Raw hw_event type provide the config in the hw_event structure
>  	 */
> -	if (attr->type == PERF_TYPE_RAW) {
> -		hwc->config |= x86_pmu.raw_event(attr->config);
> -		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
> -		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
> -			return -EACCES;
> -		return 0;
> -	}
> +	if (attr->type == PERF_TYPE_RAW)
> +		return x86_pmu.raw_event(event);

We could go on with this and move this into *_hw_config(), but so far
this change is fine with me. This would then remove .raw_event() entirely.

[...]

> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -758,6 +758,19 @@ intel_get_event_constraints(struct cpu_h
>  	return x86_get_event_constraints(cpuc, event);
>  }
>  
> +static int intel_pmu_raw_event(struct perf_event *event)
> +{
> +	int ret = x86_pmu_raw_event(event);
> +	if (ret)
> +		return ret;
> +
> +	if ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) &&
> +			perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
> +		return -EACCES;
> +
> +	return 0;

Maybe this?

	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
		return 0;
	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
		return -EACCES;
	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
	return 0;
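
As an aside, a minimal sketch of the kind of raw request this check
guards against; the .config value and the bit-21 position of
ARCH_PERFMON_EVENTSEL_ANY are assumptions used purely for illustration:

	#include <linux/perf_event.h>

	/*
	 * A raw event with the AnyThread bit set counts across both HT
	 * siblings of a core.  With a restrictive perf_event_paranoid
	 * setting, an unprivileged perf_event_open() for this attr gets
	 * -EACCES from the check above.
	 */
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_RAW,
		.size		= sizeof(struct perf_event_attr),
		.config		= (1ULL << 21) | 0xc0,	/* example event + ANY */
		.disabled	= 1,
	};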

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 15:59         ` Robert Richter
@ 2010-03-30 16:55           ` Peter Zijlstra
  2010-03-30 17:11             ` Cyrill Gorcunov
                               ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Peter Zijlstra @ 2010-03-30 16:55 UTC (permalink / raw)
  To: Robert Richter; +Cc: Stephane Eranian, Ingo Molnar, LKML, Cyrill Gorcunov

On Tue, 2010-03-30 at 17:59 +0200, Robert Richter wrote:
> > +     if (attr->type == PERF_TYPE_RAW)
> > +             return x86_pmu.raw_event(event);
> 
> We could go on with this and move this into *_hw_config(), but so far
> this change is fine with me. This would then remove .raw_event() entirely.
> 

Right, I did have a brief look at that, you mentioned this before to
Cyrill IIRC, but the issue is that... ~brainwave~ how about the below..

> 
> > Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> > ===================================================================
> > --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
> > +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> > @@ -758,6 +758,19 @@ intel_get_event_constraints(struct cpu_h
> >       return x86_get_event_constraints(cpuc, event);
> >  }
> >  
> > +static int intel_pmu_raw_event(struct perf_event *event)
> > +{
> > +     int ret = x86_pmu_raw_event(event);
> > +     if (ret)
> > +             return ret;
> > +
> > +     if ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) &&
> > +                     perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
> > +             return -EACCES;
> > +
> > +     return 0;
> 
> Maybe this?
> 
>         if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
>                 return 0;
>         if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
>                 return -EACCES;
>         event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
>         return 0; 

You're right..



---
 arch/x86/kernel/cpu/perf_event.c       |   35 +++++++++-------------------
 arch/x86/kernel/cpu/perf_event_amd.c   |   17 ++++++++++----
 arch/x86/kernel/cpu/perf_event_intel.c |   30 +++++++++++++++++++++---
 arch/x86/kernel/cpu/perf_event_p4.c    |   40 ++++++++++++++++++---------------
 4 files changed, 73 insertions(+), 49 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -196,12 +196,11 @@ struct x86_pmu {
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
-	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
+	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
 	int		max_events;
 	int		num_counters;
 	int		num_counters_fixed;
@@ -426,28 +425,26 @@ set_ext_hw_attr(struct hw_perf_event *hw
 	return 0;
 }
 
-static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+static int x86_pmu_hw_config(struct perf_event *event)
 {
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
 	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
 
 	/*
 	 * Count user and OS events unless requested not to
 	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+	if (!event->attr.exclude_user)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!event->attr.exclude_kernel)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	return 0;
-}
+	if (event->attr.type == PERF_TYPE_RAW)
+		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
-static u64 x86_pmu_raw_event(u64 hw_event)
-{
-	return hw_event & X86_RAW_EVENT_MASK;
+	return 0;
 }
 
 /*
@@ -489,7 +486,7 @@ static int __hw_perf_event_init(struct p
 	hwc->last_tag = ~0ULL;
 
 	/* Processor specifics */
-	err = x86_pmu.hw_config(attr, hwc);
+	err = x86_pmu.hw_config(event);
 	if (err)
 		return err;
 
@@ -508,16 +505,8 @@ static int __hw_perf_event_init(struct p
 			return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Raw hw_event type provide the config in the hw_event structure
-	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
-		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+	if (attr->type == PERF_TYPE_RAW)
 		return 0;
-	}
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_amd.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_amd.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_amd.c
@@ -111,9 +111,19 @@ static u64 amd_pmu_event_map(int hw_even
 	return amd_perfmon_event_map[hw_event];
 }
 
-static u64 amd_pmu_raw_event(u64 hw_event)
+static int amd_pmu_hw_config(struct perf_event *event)
 {
-	return hw_event & AMD64_RAW_EVENT_MASK;
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+	return 0;
 }
 
 /*
@@ -365,12 +375,11 @@ static __initconst struct x86_pmu amd_pm
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= amd_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 	.num_counters		= 4,
 	.cntval_bits		= 48,
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -758,6 +758,30 @@ intel_get_event_constraints(struct cpu_h
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
+		return 0;
+
+	if (x86_pmu.version < 3)
+		return -EINVAL;
+
+	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
+
+	return 0;
+}
+
 static __initconst struct x86_pmu core_pmu = {
 	.name			= "core",
 	.handle_irq		= x86_pmu_handle_irq,
@@ -765,12 +789,11 @@ static __initconst struct x86_pmu core_p
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= x86_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
@@ -804,12 +827,11 @@ static __initconst struct x86_pmu intel_
 	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_event,
 	.disable		= intel_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= intel_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_p4.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_p4.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_p4.c
@@ -419,20 +419,7 @@ static u64 p4_pmu_event_map(int hw_event
 	return config;
 }
 
-/*
- * We don't control raw events so it's up to the caller
- * to pass sane values (and we don't count the thread number
- * on HT machine but allow HT-compatible specifics to be
- * passed on)
- */
-static u64 p4_pmu_raw_event(u64 hw_event)
-{
-	return hw_event &
-		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
-		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
-}
-
-static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+static int p4_hw_config(struct perf_event *event)
 {
 	int cpu = raw_smp_processor_id();
 	u32 escr, cccr;
@@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_even
 	 */
 
 	cccr = p4_default_cccr_conf(cpu);
-	escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
-	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
+	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
+					 event->attr.exclude_user);
+	event->hw.config = p4_config_pack_escr(escr) |
+			   p4_config_pack_cccr(cccr);
 
 	if (p4_ht_active() && p4_ht_thread(cpu))
-		hwc->config = p4_set_ht_bit(hwc->config);
+		event->hw.config = p4_set_ht_bit(event->hw.config);
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	/*
+	 * We don't control raw events so it's up to the caller
+	 * to pass sane values (and we don't count the thread number
+	 * on HT machine but allow HT-compatible specifics to be
+	 * passed on)
+	 *
+	 * XXX: HT wide things should check perf_paranoid_cpu() &&
+	 *      CAP_SYS_ADMIN
+	 */
+	event->hw.config |= event->attr.config &
+		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
+		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
 
 	return 0;
 }
@@ -785,7 +790,6 @@ static __initconst struct x86_pmu p4_pmu
 	.eventsel		= MSR_P4_BPU_CCCR0,
 	.perfctr		= MSR_P4_BPU_PERFCTR0,
 	.event_map		= p4_pmu_event_map,
-	.raw_event		= p4_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p4_general_events),
 	.get_event_constraints	= x86_get_event_constraints,
 	/*



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 16:55           ` Peter Zijlstra
@ 2010-03-30 17:11             ` Cyrill Gorcunov
  2010-03-30 17:24             ` Robert Richter
                               ` (2 subsequent siblings)
  3 siblings, 0 replies; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-30 17:11 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML

On Tue, Mar 30, 2010 at 06:55:13PM +0200, Peter Zijlstra wrote:
> On Tue, 2010-03-30 at 17:59 +0200, Robert Richter wrote:
> > > +     if (attr->type == PERF_TYPE_RAW)
> > > +             return x86_pmu.raw_event(event);
> > 
> > We could go on with this and move this into *_hw_config(), but so far
> > this change is fine with me. This would then remove .raw_event() entirely.
> > 
> 
> Right, I did have a brief look at that, you mentioned this before to
> Cyrill IIRC, but the issue is that... ~brainwave~ how about the below..
> 

Yeah, Robert told me about eliminating raw_event, I just remembered :)

[...]
> -static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
> +static int p4_hw_config(struct perf_event *event)
>  {
>  	int cpu = raw_smp_processor_id();
>  	u32 escr, cccr;
> @@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_even
>  	 */
>  
>  	cccr = p4_default_cccr_conf(cpu);
> -	escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
> -	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
> +	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
> +					 event->attr.exclude_user);
> +	event->hw.config = p4_config_pack_escr(escr) |
> +			   p4_config_pack_cccr(cccr);
>  
>  	if (p4_ht_active() && p4_ht_thread(cpu))
> -		hwc->config = p4_set_ht_bit(hwc->config);
> +		event->hw.config = p4_set_ht_bit(event->hw.config);
> +
> +	if (event->attr.type != PERF_TYPE_RAW)
> +		return 0;
> +
> +	/*
> +	 * We don't control raw events so it's up to the caller
> +	 * to pass sane values (and we don't count the thread number
> +	 * on HT machine but allow HT-compatible specifics to be
> +	 * passed on)
> +	 *
> +	 * XXX: HT wide things should check perf_paranoid_cpu() &&
> +	 *      CAP_SYS_ADMIN
> +	 */
> +	event->hw.config |= event->attr.config &
> +		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
> +		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
>  
>  	return 0;
>  }
[...]

I'll update the P4 snippet.

	-- Cyrill

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 16:55           ` Peter Zijlstra
  2010-03-30 17:11             ` Cyrill Gorcunov
@ 2010-03-30 17:24             ` Robert Richter
  2010-03-30 18:29             ` Cyrill Gorcunov
  2010-04-02 19:09             ` [tip:perf/core] perf, x86: Fix up the ANY flag stuff tip-bot for Peter Zijlstra
  3 siblings, 0 replies; 29+ messages in thread
From: Robert Richter @ 2010-03-30 17:24 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Stephane Eranian, Ingo Molnar, LKML, Cyrill Gorcunov

On 30.03.10 18:55:13, Peter Zijlstra wrote:
> On Tue, 2010-03-30 at 17:59 +0200, Robert Richter wrote:
> > > +     if (attr->type == PERF_TYPE_RAW)
> > > +             return x86_pmu.raw_event(event);
> > 
> > We could go on with this and move this into *_hw_config(), but so far
> > this change is fine with me. This would then remove .raw_event() entirely.
> > 
> 
> Right, I did have a brief look at that, you mentioned this before to
> Cyrill IIRC, but the issue is that... ~brainwave~ how about the below..

I like your patch, thanks Peter.

-Robert

-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 16:55           ` Peter Zijlstra
  2010-03-30 17:11             ` Cyrill Gorcunov
  2010-03-30 17:24             ` Robert Richter
@ 2010-03-30 18:29             ` Cyrill Gorcunov
  2010-03-30 19:04               ` Peter Zijlstra
  2010-04-02 19:09             ` [tip:perf/core] perf, x86: Fix up the ANY flag stuff tip-bot for Peter Zijlstra
  3 siblings, 1 reply; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-30 18:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML, Lin Ming

On Tue, Mar 30, 2010 at 06:55:13PM +0200, Peter Zijlstra wrote:
[...]
> -static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
> +static int p4_hw_config(struct perf_event *event)
>  {
>  	int cpu = raw_smp_processor_id();
>  	u32 escr, cccr;
> @@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_even
>  	 */
>  
>  	cccr = p4_default_cccr_conf(cpu);
> -	escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
> -	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
> +	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
> +					 event->attr.exclude_user);
> +	event->hw.config = p4_config_pack_escr(escr) |
> +			   p4_config_pack_cccr(cccr);
>  
>  	if (p4_ht_active() && p4_ht_thread(cpu))
> -		hwc->config = p4_set_ht_bit(hwc->config);
> +		event->hw.config = p4_set_ht_bit(event->hw.config);
> +
> +	if (event->attr.type != PERF_TYPE_RAW)
> +		return 0;
> +
> +	/*
> +	 * We don't control raw events so it's up to the caller
> +	 * to pass sane values (and we don't count the thread number
> +	 * on HT machine but allow HT-compatible specifics to be
> +	 * passed on)
> +	 *
> +	 * XXX: HT wide things should check perf_paranoid_cpu() &&
> +	 *      CAP_SYS_ADMIN
> +	 */
> +	event->hw.config |= event->attr.config &
> +		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
> +		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
>  
>  	return 0;
>  }
[...]

The thread specificity of P4 events is a bit messier compared with
architectural events. There are thread-specific (TS) and
thread-independent (TI) events. The exact effect of mixing the flags
behind what we call the "ANY" bit is described in two matrices in
the SDM.

So to keep the code simpler I chose to just bind events to a
particular logical cpu; when an event migrates to, say, the second
cpu, the bits are simply flipped according to which cpu the event is
going to run on. Pretty simple. Even more -- if a RAW event has the
"ANY" bit set, those bits are just stripped and the event gets bound
to a single cpu.

I'll try to find an easy way to satisfy this "ANY" bit request,
though it will require some time (perhaps later today or rather
tomorrow).
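
A minimal sketch of the flip described above -- assuming the
P4_ESCR_T{0,1}_{OS,USR} bit definitions from perf_event_p4.h (the T0
variants are implied by the P4_ESCR_T0_ANY define in the patch further
down) and roughly the ESCR side of what p4_pmu_swap_config_ts() does:

	/*
	 * Illustration only: move the CPL (USR/OS) selection from the
	 * thread 0 ESCR bits to the thread 1 positions, i.e. the "flip"
	 * applied when an event is rescheduled on the sibling HT thread.
	 */
	static u32 p4_escr_cpl_to_t1(u32 escr)
	{
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		return escr;
	}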

	-- Cyrill

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 18:29             ` Cyrill Gorcunov
@ 2010-03-30 19:04               ` Peter Zijlstra
  2010-03-30 19:18                 ` Cyrill Gorcunov
  2010-03-31 16:15                 ` Cyrill Gorcunov
  0 siblings, 2 replies; 29+ messages in thread
From: Peter Zijlstra @ 2010-03-30 19:04 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML, Lin Ming

On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
> On Tue, Mar 30, 2010 at 06:55:13PM +0200, Peter Zijlstra wrote:
> [...]
> > -static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
> > +static int p4_hw_config(struct perf_event *event)
> >  {
> >  	int cpu = raw_smp_processor_id();
> >  	u32 escr, cccr;
> > @@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_even
> >  	 */
> >  
> >  	cccr = p4_default_cccr_conf(cpu);
> > -	escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
> > -	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
> > +	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
> > +					 event->attr.exclude_user);
> > +	event->hw.config = p4_config_pack_escr(escr) |
> > +			   p4_config_pack_cccr(cccr);
> >  
> >  	if (p4_ht_active() && p4_ht_thread(cpu))
> > -		hwc->config = p4_set_ht_bit(hwc->config);
> > +		event->hw.config = p4_set_ht_bit(event->hw.config);
> > +
> > +	if (event->attr.type != PERF_TYPE_RAW)
> > +		return 0;
> > +
> > +	/*
> > +	 * We don't control raw events so it's up to the caller
> > +	 * to pass sane values (and we don't count the thread number
> > +	 * on HT machine but allow HT-compatible specifics to be
> > +	 * passed on)
> > +	 *
> > +	 * XXX: HT wide things should check perf_paranoid_cpu() &&
> > +	 *      CAP_SYS_ADMIN
> > +	 */
> > +	event->hw.config |= event->attr.config &
> > +		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
> > +		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
> >  
> >  	return 0;
> >  }
> [...]
> 
> The thread specificity of P4 events is a bit messier compared with
> architectural events. There are thread-specific (TS) and
> thread-independent (TI) events. The exact effect of mixing the flags
> behind what we call the "ANY" bit is described in two matrices in
> the SDM.
> 
> So to keep the code simpler I chose to just bind events to a
> particular logical cpu; when an event migrates to, say, the second
> cpu, the bits are simply flipped according to which cpu the event is
> going to run on. Pretty simple. Even more -- if a RAW event has the
> "ANY" bit set, those bits are just stripped and the event gets bound
> to a single cpu.
> 
> I'll try to find an easy way to satisfy this "ANY" bit request,
> though it will require some time (perhaps later today or rather
> tomorrow).

Right, so don't worry about actively supporting ANY on regular events,
wider than logical cpu counting is a daft thing.

What would be nice to detect is if the raw event provided would be a TI
(ANY) event, in which case we should apply the extra paranoia.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 19:04               ` Peter Zijlstra
@ 2010-03-30 19:18                 ` Cyrill Gorcunov
  2010-03-31 16:15                 ` Cyrill Gorcunov
  1 sibling, 0 replies; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-30 19:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML, Lin Ming

On Tue, Mar 30, 2010 at 09:04:00PM +0200, Peter Zijlstra wrote:
> On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
> > On Tue, Mar 30, 2010 at 06:55:13PM +0200, Peter Zijlstra wrote:
> > [...]
> > > -static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
> > > +static int p4_hw_config(struct perf_event *event)
> > >  {
> > >  	int cpu = raw_smp_processor_id();
> > >  	u32 escr, cccr;
> > > @@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_even
> > >  	 */
> > >  
> > >  	cccr = p4_default_cccr_conf(cpu);
> > > -	escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
> > > -	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
> > > +	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
> > > +					 event->attr.exclude_user);
> > > +	event->hw.config = p4_config_pack_escr(escr) |
> > > +			   p4_config_pack_cccr(cccr);
> > >  
> > >  	if (p4_ht_active() && p4_ht_thread(cpu))
> > > -		hwc->config = p4_set_ht_bit(hwc->config);
> > > +		event->hw.config = p4_set_ht_bit(event->hw.config);
> > > +
> > > +	if (event->attr.type != PERF_TYPE_RAW)
> > > +		return 0;
> > > +
> > > +	/*
> > > +	 * We don't control raw events so it's up to the caller
> > > +	 * to pass sane values (and we don't count the thread number
> > > +	 * on HT machine but allow HT-compatible specifics to be
> > > +	 * passed on)
> > > +	 *
> > > +	 * XXX: HT wide things should check perf_paranoid_cpu() &&
> > > +	 *      CAP_SYS_ADMIN
> > > +	 */
> > > +	event->hw.config |= event->attr.config &
> > > +		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
> > > +		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
> > >  
> > >  	return 0;
> > >  }
> > [...]
> > 
> > The thread specificity of P4 events is a bit messier compared with
> > architectural events. There are thread-specific (TS) and
> > thread-independent (TI) events. The exact effect of mixing the flags
> > behind what we call the "ANY" bit is described in two matrices in
> > the SDM.
> > 
> > So to keep the code simpler I chose to just bind events to a
> > particular logical cpu; when an event migrates to, say, the second
> > cpu, the bits are simply flipped according to which cpu the event is
> > going to run on. Pretty simple. Even more -- if a RAW event has the
> > "ANY" bit set, those bits are just stripped and the event gets bound
> > to a single cpu.
> > 
> > I'll try to find an easy way to satisfy this "ANY" bit request,
> > though it will require some time (perhaps later today or rather
> > tomorrow).
> 
> Right, so don't worry about actively supporting ANY on regular events,
> wider than logical cpu counting is a daft thing.
> 
> What would be nice to detect is if the raw event provided would be a TI
> (ANY) event, in which case we should apply the extra paranoia.
> 

Well, there would be a side effect anyway, so I think it should be
fixed along these lines: if a caller wants an ANY event and has
enough rights for that -- go ahead, you'll get what you want, the
kernel is not going to do the dirty work for you :) So I would only
need to fix two procedures -- event assignment (where the permission
will be checked as well) and event migration, where I will not do any
additional work for the caller.

At least, if I'm not missing anything, it should not be too difficult
or invasive. I'll check and send a patch a bit later. At the moment
we're on the safe side anyway, i.e. the former patch is fine with me!

	-- Cyrill

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-30 19:04               ` Peter Zijlstra
  2010-03-30 19:18                 ` Cyrill Gorcunov
@ 2010-03-31 16:15                 ` Cyrill Gorcunov
  2010-03-31 16:26                   ` Cyrill Gorcunov
  1 sibling, 1 reply; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-31 16:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML, Lin Ming

On Tue, Mar 30, 2010 at 09:04:00PM +0200, Peter Zijlstra wrote:
> On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
[...]
> > 
> > I'll try to find an easy way to satisfy this "ANY" bit request,
> > though it will require some time (perhaps later today or rather
> > tomorrow).
> 
> Right, so don't worry about actively supporting ANY on regular events,
> wider than logical cpu counting is a daft thing.
> 
> What would be nice to detect is if the raw event provided would be a TI
> (ANY) event, in which case we should apply the extra paranoia.
> 

ok, here is a version on top of your patches. Compile tested
only (I have no P4 at hand).

	-- Cyrill
---
x86, perf: P4 PMU -- check for permission granted on ANY event requested

If a caller (user) asks us to count events with some weird mask,
we should check whether this privilege has been granted, since this
could be a mix of mask bits we don't like but allow if the caller
insists.

By the "ANY" event term the combination of USR/OS bits in the ESCR
register is meant.

CC: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
 arch/x86/include/asm/perf_event_p4.h |   19 +++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_p4.c  |   24 +++++++++++++++++++++---
 2 files changed, 40 insertions(+), 3 deletions(-)

Index: linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
=====================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/perf_event_p4.h
+++ linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
@@ -33,6 +33,9 @@
 #define P4_ESCR_T1_OS		0x00000002U
 #define P4_ESCR_T1_USR		0x00000001U
 
+#define P4_ESCR_T0_ANY		(P4_ESCR_T0_OS | P4_ESCR_T0_USR)
+#define P4_ESCR_T1_ANY		(P4_ESCR_T1_OS | P4_ESCR_T1_USR)
+
 #define P4_ESCR_EVENT(v)	((v) << P4_ESCR_EVENT_SHIFT)
 #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
@@ -134,6 +137,22 @@
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 
+/*
+ * typically we set USR or/and OS bits for one of the
+ * threads only at once, any other option is treated
+ * as "odd"
+ */
+static inline bool p4_is_odd_cpl(u32 escr)
+{
+	unsigned int t0 = (escr & P4_ESCR_T0_ANY) << 0;
+	unsigned int t1 = (escr & P4_ESCR_T1_ANY) << 2;
+
+	if ((t0 ^ t1) != t0)
+		return true;
+
+	return false;
+}
+
 static inline bool p4_is_event_cascaded(u64 config)
 {
 	u32 cccr = p4_config_unpack_cccr(config);
Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
=====================================================================
--- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event_p4.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
@@ -443,13 +443,18 @@ static int p4_hw_config(struct perf_even
 		return 0;
 
 	/*
+	 * a caller may ask for something definitely weird and
+	 * screwed, sigh...
+	 */
+	escr = p4_config_unpack_escr(event->attr.config);
+	if (p4_is_odd_cpl(escr) && perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	/*
 	 * We don't control raw events so it's up to the caller
 	 * to pass sane values (and we don't count the thread number
 	 * on HT machine but allow HT-compatible specifics to be
 	 * passed on)
-	 *
-	 * XXX: HT wide things should check perf_paranoid_cpu() &&
-	 *      CAP_SYS_ADMIN
 	 */
 	event->hw.config |= event->attr.config &
 		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
@@ -630,6 +635,19 @@ static void p4_pmu_swap_config_ts(struct
 	escr = p4_config_unpack_escr(hwc->config);
 	cccr = p4_config_unpack_cccr(hwc->config);
 
+	/*
+	 * for non-standart configs we don't clobber cpl
+	 * related bits so it's preferred the caller don't
+	 * use this mode
+	 */
+	if (unlikely(p4_is_odd_cpl(escr))) {
+		if (p4_ht_thread(cpu))
+			hwc->config |= P4_CONFIG_HT;
+		else
+			hwc->config &= ~P4_CONFIG_HT;
+		return;
+	}
+
 	if (p4_ht_thread(cpu)) {
 		cccr &= ~P4_CCCR_OVF_PMI_T0;
 		cccr |= P4_CCCR_OVF_PMI_T1;

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-31 16:15                 ` Cyrill Gorcunov
@ 2010-03-31 16:26                   ` Cyrill Gorcunov
  2010-03-31 17:05                     ` Cyrill Gorcunov
  0 siblings, 1 reply; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-31 16:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML, Lin Ming

On Wed, Mar 31, 2010 at 08:15:23PM +0400, Cyrill Gorcunov wrote:
> On Tue, Mar 30, 2010 at 09:04:00PM +0200, Peter Zijlstra wrote:
> > On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
> [...]
> > > 
[...]
> +static inline bool p4_is_odd_cpl(u32 escr)
> +{
> +	unsigned int t0 = (escr & P4_ESCR_T0_ANY) << 0;
> +	unsigned int t1 = (escr & P4_ESCR_T1_ANY) << 2;
> +
> +	if ((t0 ^ t1) != t0)
> +		return true;

/me in shame: This is bogus, Peter don't take it yet.

> +
> +	return false;
> +}
[...]
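
For the record, one way to see a problem with the v1 check quoted
above (bit values here are assumptions; only the shifts from the
snippet itself matter):

	/*
	 * An ESCR selecting OS counting for thread 1 only -- a normal
	 * single-thread config:
	 */
	u32 escr = P4_ESCR_T1_OS;
	u32 t0 = (escr & P4_ESCR_T0_ANY) << 0;	/* == 0, no T0 CPL bits */
	u32 t1 = (escr & P4_ESCR_T1_ANY) << 2;	/* != 0 */
	/*
	 * (t0 ^ t1) != t0 holds whenever t1 != 0, so this ordinary
	 * config would be flagged as "odd"; the v2 check further down
	 * requires CPL bits for *both* threads instead.
	 */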

	-- Cyrill

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-31 16:26                   ` Cyrill Gorcunov
@ 2010-03-31 17:05                     ` Cyrill Gorcunov
  2010-04-01  2:14                       ` Lin Ming
  0 siblings, 1 reply; 29+ messages in thread
From: Cyrill Gorcunov @ 2010-03-31 17:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Robert Richter, Stephane Eranian, Ingo Molnar, LKML, Lin Ming

On Wed, Mar 31, 2010 at 08:26:47PM +0400, Cyrill Gorcunov wrote:
> On Wed, Mar 31, 2010 at 08:15:23PM +0400, Cyrill Gorcunov wrote:
> > On Tue, Mar 30, 2010 at 09:04:00PM +0200, Peter Zijlstra wrote:
> > > On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
> > [...]
> > > > 
> [...]
> > +static inline bool p4_is_odd_cpl(u32 escr)
> > +{
> > +	unsigned int t0 = (escr & P4_ESCR_T0_ANY) << 0;
> > +	unsigned int t1 = (escr & P4_ESCR_T1_ANY) << 2;
> > +
> > +	if ((t0 ^ t1) != t0)
> > +		return true;
> 
> /me in shame: This is bogus, Peter don't take it yet.
>

Updated
 
	-- Cyrill
---
x86, perf: P4 PMU -- check for permission granted on ANY event v2

If a caller (user) asks us to count events with some weird mask,
we should check whether this privilege has been granted, since this
could be a mix of bitmasks we don't like but allow if the caller
insists.

By the "ANY" event term the combination of USR/OS bits in the ESCR
register is meant.

CC: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
 arch/x86/include/asm/perf_event_p4.h |   17 +++++++++++++++++
 arch/x86/kernel/cpu/perf_event_p4.c  |   24 +++++++++++++++++++++---
 2 files changed, 38 insertions(+), 3 deletions(-)

Index: linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
=====================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/perf_event_p4.h
+++ linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
@@ -33,6 +33,9 @@
 #define P4_ESCR_T1_OS		0x00000002U
 #define P4_ESCR_T1_USR		0x00000001U
 
+#define P4_ESCR_T0_ANY		(P4_ESCR_T0_OS | P4_ESCR_T0_USR)
+#define P4_ESCR_T1_ANY		(P4_ESCR_T1_OS | P4_ESCR_T1_USR)
+
 #define P4_ESCR_EVENT(v)	((v) << P4_ESCR_EVENT_SHIFT)
 #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
@@ -134,6 +137,20 @@
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 
+/*
+ * typically we set USR or/and OS bits for one of the
+ * threads only at once, any other option is treated
+ * as "any"
+ */
+static inline bool p4_is_any_cpl(u32 escr)
+{
+	if ((escr & P4_ESCR_T0_ANY) &&
+	    (escr & P4_ESCR_T1_ANY))
+		return true;
+
+	return false;
+}
+
 static inline bool p4_is_event_cascaded(u64 config)
 {
 	u32 cccr = p4_config_unpack_cccr(config);
Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
=====================================================================
--- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event_p4.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
@@ -443,13 +443,18 @@ static int p4_hw_config(struct perf_even
 		return 0;
 
 	/*
+	 * a caller may ask for something definitely weird and
+	 * screwed, sigh...
+	 */
+	escr = p4_config_unpack_escr(event->attr.config);
+	if (p4_is_any_cpl(escr) && perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	/*
 	 * We don't control raw events so it's up to the caller
 	 * to pass sane values (and we don't count the thread number
 	 * on HT machine but allow HT-compatible specifics to be
 	 * passed on)
-	 *
-	 * XXX: HT wide things should check perf_paranoid_cpu() &&
-	 *      CAP_SYS_ADMIN
 	 */
 	event->hw.config |= event->attr.config &
 		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
@@ -630,6 +635,19 @@ static void p4_pmu_swap_config_ts(struct
 	escr = p4_config_unpack_escr(hwc->config);
 	cccr = p4_config_unpack_cccr(hwc->config);
 
+	/*
+	 * for non-standart configs we don't clobber cpl
+	 * related bits so it's preferred the caller don't
+	 * use this mode
+	 */
+	if (unlikely(p4_is_any_cpl(escr))) {
+		if (p4_ht_thread(cpu))
+			hwc->config |= P4_CONFIG_HT;
+		else
+			hwc->config &= ~P4_CONFIG_HT;
+		return;
+	}
+
 	if (p4_ht_thread(cpu)) {
 		cccr &= ~P4_CCCR_OVF_PMI_T0;
 		cccr |= P4_CCCR_OVF_PMI_T1;

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-03-31 17:05                     ` Cyrill Gorcunov
@ 2010-04-01  2:14                       ` Lin Ming
  2010-04-01  6:47                         ` Lin Ming
  0 siblings, 1 reply; 29+ messages in thread
From: Lin Ming @ 2010-04-01  2:14 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Peter Zijlstra, Robert Richter, Stephane Eranian, Ingo Molnar, LKML

On Thu, 2010-04-01 at 01:05 +0800, Cyrill Gorcunov wrote:
> On Wed, Mar 31, 2010 at 08:26:47PM +0400, Cyrill Gorcunov wrote:
> > On Wed, Mar 31, 2010 at 08:15:23PM +0400, Cyrill Gorcunov wrote:
> > > On Tue, Mar 30, 2010 at 09:04:00PM +0200, Peter Zijlstra wrote:
> > > > On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
> > > [...]
> > > > > 
> > [...]
> > > +static inline bool p4_is_odd_cpl(u32 escr)
> > > +{
> > > +	unsigned int t0 = (escr & P4_ESCR_T0_ANY) << 0;
> > > +	unsigned int t1 = (escr & P4_ESCR_T1_ANY) << 2;
> > > +
> > > +	if ((t0 ^ t1) != t0)
> > > +		return true;
> > 
> > /me in shame: This is bogus, Peter don't take it yet.
> >
> 
> Updated
>  
> 	-- Cyrill
> ---
> x86, perf: P4 PMU -- check for permission granted on ANY event v2
> 
> If a caller (user) asks us to count events with some weird mask,
> we should check whether this privilege has been granted, since this
> could be a mix of bitmasks we don't like but allow if the caller
> insists.
> 
> By the "ANY" event term the combination of USR/OS bits in the ESCR
> register is meant.

I'll test this patch.
Does it need to be applied on top of Robert's patch?

Lin Ming

> 
> CC: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
>  arch/x86/include/asm/perf_event_p4.h |   17 +++++++++++++++++
>  arch/x86/kernel/cpu/perf_event_p4.c  |   24 +++++++++++++++++++++---
>  2 files changed, 38 insertions(+), 3 deletions(-)
> 
> Index: linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
> =====================================================================
> --- linux-2.6.git.orig/arch/x86/include/asm/perf_event_p4.h
> +++ linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
> @@ -33,6 +33,9 @@
>  #define P4_ESCR_T1_OS		0x00000002U
>  #define P4_ESCR_T1_USR		0x00000001U
>  
> +#define P4_ESCR_T0_ANY		(P4_ESCR_T0_OS | P4_ESCR_T0_USR)
> +#define P4_ESCR_T1_ANY		(P4_ESCR_T1_OS | P4_ESCR_T1_USR)
> +
>  #define P4_ESCR_EVENT(v)	((v) << P4_ESCR_EVENT_SHIFT)
>  #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
>  #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
> @@ -134,6 +137,20 @@
>  #define P4_CONFIG_HT_SHIFT		63
>  #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
>  
> +/*
> + * typically we set USR or/and OS bits for one of the
> + * threads only at once, any other option is treated
> + * as "any"
> + */
> +static inline bool p4_is_any_cpl(u32 escr)
> +{
> +	if ((escr & P4_ESCR_T0_ANY) &&
> +	    (escr & P4_ESCR_T1_ANY))
> +		return true;
> +
> +	return false;
> +}
> +
>  static inline bool p4_is_event_cascaded(u64 config)
>  {
>  	u32 cccr = p4_config_unpack_cccr(config);
> Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
> =====================================================================
> --- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event_p4.c
> +++ linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
> @@ -443,13 +443,18 @@ static int p4_hw_config(struct perf_even
>  		return 0;
>  
>  	/*
> +	 * a caller may ask for something definitely weird and
> +	 * screwed, sigh...
> +	 */
> +	escr = p4_config_unpack_escr(event->attr.config);
> +	if (p4_is_any_cpl(escr) && perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
> +		return -EACCES;
> +
> +	/*
>  	 * We don't control raw events so it's up to the caller
>  	 * to pass sane values (and we don't count the thread number
>  	 * on HT machine but allow HT-compatible specifics to be
>  	 * passed on)
> -	 *
> -	 * XXX: HT wide things should check perf_paranoid_cpu() &&
> -	 *      CAP_SYS_ADMIN
>  	 */
>  	event->hw.config |= event->attr.config &
>  		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
> @@ -630,6 +635,19 @@ static void p4_pmu_swap_config_ts(struct
>  	escr = p4_config_unpack_escr(hwc->config);
>  	cccr = p4_config_unpack_cccr(hwc->config);
>  
> +	/*
> +	 * for non-standart configs we don't clobber cpl
> +	 * related bits so it's preferred the caller don't
> +	 * use this mode
> +	 */
> +	if (unlikely(p4_is_any_cpl(escr))) {
> +		if (p4_ht_thread(cpu))
> +			hwc->config |= P4_CONFIG_HT;
> +		else
> +			hwc->config &= ~P4_CONFIG_HT;
> +		return;
> +	}
> +
>  	if (p4_ht_thread(cpu)) {
>  		cccr &= ~P4_CCCR_OVF_PMI_T0;
>  		cccr |= P4_CCCR_OVF_PMI_T1;


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-04-01  2:14                       ` Lin Ming
@ 2010-04-01  6:47                         ` Lin Ming
  2010-04-01 10:36                           ` Peter Zijlstra
  0 siblings, 1 reply; 29+ messages in thread
From: Lin Ming @ 2010-04-01  6:47 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Peter Zijlstra, Robert Richter, Stephane Eranian, Ingo Molnar, LKML

On Thu, 2010-04-01 at 10:14 +0800, Lin Ming wrote:
> On Thu, 2010-04-01 at 01:05 +0800, Cyrill Gorcunov wrote:
> > On Wed, Mar 31, 2010 at 08:26:47PM +0400, Cyrill Gorcunov wrote:
> > > On Wed, Mar 31, 2010 at 08:15:23PM +0400, Cyrill Gorcunov wrote:
> > > > On Tue, Mar 30, 2010 at 09:04:00PM +0200, Peter Zijlstra wrote:
> > > > > On Tue, 2010-03-30 at 22:29 +0400, Cyrill Gorcunov wrote:
> > > > [...]
> > > > > > 
> > > [...]
> > > > +static inline bool p4_is_odd_cpl(u32 escr)
> > > > +{
> > > > +	unsigned int t0 = (escr & P4_ESCR_T0_ANY) << 0;
> > > > +	unsigned int t1 = (escr & P4_ESCR_T1_ANY) << 2;
> > > > +
> > > > +	if ((t0 ^ t1) != t0)
> > > > +		return true;
> > > 
> > > /me in shame: This is bogus, Peter don't take it yet.
> > >
> > 
> > Updated
> >  
> > 	-- Cyrill
> > ---
> > x86, perf: P4 PMU -- check for permission granted on ANY event v2
> > 
> > If a caller (user) asks us to count events with some weird mask,
> > we should check whether this privilege has been granted, since this
> > could be a mix of bitmasks we don't like but allow if the caller
> > insists.
> > 
> > By the "ANY" event term the combination of USR/OS bits in the ESCR
> > register is meant.
> 
> I'll test this patch.
> Does it need to be applied on top of Robert's patch?

I tested this patch on top of Peter's and Robert's patches.
It works well on P4.

There were only some build errors, fixed by the patch below.

In file included from arch/x86/kernel/cpu/perf_event.c:1326:
arch/x86/kernel/cpu/perf_event_p6.c:94: error: ‘x86_hw_config’ undeclared here (not in a function)
arch/x86/kernel/cpu/perf_event_p6.c:99: error: unknown field ‘raw_event’ specified in initializer
arch/x86/kernel/cpu/perf_event_p6.c:99: error: ‘x86_pmu_raw_event’ undeclared here (not in a function)
make[3]: *** [arch/x86/kernel/cpu/perf_event.o] Error 1

diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 626abc0..4ec9680 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -91,12 +91,11 @@ static __initconst struct x86_pmu p6_pmu = {
 	.enable_all		= p6_pmu_enable_all,
 	.enable			= p6_pmu_enable_event,
 	.disable		= p6_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= x86_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,


Lin Ming

> 
> Lin Ming
> 
> > 
> > CC: Peter Zijlstra <peterz@infradead.org>
> > Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> > ---
> >  arch/x86/include/asm/perf_event_p4.h |   17 +++++++++++++++++
> >  arch/x86/kernel/cpu/perf_event_p4.c  |   24 +++++++++++++++++++++---
> >  2 files changed, 38 insertions(+), 3 deletions(-)
> > 
> > Index: linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
> > =====================================================================
> > --- linux-2.6.git.orig/arch/x86/include/asm/perf_event_p4.h
> > +++ linux-2.6.git/arch/x86/include/asm/perf_event_p4.h
> > @@ -33,6 +33,9 @@
> >  #define P4_ESCR_T1_OS		0x00000002U
> >  #define P4_ESCR_T1_USR		0x00000001U
> >  
> > +#define P4_ESCR_T0_ANY		(P4_ESCR_T0_OS | P4_ESCR_T0_USR)
> > +#define P4_ESCR_T1_ANY		(P4_ESCR_T1_OS | P4_ESCR_T1_USR)
> > +
> >  #define P4_ESCR_EVENT(v)	((v) << P4_ESCR_EVENT_SHIFT)
> >  #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
> >  #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
> > @@ -134,6 +137,20 @@
> >  #define P4_CONFIG_HT_SHIFT		63
> >  #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
> >  
> > +/*
> > + * typically we set USR or/and OS bits for one of the
> > + * threads only at once, any other option is treated
> > + * as "any"
> > + */
> > +static inline bool p4_is_any_cpl(u32 escr)
> > +{
> > +	if ((escr & P4_ESCR_T0_ANY) &&
> > +	    (escr & P4_ESCR_T1_ANY))
> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> >  static inline bool p4_is_event_cascaded(u64 config)
> >  {
> >  	u32 cccr = p4_config_unpack_cccr(config);
> > Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
> > =====================================================================
> > --- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event_p4.c
> > +++ linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
> > @@ -443,13 +443,18 @@ static int p4_hw_config(struct perf_even
> >  		return 0;
> >  
> >  	/*
> > +	 * a caller may ask for something definitely weird and
> > +	 * screwed, sigh...
> > +	 */
> > +	escr = p4_config_unpack_escr(event->attr.config);
> > +	if (p4_is_any_cpl(escr) && perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
> > +		return -EACCES;
> > +
> > +	/*
> >  	 * We don't control raw events so it's up to the caller
> >  	 * to pass sane values (and we don't count the thread number
> >  	 * on HT machine but allow HT-compatible specifics to be
> >  	 * passed on)
> > -	 *
> > -	 * XXX: HT wide things should check perf_paranoid_cpu() &&
> > -	 *      CAP_SYS_ADMIN
> >  	 */
> >  	event->hw.config |= event->attr.config &
> >  		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
> > @@ -630,6 +635,19 @@ static void p4_pmu_swap_config_ts(struct
> >  	escr = p4_config_unpack_escr(hwc->config);
> >  	cccr = p4_config_unpack_cccr(hwc->config);
> >  
> > +	/*
> > +	 * for non-standart configs we don't clobber cpl
> > +	 * related bits so it's preferred the caller don't
> > +	 * use this mode
> > +	 */
> > +	if (unlikely(p4_is_any_cpl(escr))) {
> > +		if (p4_ht_thread(cpu))
> > +			hwc->config |= P4_CONFIG_HT;
> > +		else
> > +			hwc->config &= ~P4_CONFIG_HT;
> > +		return;
> > +	}
> > +
> >  	if (p4_ht_thread(cpu)) {
> >  		cccr &= ~P4_CCCR_OVF_PMI_T0;
> >  		cccr |= P4_CCCR_OVF_PMI_T1;


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/3] perf/core, x86: unify perfctr bitmasks
  2010-04-01  6:47                         ` Lin Ming
@ 2010-04-01 10:36                           ` Peter Zijlstra
  0 siblings, 0 replies; 29+ messages in thread
From: Peter Zijlstra @ 2010-04-01 10:36 UTC (permalink / raw)
  To: Lin Ming
  Cc: Cyrill Gorcunov, Robert Richter, Stephane Eranian, Ingo Molnar, LKML

On Thu, 2010-04-01 at 14:47 +0800, Lin Ming wrote:
> 
> In file included from arch/x86/kernel/cpu/perf_event.c:1326:
> arch/x86/kernel/cpu/perf_event_p6.c:94: error: ‘x86_hw_config’ undeclared here (not in a function)
> arch/x86/kernel/cpu/perf_event_p6.c:99: error: unknown field ‘raw_event’ specified in initializer
> arch/x86/kernel/cpu/perf_event_p6.c:99: error: ‘x86_pmu_raw_event’ undeclared here (not in a function)
> make[3]: *** [arch/x86/kernel/cpu/perf_event.o] Error 1
> 
> diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
> index 626abc0..4ec9680 100644
> --- a/arch/x86/kernel/cpu/perf_event_p6.c
> +++ b/arch/x86/kernel/cpu/perf_event_p6.c
> @@ -91,12 +91,11 @@ static __initconst struct x86_pmu p6_pmu = {
>         .enable_all             = p6_pmu_enable_all,
>         .enable                 = p6_pmu_enable_event,
>         .disable                = p6_pmu_disable_event,
> -       .hw_config              = x86_hw_config,
> +       .hw_config              = x86_pmu_hw_config,
>         .schedule_events        = x86_schedule_events,
>         .eventsel               = MSR_P6_EVNTSEL0,
>         .perfctr                = MSR_P6_PERFCTR0,
>         .event_map              = p6_pmu_event_map,
> -       .raw_event              = x86_pmu_raw_event,
>         .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
>         .apic                   = 1,
>         .max_period             = (1ULL << 31) - 1, 

egads, that hunk seems to have tunneled into another patch in my
queue... /me goes fix.

Thanks Lin.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [tip:perf/core] perf, x86: Undo some some *_counter* -> *_event* renames
  2010-03-29 16:36 ` [PATCH 1/3] perf/core, x86: undo some some *_counter* -> *_event* renames Robert Richter
@ 2010-04-02 19:09   ` tip-bot for Robert Richter
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Robert Richter @ 2010-04-02 19:09 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, robert.richter, a.p.zijlstra, tglx, mingo

Commit-ID:  948b1bb89a44561560531394c18da4a99215f772
Gitweb:     http://git.kernel.org/tip/948b1bb89a44561560531394c18da4a99215f772
Author:     Robert Richter <robert.richter@amd.com>
AuthorDate: Mon, 29 Mar 2010 18:36:50 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 2 Apr 2010 19:52:02 +0200

perf, x86: Undo some some *_counter* -> *_event* renames

The big rename:

 cdd6c48 perf: Do the big rename: Performance Counters -> Performance Events

accidentally renamed some members of stucts that were named after
registers in the spec. To avoid confusion this patch reverts some
changes. The related specs are MSR descriptions in AMD's BKDGs and the
ARCHITECTURAL PERFORMANCE MONITORING section in the Intel 64 and IA-32
Architectures Software Developer's Manuals.

This patch does:

 $ sed -i -e 's:num_events:num_counters:g' \
   arch/x86/include/asm/perf_event.h \
   arch/x86/kernel/cpu/perf_event_amd.c \
   arch/x86/kernel/cpu/perf_event.c \
   arch/x86/kernel/cpu/perf_event_intel.c \
   arch/x86/kernel/cpu/perf_event_p6.c \
   arch/x86/kernel/cpu/perf_event_p4.c \
   arch/x86/oprofile/op_model_ppro.c

 $ sed -i -e 's:event_bits:cntval_bits:g' -e 's:event_mask:cntval_mask:g' \
   arch/x86/kernel/cpu/perf_event_amd.c \
   arch/x86/kernel/cpu/perf_event.c \
   arch/x86/kernel/cpu/perf_event_intel.c \
   arch/x86/kernel/cpu/perf_event_p6.c \
   arch/x86/kernel/cpu/perf_event_p4.c

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1269880612-25800-2-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h      |    4 +-
 arch/x86/kernel/cpu/perf_event.c       |   74 ++++++++++++++++----------------
 arch/x86/kernel/cpu/perf_event_amd.c   |   12 +++---
 arch/x86/kernel/cpu/perf_event_intel.c |   16 +++---
 arch/x86/kernel/cpu/perf_event_p4.c    |   14 +++---
 arch/x86/kernel/cpu/perf_event_p6.c    |    6 +-
 arch/x86/oprofile/op_model_ppro.c      |    4 +-
 7 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 124dddd..987bf67 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -67,7 +67,7 @@
 union cpuid10_eax {
 	struct {
 		unsigned int version_id:8;
-		unsigned int num_events:8;
+		unsigned int num_counters:8;
 		unsigned int bit_width:8;
 		unsigned int mask_length:8;
 	} split;
@@ -76,7 +76,7 @@ union cpuid10_eax {
 
 union cpuid10_edx {
 	struct {
-		unsigned int num_events_fixed:4;
+		unsigned int num_counters_fixed:4;
 		unsigned int reserved:28;
 	} split;
 	unsigned int full;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b534356..9daaa1e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -195,10 +195,10 @@ struct x86_pmu {
 	u64		(*event_map)(int);
 	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_events;
-	int		num_events_fixed;
-	int		event_bits;
-	u64		event_mask;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
 	int		apic;
 	u64		max_period;
 	struct event_constraint *
@@ -268,7 +268,7 @@ static u64
 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int shift = 64 - x86_pmu.event_bits;
+	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
 	s64 delta;
@@ -320,12 +320,12 @@ static bool reserve_pmc_hardware(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
@@ -336,7 +336,7 @@ eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_events;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -352,7 +352,7 @@ static void release_pmc_hardware(void)
 {
 	int i;
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
@@ -547,7 +547,7 @@ static void x86_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -582,7 +582,7 @@ static void x86_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
@@ -657,14 +657,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 * assign events to counters starting with most
 	 * constrained events.
 	 */
-	wmax = x86_pmu.num_events;
+	wmax = x86_pmu.num_counters;
 
 	/*
 	 * when fixed event counters are present,
 	 * wmax is incremented by 1 to account
 	 * for one more choice
 	 */
-	if (x86_pmu.num_events_fixed)
+	if (x86_pmu.num_counters_fixed)
 		wmax++;
 
 	for (w = 1, num = n; num && w <= wmax; w++) {
@@ -714,7 +714,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	struct perf_event *event;
 	int n, max_count;
 
-	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 
 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -904,7 +904,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
 	wrmsrl(hwc->event_base + idx,
-			(u64)(-left) & x86_pmu.event_mask);
+			(u64)(-left) & x86_pmu.cntval_mask);
 
 	perf_event_update_userpage(event);
 
@@ -987,7 +987,7 @@ void perf_event_print_debug(void)
 	unsigned long flags;
 	int cpu, idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
@@ -1011,7 +1011,7 @@ void perf_event_print_debug(void)
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
 
@@ -1024,7 +1024,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1089,7 +1089,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1097,7 +1097,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		hwc = &event->hw;
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -1401,46 +1401,46 @@ void __init init_hw_perf_events(void)
 	if (x86_pmu.quirks)
 		x86_pmu.quirks();
 
-	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1;
-	perf_max_events = x86_pmu.num_events;
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	perf_max_events = x86_pmu.num_counters;
 
-	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 	}
 
 	x86_pmu.intel_ctrl |=
-		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
-				   0, x86_pmu.num_events);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+				   0, x86_pmu.num_counters);
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if (c->cmask != INTEL_ARCH_FIXED_MASK)
 				continue;
 
-			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
-			c->weight += x86_pmu.num_events;
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
 		}
 	}
 
 	pr_info("... version:                %d\n",     x86_pmu.version);
-	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
-	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
-	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
+	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
+	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
+	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
 	perf_cpu_notifier(x86_pmu_notifier);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 285623b..7753a5c 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -165,7 +165,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * be removed on one CPU at a time AND PMU is disabled
 	 * when we come here
 	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (nb->owners[i] == event) {
 			cmpxchg(nb->owners+i, event, NULL);
 			break;
@@ -215,7 +215,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct amd_nb *nb = cpuc->amd_nb;
 	struct perf_event *old = NULL;
-	int max = x86_pmu.num_events;
+	int max = x86_pmu.num_counters;
 	int i, j, k = -1;
 
 	/*
@@ -293,7 +293,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 	/*
 	 * initialize all possible NB constraints
 	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
@@ -385,9 +385,9 @@ static __initconst struct x86_pmu amd_pmu = {
 	.event_map		= amd_pmu_event_map,
 	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
+	.num_counters		= 4,
+	.cntval_bits		= 48,
+	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 676aac2..cc4d90a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -653,20 +653,20 @@ static void intel_pmu_reset(void)
 	unsigned long flags;
 	int idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
 
 	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
 		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
 		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-	}
+
 	if (ds)
 		ds->bts_index = ds->bts_buffer_base;
 
@@ -901,16 +901,16 @@ static __init int intel_pmu_init(void)
 		x86_pmu = intel_pmu;
 
 	x86_pmu.version			= version;
-	x86_pmu.num_events		= eax.split.num_events;
-	x86_pmu.event_bits		= eax.split.bit_width;
-	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.cntval_bits		= eax.split.bit_width;
+	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
 
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
 	 */
 	if (version > 1)
-		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
 	/*
 	 * v2 and above have a perf capabilities MSR
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 0d1be36..4139100 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -483,7 +483,7 @@ static void p4_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -540,7 +540,7 @@ static void p4_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -562,7 +562,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -579,7 +579,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		p4_pmu_clear_cccr_ovf(hwc);
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -794,10 +794,10 @@ static __initconst struct x86_pmu p4_pmu = {
 	 * though leave it restricted at moment assuming
 	 * HT is on
 	 */
-	.num_events		= ARCH_P4_MAX_CCCR,
+	.num_counters		= ARCH_P4_MAX_CCCR,
 	.apic			= 1,
-	.event_bits		= 40,
-	.event_mask		= (1ULL << 40) - 1,
+	.cntval_bits		= 40,
+	.cntval_mask		= (1ULL << 40) - 1,
 	.max_period		= (1ULL << 39) - 1,
 	.hw_config		= p4_hw_config,
 	.schedule_events	= p4_pmu_schedule_events,
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 877182c..b26fbc7 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -119,7 +119,7 @@ static __initconst struct x86_pmu p6_pmu = {
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,
 	.version		= 0,
-	.num_events		= 2,
+	.num_counters		= 2,
 	/*
 	 * Events have 40 bits implemented. However they are designed such
 	 * that bits [32-39] are sign extensions of bit 31. As such the
@@ -127,8 +127,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	 *
 	 * See IA-32 Intel Architecture Software developer manual Vol 3B
 	 */
-	.event_bits		= 32,
-	.event_mask		= (1ULL << 32) - 1,
+	.cntval_bits		= 32,
+	.cntval_mask		= (1ULL << 32) - 1,
 	.get_event_constraints	= x86_get_event_constraints,
 	.event_constraints	= p6_event_constraints,
 };
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2bf90fa..c8abc4d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -239,11 +239,11 @@ static void arch_perfmon_setup_counters(void)
 	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
 		current_cpu_data.x86_model == 15) {
 		eax.split.version_id = 2;
-		eax.split.num_events = 2;
+		eax.split.num_counters = 2;
 		eax.split.bit_width = 40;
 	}
 
-	num_counters = eax.split.num_events;
+	num_counters = eax.split.num_counters;
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
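
The x86_perf_event_update() hunk earlier in this patch also shows why the counter width matters: hardware counters are only cntval_bits wide, so a delta has to be sign-extended through a shift of (64 - cntval_bits). A minimal user-space sketch of that arithmetic follows; counter_delta() and the sample values are illustrative only, not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Same trick as x86_perf_event_update(): shift the raw values up so the
 * counter's MSB becomes bit 63, subtract, then shift back down as a
 * signed value so wrap-around deltas come out right. */
static int64_t counter_delta(uint64_t prev, uint64_t now, int cntval_bits)
{
	int shift = 64 - cntval_bits;

	return ((int64_t)(now << shift) - (int64_t)(prev << shift)) >> shift;
}

int main(void)
{
	/* a 48-bit (AMD) counter wrapping from near its maximum to 0x10 */
	printf("%lld\n",
	       (long long)counter_delta(0xFFFFFFFFFFF0ULL, 0x10ULL, 48));
	return 0;
}

This prints 32, the number of events counted across the wrap.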


* [tip:perf/core] perf, x86: implement ARCH_PERFMON_EVENTSEL bit masks
  2010-03-30  9:28     ` Robert Richter
@ 2010-04-02 19:09       ` tip-bot for Robert Richter
  0 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Robert Richter @ 2010-04-02 19:09 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, robert.richter, a.p.zijlstra, tglx, mingo

Commit-ID:  a098f4484bc7dae23f5b62360954007b99b64600
Gitweb:     http://git.kernel.org/tip/a098f4484bc7dae23f5b62360954007b99b64600
Author:     Robert Richter <robert.richter@amd.com>
AuthorDate: Tue, 30 Mar 2010 11:28:21 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 2 Apr 2010 19:52:03 +0200

perf, x86: implement ARCH_PERFMON_EVENTSEL bit masks

ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This
patch adds macros for the bit masks and removes local defines. The
function intel_pmu_raw_event() becomes x86_pmu_raw_event(), which is
generic across x86 models and is now also used for p6. Duplicate code
is removed.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100330092821.GH11907@erda.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h      |   58 ++++++++++++++------------------
 arch/x86/kernel/cpu/perf_event.c       |   19 +++++++++--
 arch/x86/kernel/cpu/perf_event_amd.c   |   15 +--------
 arch/x86/kernel/cpu/perf_event_intel.c |   22 +-----------
 arch/x86/kernel/cpu/perf_event_p6.c    |   20 +----------
 5 files changed, 45 insertions(+), 89 deletions(-)
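
Before the diff itself, a quick user-space sketch of what the consolidation buys: the per-model P6_/CORE_/K7_EVNTSEL masks removed below collapse into X86_RAW_EVENT_MASK (plus AMD64_EVENTSEL_EVENT for the AMD case), which keeps only the event-select fields a raw config is allowed to set. The macro values are copied from the patch; main() and the sample config are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* mirrors the new generic masks from <asm/perf_event.h> */
#define ARCH_PERFMON_EVENTSEL_EVENT	0x000000FFULL
#define ARCH_PERFMON_EVENTSEL_UMASK	0x0000FF00ULL
#define ARCH_PERFMON_EVENTSEL_EDGE	(1ULL << 18)
#define ARCH_PERFMON_EVENTSEL_INV	(1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK	0xFF000000ULL

#define X86_RAW_EVENT_MASK			\
	(ARCH_PERFMON_EVENTSEL_EVENT |		\
	 ARCH_PERFMON_EVENTSEL_UMASK |		\
	 ARCH_PERFMON_EVENTSEL_EDGE  |		\
	 ARCH_PERFMON_EVENTSEL_INV   |		\
	 ARCH_PERFMON_EVENTSEL_CMASK)

int main(void)
{
	/* raw config: event 0x2e, umask 0x41, plus USR|OS|INT|ENABLE bits */
	uint64_t raw = 0x53412eULL;

	/* same effect as the new x86_pmu_raw_event() helper: only the
	 * event/umask fields survive, the privilege and enable bits are
	 * set by hw_config itself */
	printf("filtered: 0x%llx\n",
	       (unsigned long long)(raw & X86_RAW_EVENT_MASK));
	return 0;
}

This prints "filtered: 0x412e".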

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 987bf67..f6d43db 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,39 +18,31 @@
 #define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
 
-#define ARCH_PERFMON_EVENTSEL_ENABLE			  (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_ANY			  (1 << 21)
-#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
-
-/*
- * Includes eventsel and unit mask as well:
- */
-
-
-#define INTEL_ARCH_EVTSEL_MASK		0x000000FFULL
-#define INTEL_ARCH_UNIT_MASK		0x0000FF00ULL
-#define INTEL_ARCH_EDGE_MASK		0x00040000ULL
-#define INTEL_ARCH_INV_MASK		0x00800000ULL
-#define INTEL_ARCH_CNT_MASK		0xFF000000ULL
-#define INTEL_ARCH_EVENT_MASK	(INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
-
-/*
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define INTEL_ARCH_FIXED_MASK \
-	(INTEL_ARCH_CNT_MASK| \
-	 INTEL_ARCH_INV_MASK| \
-	 INTEL_ARCH_EDGE_MASK|\
-	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+#define ARCH_PERFMON_EVENTSEL_EVENT			0x000000FFULL
+#define ARCH_PERFMON_EVENTSEL_UMASK			0x0000FF00ULL
+#define ARCH_PERFMON_EVENTSEL_USR			(1ULL << 16)
+#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_EDGE			(1ULL << 18)
+#define ARCH_PERFMON_EVENTSEL_INT			(1ULL << 20)
+#define ARCH_PERFMON_EVENTSEL_ANY			(1ULL << 21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
+#define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
+#define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
+
+#define AMD64_EVENTSEL_EVENT	\
+	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
+#define INTEL_ARCH_EVENT_MASK	\
+	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
+
+#define X86_RAW_EVENT_MASK		\
+	(ARCH_PERFMON_EVENTSEL_EVENT |	\
+	 ARCH_PERFMON_EVENTSEL_UMASK |	\
+	 ARCH_PERFMON_EVENTSEL_EDGE  |	\
+	 ARCH_PERFMON_EVENTSEL_INV   |	\
+	 ARCH_PERFMON_EVENTSEL_CMASK)
+#define AMD64_RAW_EVENT_MASK		\
+	(X86_RAW_EVENT_MASK          |  \
+	 AMD64_EVENTSEL_EVENT)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9daaa1e..1dd42c1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -143,13 +143,21 @@ struct cpu_hw_events {
  * Constraint on the Event code.
  */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
 /*
  * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
  */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
 
 /*
  * Constraint on the Event code + UMask
@@ -437,6 +445,11 @@ static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc
 	return 0;
 }
 
+static u64 x86_pmu_raw_event(u64 hw_event)
+{
+	return hw_event & X86_RAW_EVENT_MASK;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -1427,7 +1440,7 @@ void __init init_hw_perf_events(void)
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK)
 				continue;
 
 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7753a5c..37e9517 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -113,20 +113,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_REG_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
+	return hw_event & AMD64_RAW_EVENT_MASK;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cc4d90a..dfdd6f9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -452,24 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids
  },
 };
 
-static u64 intel_pmu_raw_event(u64 hw_event)
-{
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(INTEL_ARCH_EVTSEL_MASK |	\
-	 INTEL_ARCH_UNIT_MASK   |	\
-	 INTEL_ARCH_EDGE_MASK   |	\
-	 INTEL_ARCH_INV_MASK    |	\
-	 INTEL_ARCH_CNT_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
-}
-
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -788,7 +770,7 @@ static __initconst struct x86_pmu core_pmu = {
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
+	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
@@ -827,7 +809,7 @@ static __initconst struct x86_pmu intel_pmu = {
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
+	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index b26fbc7..03c139a 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event)
  */
 #define P6_NOP_EVENT			0x0000002EULL
 
-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define P6_EVNTSEL_MASK			\
-	(P6_EVNTSEL_EVENT_MASK |	\
-	 P6_EVNTSEL_UNIT_MASK  |	\
-	 P6_EVNTSEL_EDGE_MASK  |	\
-	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_REG_MASK)
-
-	return hw_event & P6_EVNTSEL_MASK;
-}
-
 static struct event_constraint p6_event_constraints[] =
 {
 	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
@@ -114,7 +96,7 @@ static __initconst struct x86_pmu p6_pmu = {
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,
-	.raw_event		= p6_pmu_raw_event,
+	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,


* [tip:perf/core] perf, x86: Fix up the ANY flag stuff
  2010-03-30 16:55           ` Peter Zijlstra
                               ` (2 preceding siblings ...)
  2010-03-30 18:29             ` Cyrill Gorcunov
@ 2010-04-02 19:09             ` tip-bot for Peter Zijlstra
  3 siblings, 0 replies; 29+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-04-02 19:09 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, gorcunov,
	robert.richter, tglx, mingo

Commit-ID:  b4cdc5c264b35c67007800dec3928e9547a9d70b
Gitweb:     http://git.kernel.org/tip/b4cdc5c264b35c67007800dec3928e9547a9d70b
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Tue, 30 Mar 2010 17:00:06 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 2 Apr 2010 19:52:04 +0200

perf, x86: Fix up the ANY flag stuff

Stephane noticed that the ANY flag was in generic arch code, and Cyrill
reported that it broke the P4 code.

Solve this by merging x86_pmu::raw_event into x86_pmu::hw_config and
providing intel_pmu- and amd_pmu-specific versions of this callback.

The intel_pmu version deals with the ANY flag, while the amd_pmu version
adds the few extra event bits AMD64 has.

Reported-by: Stephane Eranian <eranian@google.com>
Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Robert Richter <robert.richter@amd.com>
Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1269968113.5258.442.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c       |   35 +++++++++------------------
 arch/x86/kernel/cpu/perf_event_amd.c   |   17 ++++++++++---
 arch/x86/kernel/cpu/perf_event_intel.c |   30 ++++++++++++++++++++---
 arch/x86/kernel/cpu/perf_event_p4.c    |   40 +++++++++++++++++--------------
 arch/x86/kernel/cpu/perf_event_p6.c    |    3 +-
 5 files changed, 74 insertions(+), 51 deletions(-)
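
A short user-space sketch of what the ANY-flag change means for callers; the event code 0xc0 and the program itself are illustrative, only the permission rule comes from the patch:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_RAW;
	attr.config = (1ULL << 21) | 0xc0;	/* ANY-thread bit + event code */

	/* On an Intel PMU this is now accepted only on version 3+ hardware
	 * and for a sufficiently privileged caller; otherwise the open
	 * fails with -EINVAL or -EACCES. */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}

Before the patch the same check lived in the generic __hw_perf_event_init() path and applied regardless of PMU vendor or version; moving it into intel_pmu_hw_config() keeps the P4 and AMD paths unaffected.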

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1dd42c1..65e9c5e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -196,12 +196,11 @@ struct x86_pmu {
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
-	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
+	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
 	int		max_events;
 	int		num_counters;
 	int		num_counters_fixed;
@@ -426,28 +425,26 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
-static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+static int x86_pmu_hw_config(struct perf_event *event)
 {
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
 	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
 
 	/*
 	 * Count user and OS events unless requested not to
 	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+	if (!event->attr.exclude_user)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!event->attr.exclude_kernel)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	return 0;
-}
+	if (event->attr.type == PERF_TYPE_RAW)
+		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
-static u64 x86_pmu_raw_event(u64 hw_event)
-{
-	return hw_event & X86_RAW_EVENT_MASK;
+	return 0;
 }
 
 /*
@@ -489,7 +486,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	hwc->last_tag = ~0ULL;
 
 	/* Processor specifics */
-	err = x86_pmu.hw_config(attr, hwc);
+	err = x86_pmu.hw_config(event);
 	if (err)
 		return err;
 
@@ -508,16 +505,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Raw hw_event type provide the config in the hw_event structure
-	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
-		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+	if (attr->type == PERF_TYPE_RAW)
 		return 0;
-	}
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 37e9517..bbd7339 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -111,9 +111,19 @@ static u64 amd_pmu_event_map(int hw_event)
 	return amd_perfmon_event_map[hw_event];
 }
 
-static u64 amd_pmu_raw_event(u64 hw_event)
+static int amd_pmu_hw_config(struct perf_event *event)
 {
-	return hw_event & AMD64_RAW_EVENT_MASK;
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+	return 0;
 }
 
 /*
@@ -365,12 +375,11 @@ static __initconst struct x86_pmu amd_pmu = {
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= amd_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 	.num_counters		= 4,
 	.cntval_bits		= 48,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dfdd6f9..30bf10c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -758,6 +758,30 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
+		return 0;
+
+	if (x86_pmu.version < 3)
+		return -EINVAL;
+
+	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
+
+	return 0;
+}
+
 static __initconst struct x86_pmu core_pmu = {
 	.name			= "core",
 	.handle_irq		= x86_pmu_handle_irq,
@@ -765,12 +789,11 @@ static __initconst struct x86_pmu core_pmu = {
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= x86_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
@@ -804,12 +827,11 @@ static __initconst struct x86_pmu intel_pmu = {
 	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_event,
 	.disable		= intel_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= intel_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 4139100..acd237d 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -419,20 +419,7 @@ static u64 p4_pmu_event_map(int hw_event)
 	return config;
 }
 
-/*
- * We don't control raw events so it's up to the caller
- * to pass sane values (and we don't count the thread number
- * on HT machine but allow HT-compatible specifics to be
- * passed on)
- */
-static u64 p4_pmu_raw_event(u64 hw_event)
-{
-	return hw_event &
-		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
-		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
-}
-
-static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+static int p4_hw_config(struct perf_event *event)
 {
 	int cpu = raw_smp_processor_id();
 	u32 escr, cccr;
@@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
 	 */
 
 	cccr = p4_default_cccr_conf(cpu);
-	escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
-	hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
+	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
+					 event->attr.exclude_user);
+	event->hw.config = p4_config_pack_escr(escr) |
+			   p4_config_pack_cccr(cccr);
 
 	if (p4_ht_active() && p4_ht_thread(cpu))
-		hwc->config = p4_set_ht_bit(hwc->config);
+		event->hw.config = p4_set_ht_bit(event->hw.config);
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	/*
+	 * We don't control raw events so it's up to the caller
+	 * to pass sane values (and we don't count the thread number
+	 * on HT machine but allow HT-compatible specifics to be
+	 * passed on)
+	 *
+	 * XXX: HT wide things should check perf_paranoid_cpu() &&
+	 *      CAP_SYS_ADMIN
+	 */
+	event->hw.config |= event->attr.config &
+		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
+		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
 
 	return 0;
 }
@@ -785,7 +790,6 @@ static __initconst struct x86_pmu p4_pmu = {
 	.eventsel		= MSR_P4_BPU_CCCR0,
 	.perfctr		= MSR_P4_BPU_PERFCTR0,
 	.event_map		= p4_pmu_event_map,
-	.raw_event		= p4_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p4_general_events),
 	.get_event_constraints	= x86_get_event_constraints,
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 03c139a..9123e8e 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -91,12 +91,11 @@ static __initconst struct x86_pmu p6_pmu = {
 	.enable_all		= p6_pmu_enable_all,
 	.enable			= p6_pmu_enable_event,
 	.disable		= p6_pmu_disable_event,
-	.hw_config		= x86_hw_config,
+	.hw_config		= x86_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,
-	.raw_event		= x86_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,


Thread overview: 29+ messages
2010-03-29 16:36 [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Robert Richter
2010-03-29 16:36 ` [PATCH 1/3] perf/core, x86: undo some some *_counter* -> *_event* renames Robert Richter
2010-04-02 19:09   ` [tip:perf/core] perf, x86: Undo " tip-bot for Robert Richter
2010-03-29 16:36 ` [PATCH 2/3] perf/core, x86: removing p6_pmu_raw_event() Robert Richter
2010-03-29 16:36 ` [PATCH 3/3] perf/core, x86: implement ARCH_PERFMON_EVENTSEL bit masks Robert Richter
2010-03-29 16:48   ` Peter Zijlstra
2010-03-29 17:01     ` Robert Richter
2010-03-30  9:28     ` Robert Richter
2010-04-02 19:09       ` [tip:perf/core] perf, " tip-bot for Robert Richter
2010-03-30 10:11 ` [PATCH 0/3] perf/core, x86: unify perfctr bitmasks Stephane Eranian
2010-03-30 13:41   ` Robert Richter
2010-03-30 13:53     ` Stephane Eranian
2010-03-30 15:00       ` Peter Zijlstra
2010-03-30 15:11         ` Cyrill Gorcunov
2010-03-30 15:31           ` Stephane Eranian
2010-03-30 15:59         ` Robert Richter
2010-03-30 16:55           ` Peter Zijlstra
2010-03-30 17:11             ` Cyrill Gorcunov
2010-03-30 17:24             ` Robert Richter
2010-03-30 18:29             ` Cyrill Gorcunov
2010-03-30 19:04               ` Peter Zijlstra
2010-03-30 19:18                 ` Cyrill Gorcunov
2010-03-31 16:15                 ` Cyrill Gorcunov
2010-03-31 16:26                   ` Cyrill Gorcunov
2010-03-31 17:05                     ` Cyrill Gorcunov
2010-04-01  2:14                       ` Lin Ming
2010-04-01  6:47                         ` Lin Ming
2010-04-01 10:36                           ` Peter Zijlstra
2010-04-02 19:09             ` [tip:perf/core] perf, x86: Fix up the ANY flag stuff tip-bot for Peter Zijlstra
