All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] perf_events: update extra shared registers management (v2)
@ 2011-05-20 14:37 Stephane Eranian
  2011-05-23  9:11 ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2011-05-20 14:37 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo, peterz, andi, ming.m.lin


The following short series of patches improves the code
which manages the extra shared regs used by some events
on Intel processors. Those events require an extra MSR
which may be shared between sibling CPUs when HT is on.
When HT is off, the kernel still needs to ensure that
events within an event group do not try to program
different values into that extra MSR.

This series improves the current code for managing the
register sharing by using static allocation instead of
dynamically trying to find a table slot to host that
extra MSR. This greatly simplifies the code. The patch
also prepares the kernel for more registers with those
kinds of constraints (e.g, LBR_SELECT, LD_LAT).

The patch also adds the missing group validation of
events using those extra MSRs. Up until now, one could
put two instances of those events which had incompatible
values for the extra MSR. There was no upfront check and
the group would never be scheduled. Now, such group cannot
be constructed anymore (fail early).

Finally, the third patch adds the SandyBridge support for
the offcore_response events (which use these shared MSRs).
It also removes the offcore_response events from the
SandyBridge constraint event table. Those events don't
have any constraints contrary to what's published in
the documentation.

The second version updates PATCH 1/3 which was an
older version with reg->idx initialization problems.

[PATCH 0/3] introduction
[PATCH 1/3] rework of the register sharing logic
[PATCH 2/3] add missing shared regs validation
[PATCH 3/3] add Intel SandyBridge offcore_response support

Signed-off-by: Stephane Eranian <eranian@google.com>
---

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-20 14:37 [PATCH 0/3] perf_events: update extra shared registers management (v2) Stephane Eranian
@ 2011-05-23  9:11 ` Peter Zijlstra
  2011-05-23  9:32   ` Peter Zijlstra
  2011-07-01 15:23   ` [tip:perf/core] perf, intel: Try alternative OFFCORE encodings tip-bot for Peter Zijlstra
  0 siblings, 2 replies; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23  9:11 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: linux-kernel, mingo, andi, ming.m.lin


How about something like the below on top of your patches?

---
Subject: perf, intel: Try alternative OFFCORE encoding
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon May 23 11:08:15 CEST 2011

Since the OFFCORE registers are fully symmetric, try the other when the
specified one is already taken.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/x86/kernel/cpu/perf_event.c       |    5 ++-
 arch/x86/kernel/cpu/perf_event_intel.c |   45 ++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 10 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -326,9 +326,12 @@ struct x86_pmu {
 	 * Extra registers for events
 	 */
 	struct extra_reg *extra_regs;
-	bool regs_no_ht_sharing;
+	unsigned int er_flags;
 };
 
+#define ERF_NO_HT_SHARING	1
+#define ERF_HAS_RSP_1		2
+
 static struct x86_pmu x86_pmu __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1019,6 +1019,27 @@ intel_bts_constraints(struct perf_event
 	return NULL;
 }
 
+static bool intel_try_alt_er(struct perf_event *event, int idx)
+{
+	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+		return false;
+
+	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+		event->attr.config = 0x01bb;
+		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
+		event->hw.extra_reg.msr = MSR_OFFCORE_RSP_1;
+	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+		event->attr.config = 0x01b7;
+		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
+		event->hw.extra_reg.msr = MSR_OFFCORE_RSP_0;
+	}
+
+	if (event->hw.extra_reg.idx == idx)
+		return false;
+
+	return true;
+}
+
 /*
  * manage allocation of shared extra msr for certain events
  *
@@ -1028,19 +1049,21 @@ intel_bts_constraints(struct perf_event
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct hw_perf_event_extra *reg)
+				   struct perf_event *event)
 {
 	struct event_constraint *c = &emptyconstraint;
+	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
+	int idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
 		return &unconstrained;
 
+again:
 	era = &cpuc->shared_regs->regs[reg->idx];
 
 	raw_spin_lock(&era->lock);
-
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
 		/* lock in msr value */
@@ -1062,6 +1085,9 @@ __intel_shared_reg_get_constraints(struc
 		 * the regular event constraint table.
 		 */
 		c = &unconstrained;
+	} else if (intel_try_alt_er(event, idx)) {
+		raw_spin_unlock(&era->lock);
+		goto again;
 	}
 	raw_spin_unlock(&era->lock);
 
@@ -1096,13 +1122,12 @@ intel_shared_regs_constraints(struct cpu
 			      struct perf_event *event)
 {
 	struct event_constraint *c = NULL;
-	struct hw_perf_event_extra *xreg;
 
-	xreg = &event->hw.extra_reg;
-	if (xreg->idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, xreg);
+	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
 +		c = __intel_shared_reg_get_constraints(cpuc, event);
+
 	return c;
- }
+}
 
 static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
@@ -1261,7 +1286,7 @@ static void intel_pmu_cpu_starting(int c
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || x86_pmu.regs_no_ht_sharing)
+	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
 		return;
 
 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
@@ -1486,6 +1511,7 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1505,7 +1531,8 @@ static __init int intel_pmu_init(void)
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.regs_no_ht_sharing = true;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23  9:11 ` Peter Zijlstra
@ 2011-05-23  9:32   ` Peter Zijlstra
  2011-05-23  9:36     ` Stephane Eranian
  2011-07-01 15:23   ` [tip:perf/core] perf, intel: Try alternative OFFCORE encodings tip-bot for Peter Zijlstra
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23  9:32 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: linux-kernel, mingo, andi, ming.m.lin

On Mon, 2011-05-23 at 11:11 +0200, Peter Zijlstra wrote:
> +       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> +               event->attr.config = 0x01bb;
> +               event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
> +               event->hw.extra_reg.msr = MSR_OFFCORE_RSP_1;
> +       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> +               event->attr.config = 0x01b7;
> +               event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
> +               event->hw.extra_reg.msr = MSR_OFFCORE_RSP_0;
> +       } 

clearly I meant to write:

event->hw.config &= ~X86_RAW_EVENT_MASK;
event->hw.config |= 0x01b[b7];

:-)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23  9:32   ` Peter Zijlstra
@ 2011-05-23  9:36     ` Stephane Eranian
  2011-05-23 10:58       ` Stephane Eranian
  0 siblings, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2011-05-23  9:36 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, May 23, 2011 at 11:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2011-05-23 at 11:11 +0200, Peter Zijlstra wrote:
>> +       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>> +               event->attr.config = 0x01bb;
>> +               event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
>> +               event->hw.extra_reg.msr = MSR_OFFCORE_RSP_1;
>> +       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
>> +               event->attr.config = 0x01b7;
>> +               event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>> +               event->hw.extra_reg.msr = MSR_OFFCORE_RSP_0;
>> +       }
>
> clearly I meant to write:
>
> event->hw.config &= ~X86_RAW_EVENT_MASK;
> event->hw.config |= 0x01b[b7];
>
Yes, to preserve the other bits.
This patch looks good, I will test it some more.
Thanks.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23  9:36     ` Stephane Eranian
@ 2011-05-23 10:58       ` Stephane Eranian
  2011-05-23 11:10         ` Peter Zijlstra
  2011-05-23 11:11         ` Peter Zijlstra
  0 siblings, 2 replies; 14+ messages in thread
From: Stephane Eranian @ 2011-05-23 10:58 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, May 23, 2011 at 11:36 AM, Stephane Eranian <eranian@google.com> wrote:
> On Mon, May 23, 2011 at 11:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>> On Mon, 2011-05-23 at 11:11 +0200, Peter Zijlstra wrote:
>>> +       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>>> +               event->attr.config = 0x01bb;
>>> +               event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
>>> +               event->hw.extra_reg.msr = MSR_OFFCORE_RSP_1;
>>> +       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
>>> +               event->attr.config = 0x01b7;
>>> +               event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>>> +               event->hw.extra_reg.msr = MSR_OFFCORE_RSP_0;
>>> +       }
>>
>> clearly I meant to write:
>>
>> event->hw.config &= ~X86_RAW_EVENT_MASK;

Not quite, you want INTEL_ARCH_EVENT_MASK instead
because you only want to modify umask+event code.

There is a major issue as it stands, though. You can
get into an infinite loop bouncing between RSP_0 and RSP_1
in case there is no solution in the group, i.e., you have 3 values
for the extra MSR. I think you need to count the number of times
you've called intel_try_alt_er() with success or maintain some sort
of bitmask of possible alternate choices and when you exhaust that,
you simply fail.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 10:58       ` Stephane Eranian
@ 2011-05-23 11:10         ` Peter Zijlstra
  2011-05-23 12:00           ` Stephane Eranian
  2011-05-23 11:11         ` Peter Zijlstra
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23 11:10 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
> There is a major issue as it stands, though. You can
> get into an infinite loop bouncing between RSP_0 and RSP_1
> in case there is no solution in the group, i.e., you have 3 values
> for the extra MSR. I think you need to count the number of times
> you've called intel_try_alt_er() with success or maintain some sort
> of bitmask of possible alternate choices and when you exhaust that,
> you simply fail. 

That should be sorted by the compare with the initial idx value, no?
Once its back where it started out it'll bail.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 10:58       ` Stephane Eranian
  2011-05-23 11:10         ` Peter Zijlstra
@ 2011-05-23 11:11         ` Peter Zijlstra
  2011-05-23 11:56           ` Stephane Eranian
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23 11:11 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
> >> event->hw.config &= ~X86_RAW_EVENT_MASK;
> 
> Not quite, you want INTEL_ARCH_EVENT_MASK instead
> because you only want to modify umask+event code. 

Do EDGE,INV,CMASK actually make a difference for the OFFCORE event?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 11:11         ` Peter Zijlstra
@ 2011-05-23 11:56           ` Stephane Eranian
  2011-05-23 15:15             ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2011-05-23 11:56 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, May 23, 2011 at 1:11 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
>> >> event->hw.config &= ~X86_RAW_EVENT_MASK;
>>
>> Not quite, you want INTEL_ARCH_EVENT_MASK instead
>> because you only want to modify umask+event code.
>
> Do EDGE,INV,CMASK actually make a difference for the OFFCORE event?
>
The effect of those filters is independent of the event.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 11:10         ` Peter Zijlstra
@ 2011-05-23 12:00           ` Stephane Eranian
  2011-05-23 15:15             ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2011-05-23 12:00 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, May 23, 2011 at 1:10 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
>> There is a major issue as it stands, though. You can
>> get into an infinite loop bouncing between RSP_0 and RSP_1
>> in case there is no solution in the group, i.e., you have 3 values
>> for the extra MSR. I think you need to count the number of times
>> you've called intel_try_alt_er() with success or maintain some sort
>> of bitmask of possible alternate choices and when you exhaust that,
>> you simply fail.
>
> That should be sorted by the compare with the initial idx value, no?
> Once its back where it started out it'll bail.
>
Nope.

Take:
  - ev1=rsp_0:0x1001
  - ev2=rsp_0:0x1002
  - ev3=rsp_1:0x1008

ev1-> rsp_0
ev2-> rsp_0, conflict, then try yields rsp_1 -> ok
ev3 -> rsp_1, conflict, then rsp_0, but fails, try again -> rsp_1,
fails, and so on

The issue is that the intel_try() function does not know the
history of the swaps between rsp_0, rsp_1.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 12:00           ` Stephane Eranian
@ 2011-05-23 15:15             ` Peter Zijlstra
  2011-05-23 15:27               ` Stephane Eranian
  0 siblings, 1 reply; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23 15:15 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, 2011-05-23 at 14:00 +0200, Stephane Eranian wrote:
> On Mon, May 23, 2011 at 1:10 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
> >> There is a major issue as it stands, though. You can
> >> get into an infinite loop bouncing between RSP_0 and RSP_1
> >> in case there is no solution in the group, i.e., you have 3 values
> >> for the extra MSR. I think you need to count the number of times
> >> you've called intel_try_alt_er() with success or maintain some sort
> >> of bitmask of possible alternate choices and when you exhaust that,
> >> you simply fail.
> >
> > That should be sorted by the compare with the initial idx value, no?
> > Once its back where it started out it'll bail.
> >
> Nope.
> 
> Take:
>   - ev1=rsp_0:0x1001
>   - ev2=rsp_0:0x1002
>   - ev3=rsp_1:0x1008
> 
> ev1-> rsp_0
> ev2-> rsp_0, conflict, then try yields rsp_1 -> ok
> ev3 -> rsp_1, conflict, then rsp_0, but fails, try again -> rsp_1,
> fails, and so on
> 
> The issue is that the intel_try() function does not know the
> history of the swaps between rsp_0, rsp1.

But it does, we pass the initial reg->idx in, and return false when that
matches the new idx, so in your example, ev3 will do:

     rsp_1 -> conflict, 
 try rsp_0 -> conflict, 
 try rsp_1 -> bail, return emptyconstraint



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 11:56           ` Stephane Eranian
@ 2011-05-23 15:15             ` Peter Zijlstra
  0 siblings, 0 replies; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23 15:15 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, 2011-05-23 at 13:56 +0200, Stephane Eranian wrote:
> On Mon, May 23, 2011 at 1:11 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
> >> >> event->hw.config &= ~X86_RAW_EVENT_MASK;
> >>
> >> Not quite, you want INTEL_ARCH_EVENT_MASK instead
> >> because you only want to modify umask+event code.
> >
> > Do EDGE,INV,CMASK actually make a difference for the OFFCORE event?
> >
> The effect of those filters is independent of the event.

OK.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 15:15             ` Peter Zijlstra
@ 2011-05-23 15:27               ` Stephane Eranian
  2011-05-23 15:30                 ` Peter Zijlstra
  0 siblings, 1 reply; 14+ messages in thread
From: Stephane Eranian @ 2011-05-23 15:27 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, May 23, 2011 at 5:15 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2011-05-23 at 14:00 +0200, Stephane Eranian wrote:
>> On Mon, May 23, 2011 at 1:10 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>> > On Mon, 2011-05-23 at 12:58 +0200, Stephane Eranian wrote:
>> >> There is a major issue as it stands, though. You can
>> >> get into an infinite loop bouncing between RSP_0 and RSP_1
>> >> in case there is no solution in the group, i.e., you have 3 values
>> >> for the extra MSR. I think you need to count the number of times
>> >> you've called intel_try_alt_er() with success or maintain some sort
>> >> of bitmask of possible alternate choices and when you exhaust that,
>> >> you simply fail.
>> >
>> > That should be sorted by the compare with the initial idx value, no?
>> > Once its back where it started out it'll bail.
>> >
>> Nope.
>>
>> Take:
>>   - ev1=rsp_0:0x1001
>>   - ev2=rsp_0:0x1002
>>   - ev3=rsp_1:0x1008
>>
>> ev1-> rsp_0
>> ev2-> rsp_0, conflict, then try yields rsp_1 -> ok
>> ev3 -> rsp_1, conflict, then rsp_0, but fails, try again -> rsp_1,
>> fails, and so on
>>
>> The issue is that the intel_try() function does not know the
>> history of the swaps between rsp_0, rsp1.
>
> But it does, we pass the initial reg->idx in, and return false when that
> matches the new idx, so in your example, ev3 will do:
>
>     rsp_1 -> conflict,
>  try rsp_0 -> conflict,
>  try rsp_1 -> bail, return emptyconstraint
>
>
Ok, my fault. That's because I tweaked your patch a bit and
I did not keep track of idx. I guess the logic is not obvious.
I think you should rename idx to orig_idx.


>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] perf_events: update extra shared registers management (v2)
  2011-05-23 15:27               ` Stephane Eranian
@ 2011-05-23 15:30                 ` Peter Zijlstra
  0 siblings, 0 replies; 14+ messages in thread
From: Peter Zijlstra @ 2011-05-23 15:30 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: LKML, mingo, Andi Kleen, Lin Ming

On Mon, 2011-05-23 at 17:27 +0200, Stephane Eranian wrote:
> Ok, my fault. That's because I tweaked your patch a bit and
> I did not keep track of idx. 

AH! Ok, at least its not my brain that's broken, was starting to doubt
my sanity there for a moment ;-)

> I guess the logic is not obvious.
> I think you should rename idx to orig_idx.

Indeed, that would clarify things.

Thanks!

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [tip:perf/core] perf, intel: Try alternative OFFCORE encodings
  2011-05-23  9:11 ` Peter Zijlstra
  2011-05-23  9:32   ` Peter Zijlstra
@ 2011-07-01 15:23   ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 14+ messages in thread
From: tip-bot for Peter Zijlstra @ 2011-07-01 15:23 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, a.p.zijlstra, tglx, mingo

Commit-ID:  b79e8941fb9af07d810da91b4e29da2bba331b6e
Gitweb:     http://git.kernel.org/tip/b79e8941fb9af07d810da91b4e29da2bba331b6e
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Mon, 23 May 2011 11:08:15 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 1 Jul 2011 11:06:37 +0200

perf, intel: Try alternative OFFCORE encodings

Since the OFFCORE registers are fully symmetric, try the other one
when the specified one is already in use.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1306141897.18455.8.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c       |    5 +++-
 arch/x86/kernel/cpu/perf_event_intel.c |   44 ++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 583f311..c53d433 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -327,9 +327,12 @@ struct x86_pmu {
 	 * Extra registers for events
 	 */
 	struct extra_reg *extra_regs;
-	bool regs_no_ht_sharing;
+	unsigned int er_flags;
 };
 
+#define ERF_NO_HT_SHARING	1
+#define ERF_HAS_RSP_1		2
+
 static struct x86_pmu x86_pmu __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a674ae4..5c44862 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1018,6 +1018,29 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
+static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+{
+	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+		return false;
+
+	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
+	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01b7;
+		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	}
+
+	if (event->hw.extra_reg.idx == orig_idx)
+		return false;
+
+	return true;
+}
+
 /*
  * manage allocation of shared extra msr for certain events
  *
@@ -1027,16 +1050,19 @@ intel_bts_constraints(struct perf_event *event)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct hw_perf_event_extra *reg)
+				   struct perf_event *event)
 {
 	struct event_constraint *c = &emptyconstraint;
+	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
+	int orig_idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
 		return &unconstrained;
 
+again:
 	era = &cpuc->shared_regs->regs[reg->idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
@@ -1065,6 +1091,9 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		 * the regular event constraint table.
 		 */
 		c = &unconstrained;
+	} else if (intel_try_alt_er(event, orig_idx)) {
+		raw_spin_unlock(&era->lock);
+		goto again;
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1099,11 +1128,10 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
 	struct event_constraint *c = NULL;
-	struct hw_perf_event_extra *xreg;
 
-	xreg = &event->hw.extra_reg;
-	if (xreg->idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, xreg);
+	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
+		c = __intel_shared_reg_get_constraints(cpuc, event);
+
 	return c;
 }
 
@@ -1264,7 +1292,7 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || x86_pmu.regs_no_ht_sharing)
+	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
 		return;
 
 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
@@ -1489,6 +1517,7 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1508,7 +1537,8 @@ static __init int intel_pmu_init(void)
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.regs_no_ht_sharing = true;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-07-01 15:23 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-20 14:37 [PATCH 0/3] perf_events: update extra shared registers management (v2) Stephane Eranian
2011-05-23  9:11 ` Peter Zijlstra
2011-05-23  9:32   ` Peter Zijlstra
2011-05-23  9:36     ` Stephane Eranian
2011-05-23 10:58       ` Stephane Eranian
2011-05-23 11:10         ` Peter Zijlstra
2011-05-23 12:00           ` Stephane Eranian
2011-05-23 15:15             ` Peter Zijlstra
2011-05-23 15:27               ` Stephane Eranian
2011-05-23 15:30                 ` Peter Zijlstra
2011-05-23 11:11         ` Peter Zijlstra
2011-05-23 11:56           ` Stephane Eranian
2011-05-23 15:15             ` Peter Zijlstra
2011-07-01 15:23   ` [tip:perf/core] perf, intel: Try alternative OFFCORE encodings tip-bot for Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.