linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf: Fix intel shared extra msr allocation
@ 2012-06-01  3:20 Yan, Zheng
  2012-06-01  9:35 ` Stephane Eranian
  0 siblings, 1 reply; 27+ messages in thread
From: Yan, Zheng @ 2012-06-01  3:20 UTC (permalink / raw)
  To: a.p.zijlstra, eranian, linux-kernel

From: "Yan, Zheng" <zheng.z.yan@intel.com>

intel_shared_reg_get/put_constraints() can be indirectly called
by validate_group(). In that case, they should avoid modifying
the perf_event date structure because the event can be already
in active state. Otherwise the shared extra msr's reference
count will be left in inconsistent state.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel.c |   31 +++++++++++++++++++++++--------
 1 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..10840d0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static bool intel_try_alt_er(struct perf_event *event, int *idx,
+			     int orig_idx, bool fake_cpuc)
 {
-	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
 		return false;
 
+	/* don't modify the event structure if the cpuc is faked */
+	if (fake_cpuc) {
+		if (*idx == EXTRA_REG_RSP_0)
+			*idx = EXTRA_REG_RSP_1;
+		else if (*idx == EXTRA_REG_RSP_1)
+			*idx = EXTRA_REG_RSP_0;
+		return (*idx != orig_idx);
+	}
+
 	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01bb;
@@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
 	if (event->hw.extra_reg.idx == orig_idx)
 		return false;
 
+	*idx = event->hw.extra_reg.idx;
 	return true;
 }
 
@@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
+	struct intel_shared_regs *shared_regs = cpuc->shared_regs;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;
+	int idx = orig_idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/* shared msr is already allocated and cpuc is not faked */
+	if (reg->alloc && shared_regs->core_id != -1)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1181,14 +1194,16 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		atomic_inc(&era->ref);
 
 		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
+		if (shared_regs->core_id != -1)
+			reg->alloc = 1;
 
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
+	} else if (intel_try_alt_er(event, &idx, orig_idx,
+				    shared_regs->core_id == -1)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
 	}
@@ -1208,7 +1223,7 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	 * allocated. Also takes care of event which do
 	 * not use an extra shared reg
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->shared_regs->core_id == -1)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-01  3:20 [PATCH] perf: Fix intel shared extra msr allocation Yan, Zheng
@ 2012-06-01  9:35 ` Stephane Eranian
  2012-06-01 14:11   ` Yan, Zheng
  0 siblings, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-01  9:35 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: a.p.zijlstra, linux-kernel

On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> intel_shared_reg_get/put_constraints() can be indirectly called
> by validate_group(). In that case, they should avoid modifying
> the perf_event date structure because the event can be already
> in active state. Otherwise the shared extra msr's reference
> count will be left in inconsistent state.
>
I understand the problem but I am wondering if you actually saw
it in real life. The reason I am asking is because  of the way
validate_group() collects the events and how they are added
to sibling_list. The new event is added at the tail. Thus it will
come last, and will get to __intel_shared_reg_get_constraints()
last, thus I am wondering if it can really modify the programming
on the existing events.

See more comments inline.

> Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   31 +++++++++++++++++++++++--------
>  1 files changed, 23 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..10840d0 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
>        return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static bool intel_try_alt_er(struct perf_event *event, int *idx,
> +                            int orig_idx, bool fake_cpuc)
>  {
> -       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> +       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
>                return false;
>
> +       /* don't modify the event structure if the cpuc is faked */
> +       if (fake_cpuc) {
> +               if (*idx == EXTRA_REG_RSP_0)
> +                       *idx = EXTRA_REG_RSP_1;
> +               else if (*idx == EXTRA_REG_RSP_1)
> +                       *idx = EXTRA_REG_RSP_0;
> +               return (*idx != orig_idx);
> +       }
> +
I understand that.

>        if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
> @@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>        if (event->hw.extra_reg.idx == orig_idx)
>                return false;
>
> +       *idx = event->hw.extra_reg.idx;
>        return true;
>  }
>
> @@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>                                   struct hw_perf_event_extra *reg)
>  {
>        struct event_constraint *c = &emptyconstraint;
> +       struct intel_shared_regs *shared_regs = cpuc->shared_regs;
>        struct er_account *era;
>        unsigned long flags;
>        int orig_idx = reg->idx;
> +       int idx = orig_idx;
>
> -       /* already allocated shared msr */
> -       if (reg->alloc)
> +       /* shared msr is already allocated and cpuc is not faked */
> +       if (reg->alloc && shared_regs->core_id != -1)
>                return NULL; /* call x86_get_event_constraint() */
>
I don't understand what you need this stuff. Shared_regs is faked as well.

>  again:
> -       era = &cpuc->shared_regs->regs[reg->idx];
> +       era = &shared_regs->regs[idx];
>        /*
>         * we use spin_lock_irqsave() to avoid lockdep issues when
>         * passing a fake cpuc
> @@ -1181,14 +1194,16 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>                atomic_inc(&era->ref);
>
>                /* no need to reallocate during incremental event scheduling */
> -               reg->alloc = 1;
> +               if (shared_regs->core_id != -1)
> +                       reg->alloc = 1;
>
>                /*
>                 * need to call x86_get_event_constraint()
>                 * to check if associated event has constraints
>                 */
>                c = NULL;
> -       } else if (intel_try_alt_er(event, orig_idx)) {
> +       } else if (intel_try_alt_er(event, &idx, orig_idx,
> +                                   shared_regs->core_id == -1)) {
>                raw_spin_unlock_irqrestore(&era->lock, flags);
>                goto again;
>        }
> @@ -1208,7 +1223,7 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>         * allocated. Also takes care of event which do
>         * not use an extra shared reg
>         */
> -       if (!reg->alloc)
> +       if (!reg->alloc || cpuc->shared_regs->core_id == -1)
>                return;
>
>        era = &cpuc->shared_regs->regs[reg->idx];
> --
> 1.7.7.6
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-01  9:35 ` Stephane Eranian
@ 2012-06-01 14:11   ` Yan, Zheng
  2012-06-04 13:12     ` Stephane Eranian
  2012-06-05 10:14     ` Peter Zijlstra
  0 siblings, 2 replies; 27+ messages in thread
From: Yan, Zheng @ 2012-06-01 14:11 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, a.p.zijlstra, linux-kernel

On Fri, Jun 1, 2012 at 5:35 PM, Stephane Eranian <eranian@google.com> wrote:
> On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>
>> intel_shared_reg_get/put_constraints() can be indirectly called
>> by validate_group(). In that case, they should avoid modifying
>> the perf_event date structure because the event can be already
>> in active state. Otherwise the shared extra msr's reference
>> count will be left in inconsistent state.
>>
> I understand the problem but I am wondering if you actually saw
> it in real life. The reason I am asking is because  of the way
> validate_group() collects the events and how they are added
> to sibling_list. The new event is added at the tail. Thus it will
> come last, and will get to __intel_shared_reg_get_constraints()
> last, thus I am wondering if it can really modify the programming
> on the existing events.

The real problem is from __intel_shared_reg_put_constraints(). it set
reg->alloc to 0 and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
reference count. Later when deleting the event,  put_constraints() will find
reg->alloc is 0 and it won't decrease the shared msr's reference count.

Run 'perf stat --group -a -C 0 -e LLC-loads -e LLC-stores sleep 1" on
Nehalem can trigger the bug.

>
> See more comments inline.
>
>> Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
>> ---
>>  arch/x86/kernel/cpu/perf_event_intel.c |   31 +++++++++++++++++++++++--------
>>  1 files changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
>> index 166546e..10840d0 100644
>> --- a/arch/x86/kernel/cpu/perf_event_intel.c
>> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
>> @@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
>>        return NULL;
>>  }
>>
>> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>> +static bool intel_try_alt_er(struct perf_event *event, int *idx,
>> +                            int orig_idx, bool fake_cpuc)
>>  {
>> -       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
>> +       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
>>                return false;
>>
>> +       /* don't modify the event structure if the cpuc is faked */
>> +       if (fake_cpuc) {
>> +               if (*idx == EXTRA_REG_RSP_0)
>> +                       *idx = EXTRA_REG_RSP_1;
>> +               else if (*idx == EXTRA_REG_RSP_1)
>> +                       *idx = EXTRA_REG_RSP_0;
>> +               return (*idx != orig_idx);
>> +       }
>> +
> I understand that.
>
>>        if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>>                event->hw.config |= 0x01bb;
>> @@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>>        if (event->hw.extra_reg.idx == orig_idx)
>>                return false;
>>
>> +       *idx = event->hw.extra_reg.idx;
>>        return true;
>>  }
>>
>> @@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>                                   struct hw_perf_event_extra *reg)
>>  {
>>        struct event_constraint *c = &emptyconstraint;
>> +       struct intel_shared_regs *shared_regs = cpuc->shared_regs;
>>        struct er_account *era;
>>        unsigned long flags;
>>        int orig_idx = reg->idx;
>> +       int idx = orig_idx;
>>
>> -       /* already allocated shared msr */
>> -       if (reg->alloc)
>> +       /* shared msr is already allocated and cpuc is not faked */
>> +       if (reg->alloc && shared_regs->core_id != -1)
>>                return NULL; /* call x86_get_event_constraint() */
>>
> I don't understand what you need this stuff. Shared_regs is faked as well.

The event can be in active state, we should avoid clearing reg->alloc.

Regards
Yan, Zheng

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-01 14:11   ` Yan, Zheng
@ 2012-06-04 13:12     ` Stephane Eranian
  2012-06-05  2:18       ` Yan, Zheng
  2012-06-05 10:14     ` Peter Zijlstra
  1 sibling, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-04 13:12 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Yan, Zheng, a.p.zijlstra, linux-kernel

On Fri, Jun 1, 2012 at 4:11 PM, Yan, Zheng <zheng.z.yan@linux.intel.com> wrote:
> On Fri, Jun 1, 2012 at 5:35 PM, Stephane Eranian <eranian@google.com> wrote:
>> On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
>>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>>
>>> intel_shared_reg_get/put_constraints() can be indirectly called
>>> by validate_group(). In that case, they should avoid modifying
>>> the perf_event date structure because the event can be already
>>> in active state. Otherwise the shared extra msr's reference
>>> count will be left in inconsistent state.
>>>
>> I understand the problem but I am wondering if you actually saw
>> it in real life. The reason I am asking is because  of the way
>> validate_group() collects the events and how they are added
>> to sibling_list. The new event is added at the tail. Thus it will
>> come last, and will get to __intel_shared_reg_get_constraints()
>> last, thus I am wondering if it can really modify the programming
>> on the existing events.
>
> The real problem is from __intel_shared_reg_put_constraints(). it set
> reg->alloc to 0 and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
> reference count. Later when deleting the event,  put_constraints() will find
> reg->alloc is 0 and it won't decrease the shared msr's reference count.
>
> Run 'perf stat --group -a -C 0 -e LLC-loads -e LLC-stores sleep 1" on
> Nehalem can trigger the bug.
>
And what do you see in this particular example?

I'd like to see the results via libpfm4 and /perf_examples/syst_count:
$ sudo ./syst_count -e offcore_response_0:dmnd_data_rd
-eoffcore_response_0:dmnd_rfo  -p -d 10 -c 0

Should see 50% for each event group.

>>
>> See more comments inline.
>>
>>> Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
>>> ---
>>>  arch/x86/kernel/cpu/perf_event_intel.c |   31 +++++++++++++++++++++++--------
>>>  1 files changed, 23 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
>>> index 166546e..10840d0 100644
>>> --- a/arch/x86/kernel/cpu/perf_event_intel.c
>>> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
>>> @@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
>>>        return NULL;
>>>  }
>>>
>>> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>>> +static bool intel_try_alt_er(struct perf_event *event, int *idx,
>>> +                            int orig_idx, bool fake_cpuc)
>>>  {
>>> -       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
>>> +       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
>>>                return false;
>>>
>>> +       /* don't modify the event structure if the cpuc is faked */
>>> +       if (fake_cpuc) {
>>> +               if (*idx == EXTRA_REG_RSP_0)
>>> +                       *idx = EXTRA_REG_RSP_1;
>>> +               else if (*idx == EXTRA_REG_RSP_1)
>>> +                       *idx = EXTRA_REG_RSP_0;
>>> +               return (*idx != orig_idx);
>>> +       }
>>> +
>> I understand that.
>>
>>>        if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>>>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>>>                event->hw.config |= 0x01bb;
>>> @@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>>>        if (event->hw.extra_reg.idx == orig_idx)
>>>                return false;
>>>
>>> +       *idx = event->hw.extra_reg.idx;
>>>        return true;
>>>  }
>>>
>>> @@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>>                                   struct hw_perf_event_extra *reg)
>>>  {
>>>        struct event_constraint *c = &emptyconstraint;
>>> +       struct intel_shared_regs *shared_regs = cpuc->shared_regs;
>>>        struct er_account *era;
>>>        unsigned long flags;
>>>        int orig_idx = reg->idx;
>>> +       int idx = orig_idx;
>>>
>>> -       /* already allocated shared msr */
>>> -       if (reg->alloc)
>>> +       /* shared msr is already allocated and cpuc is not faked */
>>> +       if (reg->alloc && shared_regs->core_id != -1)
>>>                return NULL; /* call x86_get_event_constraint() */
>>>
>> I don't understand what you need this stuff. Shared_regs is faked as well.
>
> The event can be in active state, we should avoid clearing reg->alloc.
>
> Regards
> Yan, Zheng

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-04 13:12     ` Stephane Eranian
@ 2012-06-05  2:18       ` Yan, Zheng
  0 siblings, 0 replies; 27+ messages in thread
From: Yan, Zheng @ 2012-06-05  2:18 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, a.p.zijlstra, linux-kernel

On 06/04/2012 09:12 PM, Stephane Eranian wrote:
> On Fri, Jun 1, 2012 at 4:11 PM, Yan, Zheng <zheng.z.yan@linux.intel.com> wrote:
>> On Fri, Jun 1, 2012 at 5:35 PM, Stephane Eranian <eranian@google.com> wrote:
>>> On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
>>>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>>>
>>>> intel_shared_reg_get/put_constraints() can be indirectly called
>>>> by validate_group(). In that case, they should avoid modifying
>>>> the perf_event date structure because the event can be already
>>>> in active state. Otherwise the shared extra msr's reference
>>>> count will be left in inconsistent state.
>>>>
>>> I understand the problem but I am wondering if you actually saw
>>> it in real life. The reason I am asking is because  of the way
>>> validate_group() collects the events and how they are added
>>> to sibling_list. The new event is added at the tail. Thus it will
>>> come last, and will get to __intel_shared_reg_get_constraints()
>>> last, thus I am wondering if it can really modify the programming
>>> on the existing events.
>>
>> The real problem is from __intel_shared_reg_put_constraints(). it set
>> reg->alloc to 0 and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
>> reference count. Later when deleting the event,  put_constraints() will find
>> reg->alloc is 0 and it won't decrease the shared msr's reference count.
>>
>> Run 'perf stat --group -a -C 0 -e LLC-loads -e LLC-stores sleep 1" on
>> Nehalem can trigger the bug.
>>
> And what do you see in this particular example?
The offcore_rsp msr's reference count are left in inconsistent state. It means
we can only use the msr for LLC-loads event after that. Any other events that
require programming the offcore_rsp msr will fail.

> 
> I'd like to see the results via libpfm4 and /perf_examples/syst_count:
> $ sudo ./syst_count -e offcore_response_0:dmnd_data_rd
> -eoffcore_response_0:dmnd_rfo  -p -d 10 -c 0
> 
This does not use the event group feature. do you mean
./syst_count -e offcore_response_0:dmnd_data_rd,offcore_response_0:dmnd_rfo -p -d 10 -c 0.

After apply my patch, the above command fails on Nehalem. Because there is no offcore_rsp1.

Regards
Yan, Zheng

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-01 14:11   ` Yan, Zheng
  2012-06-04 13:12     ` Stephane Eranian
@ 2012-06-05 10:14     ` Peter Zijlstra
  2012-06-05 10:21       ` Stephane Eranian
  1 sibling, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 10:14 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Stephane Eranian, Yan, Zheng, linux-kernel

On Fri, 2012-06-01 at 22:11 +0800, Yan, Zheng wrote:
> 
> The real problem is from __intel_shared_reg_put_constraints(). it set
> reg->alloc to 0 

ok

> and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
> reference count. 

So? That's fake state, who cares what its left in?

> Later when deleting the event,  put_constraints() will find
> reg->alloc is 0 and it won't decrease the shared msr's reference count.

OK, so the only problem is us setting reg->alloc to 0?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 10:14     ` Peter Zijlstra
@ 2012-06-05 10:21       ` Stephane Eranian
  2012-06-05 10:27         ` Peter Zijlstra
  0 siblings, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 10:21 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, Jun 5, 2012 at 12:14 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Fri, 2012-06-01 at 22:11 +0800, Yan, Zheng wrote:
>>
>> The real problem is from __intel_shared_reg_put_constraints(). it set
>> reg->alloc to 0
>
> ok
>
>> and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
>> reference count.
>
> So? That's fake state, who cares what its left in?
>
Right, that's what I am trying to elucidate. Who cares about it
beyond validate_group()? The fake_cpuc is reallocated for each
validate group. And this includes the fake shared_regs.

>> Later when deleting the event,  put_constraints() will find
>> reg->alloc is 0 and it won't decrease the shared msr's reference count.
>
> OK, so the only problem is us setting reg->alloc to 0?

I agree with the first part of the patch in intel_try_alt_er(), we
should not touch
the actual event struct. But I am still unclear about the reg->alloc part.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 10:21       ` Stephane Eranian
@ 2012-06-05 10:27         ` Peter Zijlstra
  2012-06-05 10:38           ` Stephane Eranian
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 10:27 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 12:21 +0200, Stephane Eranian wrote:
> I agree with the first part of the patch in intel_try_alt_er(), we
> should not touch
> the actual event struct. But I am still unclear about the reg->alloc part.

reg->alloc is part of the actual event.

Thing is, the patch is horridly ugly.. while I agree that changing event
state isn't good, special casing all that code isn't good either.

I was looking at cloning the events for validate_group() as well, but so
far that's not turning out too pretty either.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 10:27         ` Peter Zijlstra
@ 2012-06-05 10:38           ` Stephane Eranian
  2012-06-05 12:07             ` Peter Zijlstra
  0 siblings, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 10:38 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, Jun 5, 2012 at 12:27 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Tue, 2012-06-05 at 12:21 +0200, Stephane Eranian wrote:
>> I agree with the first part of the patch in intel_try_alt_er(), we
>> should not touch
>> the actual event struct. But I am still unclear about the reg->alloc part.
>
> reg->alloc is part of the actual event.
>
Ok, I missed that (despite writing some of that code ;-<)

> Thing is, the patch is horridly ugly.. while I agree that changing event
> state isn't good, special casing all that code isn't good either.

Yeah, not pretty.

>
> I was looking at cloning the events for validate_group() as well, but so
> far that's not turning out too pretty either.

How about we add a field or flag to cpuc to tell it's fake, and then in
try_alt_er() and __intel_shared_reg_get_constraints() we avoid touching
live struct (like reg->alloc) if fake==1. I think he was trying to do the same
with the core_id == -1 test.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 10:38           ` Stephane Eranian
@ 2012-06-05 12:07             ` Peter Zijlstra
  2012-06-05 12:39               ` Peter Zijlstra
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 12:07 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 12:38 +0200, Stephane Eranian wrote:
> How about we add a field or flag to cpuc to tell it's fake, and then
> in
> try_alt_er() and __intel_shared_reg_get_constraints() we avoid
> touching
> live struct (like reg->alloc) if fake==1. I think he was trying to do
> the same with the core_id == -1 test. 

We might have to do something like that, however I'm trying to figure
out when that reg->alloc test in __intel_shared_reg_get_contraints() is
useful.

If it is useful in event scheduling, we cannot just leave it out in
validate_group().

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 12:07             ` Peter Zijlstra
@ 2012-06-05 12:39               ` Peter Zijlstra
  2012-06-05 12:51                 ` Stephane Eranian
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 12:39 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 14:07 +0200, Peter Zijlstra wrote:
> On Tue, 2012-06-05 at 12:38 +0200, Stephane Eranian wrote:
> > How about we add a field or flag to cpuc to tell it's fake, and then
> > in
> > try_alt_er() and __intel_shared_reg_get_constraints() we avoid
> > touching
> > live struct (like reg->alloc) if fake==1. I think he was trying to do
> > the same with the core_id == -1 test. 
> 
> We might have to do something like that, however I'm trying to figure
> out when that reg->alloc test in __intel_shared_reg_get_contraints() is
> useful.
> 
> If it is useful in event scheduling, we cannot just leave it out in
> validate_group().

OK, so x86_schedule_events() can call that multiple times on the same
event in case we keep adding events.. it needs the constraints of the
previous events as well, and in that case we skip the whole extra_reg
muck.

But for validate_group() we only do this once, so it should work. Nasty
though.




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 12:39               ` Peter Zijlstra
@ 2012-06-05 12:51                 ` Stephane Eranian
  2012-06-05 13:04                   ` Peter Zijlstra
  0 siblings, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 12:51 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, Jun 5, 2012 at 2:39 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2012-06-05 at 14:07 +0200, Peter Zijlstra wrote:
>> On Tue, 2012-06-05 at 12:38 +0200, Stephane Eranian wrote:
>> > How about we add a field or flag to cpuc to tell it's fake, and then
>> > in
>> > try_alt_er() and __intel_shared_reg_get_constraints() we avoid
>> > touching
>> > live struct (like reg->alloc) if fake==1. I think he was trying to do
>> > the same with the core_id == -1 test.
>>
>> We might have to do something like that, however I'm trying to figure
>> out when that reg->alloc test in __intel_shared_reg_get_contraints() is
>> useful.
>>
>> If it is useful in event scheduling, we cannot just leave it out in
>> validate_group().
>
> OK, so x86_schedule_events() can call that multiple times on the same
> event in case we keep adding events.. it needs the constraints of the
> previous events as well, and in that case we skip the whole extra_reg
> muck.

That reg->alloc is to avoid going through that shared MSR allocation
code again because we already have a valid assignment. It helps with
incremental scheduling of events (collect() -> schedule()).
>
> But for validate_group() we only do this once, so it should work. Nasty
> though.

True, only one pass. So I think if you say in:
__intel_shared_reg_put_constraints()

     if (!cpuc->is_fake)
           reg->alloc = 1;

That should do it given that:

__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
                                   struct hw_perf_event_extra *reg)
{
        struct er_account *era;
        if (!reg->alloc)
                return;

}

But then, if reg->alloc was already set to 1, you will have a problem
if you leave
it as is. So I think in __intel_shared_reg_put_constraints(), you still need:

__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
                                   struct hw_perf_event_extra *reg)
{
       if (!reg->alloc || cpuc->is_fake)
          return;

Or something like that.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 12:51                 ` Stephane Eranian
@ 2012-06-05 13:04                   ` Peter Zijlstra
  2012-06-05 13:30                     ` [PATCH] perf, x86: Fix Intel shared extra MSR allocation Peter Zijlstra
  2012-06-05 13:31                     ` [PATCH] perf: Fix intel shared extra msr allocation Stephane Eranian
  0 siblings, 2 replies; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 13:04 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 14:51 +0200, Stephane Eranian wrote:
> So I think if you say in:
> __intel_shared_reg_put_constraints()
> 
>      if (!cpuc->is_fake)
>            reg->alloc = 1;
> 
> That should do it given that:
> 
> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>                                    struct hw_perf_event_extra *reg)
> {
>         struct er_account *era;
>         if (!reg->alloc)
>                 return;
> 
> }
> 
> But then, if reg->alloc was already set to 1, you will have a problem
> if you leave
> it as is. So I think in __intel_shared_reg_put_constraints(), you still need:
> 
> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>                                    struct hw_perf_event_extra *reg)
> {
>        if (!reg->alloc || cpuc->is_fake)
>           return;
> 
> Or something like that. 

Right, and then something slightly less hideous for intel_try_alt_er().

How does something like this look?

---
 arch/x86/kernel/cpu/perf_event_intel.c |   43 +++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..a3b7eb3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01bb;
 		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	} else if (idx == EXTRA_REG_RSP_1) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
 		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1181,16 +1187,23 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		atomic_inc(&era->ref);
 
 		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+			reg->alloc = 1;
+		}
 
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH] perf, x86: Fix Intel shared extra MSR allocation
  2012-06-05 13:04                   ` Peter Zijlstra
@ 2012-06-05 13:30                     ` Peter Zijlstra
  2012-06-05 13:56                       ` Peter Zijlstra
                                         ` (2 more replies)
  2012-06-05 13:31                     ` [PATCH] perf: Fix intel shared extra msr allocation Stephane Eranian
  1 sibling, 3 replies; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 13:30 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

Subject: perf: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <zheng.z.yan@linux.intel.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/x86/kernel/cpu/perf_event.c       |    1 +
 arch/x86/kernel/cpu/perf_event.h       |    1 +
 arch/x86/kernel/cpu/perf_event_intel.c |   74 +++++++++++++++++++++++---------
 3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..6d00c42 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01bb;
 		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	} else if (idx == EXTRA_REG_RSP_1) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
 		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1179,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/* 
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1209,17 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1233,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 13:04                   ` Peter Zijlstra
  2012-06-05 13:30                     ` [PATCH] perf, x86: Fix Intel shared extra MSR allocation Peter Zijlstra
@ 2012-06-05 13:31                     ` Stephane Eranian
  2012-06-05 13:32                       ` Peter Zijlstra
  1 sibling, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 13:31 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

Looks nicer, still missing this little piece + is_fake definition and
setting is allocate_fakecpuc())

@@ -1210,7 +1210,7 @@ __intel_shared_reg_put_constraints(struct
cpu_hw_events *cpuc,
         * allocated. Also takes care of event which do
         * not use an extra shared reg
         */
-       if (!reg->alloc)
+       if (!reg->alloc || cpuc->is_fake)
                return;


On Tue, Jun 5, 2012 at 3:04 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2012-06-05 at 14:51 +0200, Stephane Eranian wrote:
>> So I think if you say in:
>> __intel_shared_reg_put_constraints()
>>
>>      if (!cpuc->is_fake)
>>            reg->alloc = 1;
>>
>> That should do it given that:
>>
>> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>>                                    struct hw_perf_event_extra *reg)
>> {
>>         struct er_account *era;
>>         if (!reg->alloc)
>>                 return;
>>
>> }
>>
>> But then, if reg->alloc was already set to 1, you will have a problem
>> if you leave
>> it as is. So I think in __intel_shared_reg_put_constraints(), you still need:
>>
>> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>>                                    struct hw_perf_event_extra *reg)
>> {
>>        if (!reg->alloc || cpuc->is_fake)
>>           return;
>>
>> Or something like that.
>
> Right, and then something slightly less hideous for intel_try_alt_er().
>
> How does something like this look?
>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   43 +++++++++++++++++++++-----------
>  1 file changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..a3b7eb3 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
>        return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static int intel_alt_er(int idx)
>  {
>        if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> -               return false;
> +               return idx;
>
> -       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> +       if (idx == EXTRA_REG_RSP_0)
> +               return EXTRA_REG_RSP_1;
> +
> +       if (idx == EXTRA_REG_RSP_1)
> +               return EXTRA_REG_RSP_0;
> +
> +       return idx;
> +}
> +
> +static void intel_fixup_er(struct perf_event *event, int idx)
> +{
> +       if (idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
>                event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> -       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> +       } else if (idx == EXTRA_REG_RSP_1) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01b7;
>                event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
>        }
> -
> -       if (event->hw.extra_reg.idx == orig_idx)
> -               return false;
> -
> -       return true;
>  }
>
>  /*
> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>        struct event_constraint *c = &emptyconstraint;
>        struct er_account *era;
>        unsigned long flags;
> -       int orig_idx = reg->idx;
> +       int idx = reg->idx;
>
>        /* already allocated shared msr */
>        if (reg->alloc)
>                return NULL; /* call x86_get_event_constraint() */
>
>  again:
> -       era = &cpuc->shared_regs->regs[reg->idx];
> +       era = &cpuc->shared_regs->regs[idx];
>        /*
>         * we use spin_lock_irqsave() to avoid lockdep issues when
>         * passing a fake cpuc
> @@ -1181,16 +1187,23 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>                atomic_inc(&era->ref);
>
>                /* no need to reallocate during incremental event scheduling */
> -               reg->alloc = 1;
> +               if (!cpuc->is_fake) {
> +                       if (idx != reg->idx)
> +                               intel_fixup_er(event, idx);
> +                       reg->alloc = 1;
> +               }
>
>                /*
>                 * need to call x86_get_event_constraint()
>                 * to check if associated event has constraints
>                 */
>                c = NULL;
> -       } else if (intel_try_alt_er(event, orig_idx)) {
> -               raw_spin_unlock_irqrestore(&era->lock, flags);
> -               goto again;
> +       } else {
> +               idx = intel_alt_er(idx);
> +               if (idx != reg->idx) {
> +                       raw_spin_unlock_irqrestore(&era->lock, flags);
> +                       goto again;
> +               }
>        }
>        raw_spin_unlock_irqrestore(&era->lock, flags);
>
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 13:31                     ` [PATCH] perf: Fix intel shared extra msr allocation Stephane Eranian
@ 2012-06-05 13:32                       ` Peter Zijlstra
  2012-06-05 13:38                         ` Stephane Eranian
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 13:32 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 15:31 +0200, Stephane Eranian wrote:
> Looks nicer, still missing this little piece + is_fake definition and
> setting is allocate_fakecpuc())
> 
Yeah, just send out something more complete -- although only built
tested.

This was just seeing how the intel_try_alt_er() stuff could be done
saner.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 13:32                       ` Peter Zijlstra
@ 2012-06-05 13:38                         ` Stephane Eranian
  2012-06-05 13:47                           ` Peter Zijlstra
  0 siblings, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 13:38 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, Jun 5, 2012 at 3:32 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2012-06-05 at 15:31 +0200, Stephane Eranian wrote:
>> Looks nicer, still missing this little piece + is_fake definition and
>> setting is allocate_fakecpuc())
>>
> Yeah, just send out something more complete -- although only built
> tested.
>
> This was just seeing how the intel_try_alt_er() stuff could be done
> saner.

Will try it on my systems. Though it appears the patch has some
^M all over.
Thanks for looking into this today.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 13:38                         ` Stephane Eranian
@ 2012-06-05 13:47                           ` Peter Zijlstra
  2012-06-05 13:51                             ` Stephane Eranian
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 13:47 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 15:38 +0200, Stephane Eranian wrote:

> Will try it on my systems. Though it appears the patch has some
> ^M all over.

Yeah, stupid evolution can only do quoted-printable these days..
progress they call this :/

I can send you an awk script to unmangle it if you want.. but git can do
it too somehow.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 13:47                           ` Peter Zijlstra
@ 2012-06-05 13:51                             ` Stephane Eranian
  2012-06-06 10:12                               ` Stephane Eranian
  0 siblings, 1 reply; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 13:51 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, Jun 5, 2012 at 3:47 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2012-06-05 at 15:38 +0200, Stephane Eranian wrote:
>
>> Will try it on my systems. Though it appears the patch has some
>> ^M all over.
>
> Yeah, stupid evolution can only do quoted-printable these days..
> progress they call this :/
>
> I can send you an awk script to unmangle it if you want.. but git can do
> it too somehow.

Will try with git, otherwise can do it manually, patch is small enough.
Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation
  2012-06-05 13:30                     ` [PATCH] perf, x86: Fix Intel shared extra MSR allocation Peter Zijlstra
@ 2012-06-05 13:56                       ` Peter Zijlstra
  2012-06-05 21:26                         ` Stephane Eranian
  2012-06-06  1:00                         ` Yan, Zheng
  2012-06-06 15:57                       ` [tip:perf/core] perf/x86: " tip-bot for Peter Zijlstra
  2012-06-06 16:11                       ` tip-bot for Peter Zijlstra
  2 siblings, 2 replies; 27+ messages in thread
From: Peter Zijlstra @ 2012-06-05 13:56 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

On Tue, 2012-06-05 at 15:30 +0200, Peter Zijlstra wrote:

> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>  	struct event_constraint *c = &emptyconstraint;
>  	struct er_account *era;
>  	unsigned long flags;
> -	int orig_idx = reg->idx;
> +	int idx = reg->idx;
>  
>  	/* already allocated shared msr */
>  	if (reg->alloc)
>  		return NULL; /* call x86_get_event_constraint() */

I'm afraid that needs to be:

	/*
	 * reg->alloc can be set due to existing state, so for fake cpuc
	 * we need to ignore this, otherwise we might fail to allocate
	 * proper fake state for this extra reg constraint. Also see
	 * the comment below.
	 */
	if (reg->alloc && !cpuc->is_fake)
		return NULL; /* call x86_get_event_constraints() */

>  
>  again:
> -	era = &cpuc->shared_regs->regs[reg->idx];
> +	era = &cpuc->shared_regs->regs[idx];
>  	/*
>  	 * we use spin_lock_irqsave() to avoid lockdep issues when
>  	 * passing a fake cpuc
> @@ -1173,6 +1179,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>  
>  	if (!atomic_read(&era->ref) || era->config == reg->config) {
>  
> +		/*
> +		 * If its a fake cpuc -- as per validate_{group,event}() we
> +		 * shouldn't touch event state and we can avoid doing so
> +		 * since both will only call get_event_constraints() once
> +		 * on each event, this avoids the need for reg->alloc.
> +		 *
> +		 * Not doing the ER fixup will only result in era->reg being
> +		 * wrong, but since we won't actually try and program hardware
> +		 * this isn't a problem either.
> +		 */
> +		if (!cpuc->is_fake) {
> +			if (idx != reg->idx)
> +				intel_fixup_er(event, idx);
> +
> +			/* 
> +			 * x86_schedule_events() can call get_event_constraints()
> +			 * multiple times on events in the case of incremental
> +			 * scheduling(). reg->alloc ensures we only do the ER
> +			 * allocation once.
> +			 */
> +			reg->alloc = 1;
> +		}
> +
>  		/* lock in msr value */
>  		era->config = reg->config;
>  		era->reg = reg->reg;


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation
  2012-06-05 13:56                       ` Peter Zijlstra
@ 2012-06-05 21:26                         ` Stephane Eranian
  2012-06-06  1:00                         ` Yan, Zheng
  1 sibling, 0 replies; 27+ messages in thread
From: Stephane Eranian @ 2012-06-05 21:26 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 3058 bytes --]

On Tue, Jun 5, 2012 at 3:56 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, 2012-06-05 at 15:30 +0200, Peter Zijlstra wrote:
>
>> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>       struct event_constraint *c = &emptyconstraint;
>>       struct er_account *era;
>>       unsigned long flags;
>> -     int orig_idx = reg->idx;
>> +     int idx = reg->idx;
>>
>>       /* already allocated shared msr */
>>       if (reg->alloc)
>>               return NULL; /* call x86_get_event_constraint() */
>
> I'm afraid that needs to be:
>
>        /*
>         * reg->alloc can be set due to existing state, so for fake cpuc
>         * we need to ignore this, otherwise we might fail to allocate
>         * proper fake state for this extra reg constraint. Also see
>         * the comment below.
>         */
>        if (reg->alloc && !cpuc->is_fake)
>                return NULL; /* call x86_get_event_constraints() */
>
>>
Yes.

>>  again:
>> -     era = &cpuc->shared_regs->regs[reg->idx];
>> +     era = &cpuc->shared_regs->regs[idx];
>>       /*
>>        * we use spin_lock_irqsave() to avoid lockdep issues when
>>        * passing a fake cpuc
>> @@ -1173,6 +1179,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>
>>       if (!atomic_read(&era->ref) || era->config == reg->config) {
>>
>> +             /*
>> +              * If its a fake cpuc -- as per validate_{group,event}() we
>> +              * shouldn't touch event state and we can avoid doing so
>> +              * since both will only call get_event_constraints() once
>> +              * on each event, this avoids the need for reg->alloc.
>> +              *
>> +              * Not doing the ER fixup will only result in era->reg being
>> +              * wrong, but since we won't actually try and program hardware
>> +              * this isn't a problem either.
>> +              */
>> +             if (!cpuc->is_fake) {
>> +                     if (idx != reg->idx)
>> +                             intel_fixup_er(event, idx);
>> +
>> +                     /*
>> +                      * x86_schedule_events() can call get_event_constraints()
>> +                      * multiple times on events in the case of incremental
>> +                      * scheduling(). reg->alloc ensures we only do the ER
>> +                      * allocation once.
>> +                      */
>> +                     reg->alloc = 1;
>> +             }
>> +
>>               /* lock in msr value */
>>               era->config = reg->config;
>>               era->reg = reg->reg;
>
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation
  2012-06-05 13:56                       ` Peter Zijlstra
  2012-06-05 21:26                         ` Stephane Eranian
@ 2012-06-06  1:00                         ` Yan, Zheng
  1 sibling, 0 replies; 27+ messages in thread
From: Yan, Zheng @ 2012-06-06  1:00 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Stephane Eranian, linux-kernel

On 06/05/2012 09:56 PM, Peter Zijlstra wrote:
>> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>> >  	struct event_constraint *c = &emptyconstraint;
>> >  	struct er_account *era;
>> >  	unsigned long flags;
>> > -	int orig_idx = reg->idx;
>> > +	int idx = reg->idx;
>> >  
>> >  	/* already allocated shared msr */
>> >  	if (reg->alloc)
>> >  		return NULL; /* call x86_get_event_constraint() */
> I'm afraid that needs to be:
> 
> 	/*
> 	 * reg->alloc can be set due to existing state, so for fake cpuc
> 	 * we need to ignore this, otherwise we might fail to allocate
> 	 * proper fake state for this extra reg constraint. Also see
> 	 * the comment below.
> 	 */
> 	if (reg->alloc && !cpuc->is_fake)
> 		return NULL; /* call x86_get_event_constraints() */
> 
Agree

Regards
Yan, Zheng




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-05 13:51                             ` Stephane Eranian
@ 2012-06-06 10:12                               ` Stephane Eranian
  2012-06-07  1:25                                 ` Yan, Zheng
  2012-06-07  4:01                                 ` Yan, Zheng
  0 siblings, 2 replies; 27+ messages in thread
From: Stephane Eranian @ 2012-06-06 10:12 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yan, Zheng, Yan, Zheng, linux-kernel

There is something wrong with this patch, I instrumented the code
and I can see:
[ 1377.324575] 1. idx=1 reg_idx=1 ref=-1 config=0xff01 era->config=0xff01
                                                   ^^^^^^
The test case on WSM (RSP0, RSP1):

$ perf stat  -a -C13 -e
offcore_response_1:dmnd_data_rd,offcore_response_1:dmnd_data_rd sleep
100 &
$ perf stat -a -C1 -e offcore_response_1:dmnd_rfo sleep 1

I think this happens during scheduling of the events, i.e., during the
run and not on initial
programming. That could happen with cgroups, for instance.

On Tue, Jun 5, 2012 at 3:51 PM, Stephane Eranian <eranian@google.com> wrote:
> On Tue, Jun 5, 2012 at 3:47 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>> On Tue, 2012-06-05 at 15:38 +0200, Stephane Eranian wrote:
>>
>>> Will try it on my systems. Though it appears the patch has some
>>> ^M all over.
>>
>> Yeah, stupid evolution can only do quoted-printable these days..
>> progress they call this :/
>>
>> I can send you an awk script to unmangle it if you want.. but git can do
>> it too somehow.
>
> Will try with git, otherwise can do it manually, patch is small enough.
> Thanks.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [tip:perf/core] perf/x86: Fix Intel shared extra MSR allocation
  2012-06-05 13:30                     ` [PATCH] perf, x86: Fix Intel shared extra MSR allocation Peter Zijlstra
  2012-06-05 13:56                       ` Peter Zijlstra
@ 2012-06-06 15:57                       ` tip-bot for Peter Zijlstra
  2012-06-06 16:11                       ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 27+ messages in thread
From: tip-bot for Peter Zijlstra @ 2012-06-06 15:57 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, peterz,
	zheng.z.yan, tglx

Commit-ID:  b430f7c4706aeba4270c7ab7744fc504b9315e1c
Gitweb:     http://git.kernel.org/tip/b430f7c4706aeba4270c7ab7744fc504b9315e1c
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 5 Jun 2012 15:30:31 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 6 Jun 2012 16:59:44 +0200

perf/x86: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <zheng.z.yan@linux.intel.com>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c       |    1 +
 arch/x86/kernel/cpu/perf_event.h       |    1 +
 arch/x86/kernel/cpu/perf_event_intel.c |   92 ++++++++++++++++++++++----------
 3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..965baa2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [tip:perf/core] perf/x86: Fix Intel shared extra MSR allocation
  2012-06-05 13:30                     ` [PATCH] perf, x86: Fix Intel shared extra MSR allocation Peter Zijlstra
  2012-06-05 13:56                       ` Peter Zijlstra
  2012-06-06 15:57                       ` [tip:perf/core] perf/x86: " tip-bot for Peter Zijlstra
@ 2012-06-06 16:11                       ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 27+ messages in thread
From: tip-bot for Peter Zijlstra @ 2012-06-06 16:11 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, peterz,
	zheng.z.yan, tglx

Commit-ID:  5a425294ee7d4ab5a374248e85838dfd450caf75
Gitweb:     http://git.kernel.org/tip/5a425294ee7d4ab5a374248e85838dfd450caf75
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 5 Jun 2012 15:30:31 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 6 Jun 2012 17:22:26 +0200

perf/x86: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <zheng.z.yan@linux.intel.com>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c       |    1 +
 arch/x86/kernel/cpu/perf_event.h       |    1 +
 arch/x86/kernel/cpu/perf_event_intel.c |   92 ++++++++++++++++++++++----------
 3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..965baa2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-06 10:12                               ` Stephane Eranian
@ 2012-06-07  1:25                                 ` Yan, Zheng
  2012-06-07  4:01                                 ` Yan, Zheng
  1 sibling, 0 replies; 27+ messages in thread
From: Yan, Zheng @ 2012-06-07  1:25 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Peter Zijlstra, Yan, Zheng, linux-kernel

On 06/06/2012 06:12 PM, Stephane Eranian wrote:
> There is something wrong with this patch, I instrumented the code
> and I can see:
> [ 1377.324575] 1. idx=1 reg_idx=1 ref=-1 config=0xff01 era->config=0xff01
>                                                    ^^^^^^
> The test case on WSM (RSP0, RSP1):
> 
> $ perf stat  -a -C13 -e
> offcore_response_1:dmnd_data_rd,offcore_response_1:dmnd_data_rd sleep
> 100 &
> $ perf stat -a -C1 -e offcore_response_1:dmnd_rfo sleep 1
> 
> I think this happens during scheduling of the events, i.e., during the
> run and not on initial
> programming. That could happen with cgroups, for instance.
> 

Maybe we need adjust shared MSRs' reference counts in intel_fixup_er() ?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] perf: Fix intel shared extra msr allocation
  2012-06-06 10:12                               ` Stephane Eranian
  2012-06-07  1:25                                 ` Yan, Zheng
@ 2012-06-07  4:01                                 ` Yan, Zheng
  1 sibling, 0 replies; 27+ messages in thread
From: Yan, Zheng @ 2012-06-07  4:01 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: Peter Zijlstra, Yan, Zheng, linux-kernel

On 06/06/2012 06:12 PM, Stephane Eranian wrote:
> There is something wrong with this patch, I instrumented the code
> and I can see:
> [ 1377.324575] 1. idx=1 reg_idx=1 ref=-1 config=0xff01 era->config=0xff01
>                                                    ^^^^^^
> The test case on WSM (RSP0, RSP1):
> 
> $ perf stat  -a -C13 -e
> offcore_response_1:dmnd_data_rd,offcore_response_1:dmnd_data_rd sleep
> 100 &
> $ perf stat -a -C1 -e offcore_response_1:dmnd_rfo sleep 1
> 
> I think this happens during scheduling of the events, i.e., during the
> run and not on initial
> programming. That could happen with cgroups, for instance.
> 
The bug is in intel_fixup_er(), it should be:

static void intel_fixup_er(struct perf_event *event, int idx)
{
        if (idx == EXTRA_REG_RSP_0) {
                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
                event->hw.config |= 0x01b7;
                event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
        } else if (idx == EXTRA_REG_RSP_1) {
                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
                event->hw.config |= 0x01bb;
                event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
        }
}

Regards
Yan, Zheng

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2012-06-07  4:01 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-01  3:20 [PATCH] perf: Fix intel shared extra msr allocation Yan, Zheng
2012-06-01  9:35 ` Stephane Eranian
2012-06-01 14:11   ` Yan, Zheng
2012-06-04 13:12     ` Stephane Eranian
2012-06-05  2:18       ` Yan, Zheng
2012-06-05 10:14     ` Peter Zijlstra
2012-06-05 10:21       ` Stephane Eranian
2012-06-05 10:27         ` Peter Zijlstra
2012-06-05 10:38           ` Stephane Eranian
2012-06-05 12:07             ` Peter Zijlstra
2012-06-05 12:39               ` Peter Zijlstra
2012-06-05 12:51                 ` Stephane Eranian
2012-06-05 13:04                   ` Peter Zijlstra
2012-06-05 13:30                     ` [PATCH] perf, x86: Fix Intel shared extra MSR allocation Peter Zijlstra
2012-06-05 13:56                       ` Peter Zijlstra
2012-06-05 21:26                         ` Stephane Eranian
2012-06-06  1:00                         ` Yan, Zheng
2012-06-06 15:57                       ` [tip:perf/core] perf/x86: " tip-bot for Peter Zijlstra
2012-06-06 16:11                       ` tip-bot for Peter Zijlstra
2012-06-05 13:31                     ` [PATCH] perf: Fix intel shared extra msr allocation Stephane Eranian
2012-06-05 13:32                       ` Peter Zijlstra
2012-06-05 13:38                         ` Stephane Eranian
2012-06-05 13:47                           ` Peter Zijlstra
2012-06-05 13:51                             ` Stephane Eranian
2012-06-06 10:12                               ` Stephane Eranian
2012-06-07  1:25                                 ` Yan, Zheng
2012-06-07  4:01                                 ` Yan, Zheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).