In-Reply-To: <20150522133135.545977325@infradead.org>
References: <20150522132905.416122812@infradead.org> <20150522133135.545977325@infradead.org>
Date: Tue, 26 May 2015 04:48:42 -0700
Subject: Re: [PATCH v2 03/11] perf/x86: Correct local vs remote sibling state
From: Stephane Eranian
To: Peter Zijlstra
Cc: Ingo Molnar, Vince Weaver, Jiri Olsa, LKML

On Fri, May 22, 2015 at 6:29 AM, Peter Zijlstra wrote:
> For some obscure reason the current code accounts the current SMT
> thread's state on the remote thread and reads the remote's state on
> the local SMT thread.
>
> While internally consistent, and 'correct', it's pointless confusion we
> can do without.
>
> Flip them the right way around.
>
> Signed-off-by: Peter Zijlstra (Intel)
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c | 79 +++++++++++++--------------------
>  1 file changed, 33 insertions(+), 46 deletions(-)
>
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1903,9 +1902,8 @@ static void
>  intel_start_scheduling(struct cpu_hw_events *cpuc)
>  {
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xl, *xlo;
> +       struct intel_excl_states *xl;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid; /* sibling thread */
>
>         /*
>          * nothing needed if in group validation mode
> @@ -1919,7 +1918,6 @@ intel_start_scheduling(struct cpu_hw_eve
>         if (!excl_cntrs)
>                 return;
>
> -       xlo = &excl_cntrs->states[o_tid];
>         xl = &excl_cntrs->states[tid];
>
>         xl->sched_started = true;
> @@ -1932,18 +1930,17 @@ intel_start_scheduling(struct cpu_hw_eve
>         raw_spin_lock(&excl_cntrs->lock);
>
>         /*
> -        * save initial state of sibling thread
> +        * Save a copy of our state to work on.
>          */
> -       memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
> +       memcpy(xl->init_state, xl->state, sizeof(xl->init_state));
>  }
>
>  static void
>  intel_stop_scheduling(struct cpu_hw_events *cpuc)
>  {
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xl, *xlo;
> +       struct intel_excl_states *xl;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid; /* sibling thread */
>
>         /*
>          * nothing needed if in group validation mode
> @@ -1956,13 +1953,12 @@ intel_stop_scheduling(struct cpu_hw_even
>         if (!excl_cntrs)
>                 return;
>
> -       xlo = &excl_cntrs->states[o_tid];
>         xl = &excl_cntrs->states[tid];
>
>         /*
> -        * make new sibling thread state visible
> +        * Commit the working state.
>          */
> -       memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
> +       memcpy(xl->state, xl->init_state, sizeof(xl->state));
>
>         xl->sched_started = false;
>         /*
> @@ -1977,10 +1973,9 @@ intel_get_excl_constraints(struct cpu_hw
>  {
>         struct event_constraint *cx;
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xl, *xlo;
> -       int is_excl, i;
> +       struct intel_excl_states *xlo;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid; /* alternate */
> +       int is_excl, i;
>
>         /*
>          * validating a group does not require
> @@ -1994,23 +1989,6 @@ intel_get_excl_constraints(struct cpu_hw
>          */
>         if (!excl_cntrs)
>                 return c;
> -       /*
> -        * event requires exclusive counter access
> -        * across HT threads
> -        */
> -       is_excl = c->flags & PERF_X86_EVENT_EXCL;
> -       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
> -               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
> -               if (!cpuc->n_excl++)
> -                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
> -       }
> -
> -       /*
> -        * xl = state of current HT
> -        * xlo = state of sibling HT
> -        */
> -       xl = &excl_cntrs->states[tid];
> -       xlo = &excl_cntrs->states[o_tid];
>
>         cx = c;
>
> @@ -2054,6 +2032,22 @@ intel_get_excl_constraints(struct cpu_hw
>          */
>
>         /*
> +        * state of sibling HT
> +        */
> +       xlo = &excl_cntrs->states[tid ^ 1];
> +
> +       /*
> +        * event requires exclusive counter access
> +        * across HT threads
> +        */

I think the comment needs to be changed to reflect what the test is
doing. I would say:

        /*
         * account for exclusive counter usage. Needed to avoid
         * cross-thread counter starvation problems with exclusive events.
         */

> +       is_excl = c->flags & PERF_X86_EVENT_EXCL;
> +       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
> +               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
> +               if (!cpuc->n_excl++)
> +                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
> +       }
> +
> +       /*
>          * Modify static constraint with current dynamic
>          * state of thread
>          *
> @@ -2067,14 +2061,14 @@ intel_get_excl_constraints(struct cpu_hw
>          * our corresponding counter cannot be used
>          * regardless of our event
>          */
> -       if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
> +       if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
>                 __clear_bit(i, cx->idxmsk);
>         /*
>          * if measuring an exclusive event, sibling
>          * measuring non-exclusive, then counter cannot
>          * be used
>          */
> -       if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
> +       if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
>                 __clear_bit(i, cx->idxmsk);
>  }
>
> @@ -2124,10 +2118,9 @@ static void intel_put_excl_constraints(s
>  {
>         struct hw_perf_event *hwc = &event->hw;
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xlo, *xl;
> -       unsigned long flags = 0; /* keep compiler happy */
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid;
> +       struct intel_excl_states *xl;
> +       unsigned long flags = 0; /* keep compiler happy */
>
>         /*
>          * nothing needed if in group validation mode
> @@ -2141,7 +2134,6 @@ static void intel_put_excl_constraints(s
>                 return;
>
>         xl = &excl_cntrs->states[tid];
> -       xlo = &excl_cntrs->states[o_tid];
>         if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
>                 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
>                 if (!--cpuc->n_excl)
> @@ -2161,7 +2153,7 @@ static void intel_put_excl_constraints(s
>          * counter state as unused now
>          */
>         if (hwc->idx >= 0)
> -               xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
> +               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
>
>         if (!xl->sched_started)
>                 raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
> @@ -2200,16 +2192,12 @@ static void intel_commit_scheduling(stru
>  {
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
>         struct event_constraint *c = cpuc->event_constraint[idx];
> -       struct intel_excl_states *xlo, *xl;
> +       struct intel_excl_states *xl;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid;
> -       int is_excl;
>
>         if (cpuc->is_fake || !c)
>                 return;
>
> -       is_excl = c->flags & PERF_X86_EVENT_EXCL;
> -
>         if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
>                 return;
>
> @@ -2219,15 +2207,14 @@ static void intel_commit_scheduling(stru
>                 return;
>
>         xl = &excl_cntrs->states[tid];
> -       xlo = &excl_cntrs->states[o_tid];
>
>         WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
>
>         if (cntr >= 0) {
> -               if (is_excl)
> -                       xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
> +               if (c->flags & PERF_X86_EVENT_EXCL)
> +                       xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
>                 else
> -                       xlo->init_state[cntr] = INTEL_EXCL_SHARED;
> +                       xl->init_state[cntr] = INTEL_EXCL_SHARED;
>         }
>  }
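
For anyone following the thread, the convention the patch settles on is
simple: each SMT sibling only ever writes its own state,
xl = &excl_cntrs->states[tid], and only ever reads the sibling's state,
xlo = &excl_cntrs->states[tid ^ 1]. The user-space sketch below is
illustrative only, not the kernel code: the structures are simplified
stand-ins, the helper names (commit_local(), counter_usable(),
NUM_COUNTERS) are invented for the example, and only the indexing
convention and the INTEL_EXCL_* checks mirror the patch.

#include <stdio.h>

#define NUM_COUNTERS 4                  /* illustrative, not the real count */

enum intel_excl_state_type {
        INTEL_EXCL_UNUSED = 0,          /* counter is unused */
        INTEL_EXCL_SHARED,              /* counter used by a non-exclusive event */
        INTEL_EXCL_EXCLUSIVE,           /* counter used by an exclusive event */
};

struct intel_excl_states {
        enum intel_excl_state_type state[NUM_COUNTERS];
};

struct intel_excl_cntrs {
        struct intel_excl_states states[2];     /* one slot per SMT sibling */
        int has_exclusive[2];                   /* mirrors the n_excl accounting */
};

/* The local thread records its own scheduling decision: write states[tid]. */
static void commit_local(struct intel_excl_cntrs *c, int tid, int cntr, int is_excl)
{
        struct intel_excl_states *xl = &c->states[tid];         /* our state */

        xl->state[cntr] = is_excl ? INTEL_EXCL_EXCLUSIVE : INTEL_EXCL_SHARED;
        if (is_excl)
                c->has_exclusive[tid] = 1;      /* the accounting the suggested
                                                   comment above describes */
}

/*
 * Constraint check: only *read* the sibling's state, states[tid ^ 1].
 * A counter the sibling holds exclusively cannot be used at all, and an
 * exclusive event also cannot take a counter the sibling uses in shared
 * mode -- the same two tests as in intel_get_excl_constraints().
 */
static int counter_usable(struct intel_excl_cntrs *c, int tid, int cntr, int is_excl)
{
        struct intel_excl_states *xlo = &c->states[tid ^ 1];    /* sibling state */

        if (xlo->state[cntr] == INTEL_EXCL_EXCLUSIVE)
                return 0;
        if (is_excl && xlo->state[cntr] == INTEL_EXCL_SHARED)
                return 0;
        return 1;
}

int main(void)
{
        static struct intel_excl_cntrs c;       /* zeroed: all counters unused */

        commit_local(&c, 0, 2, 1);      /* thread 0 takes counter 2 exclusively */

        printf("tid 1, cntr 2 usable: %d\n", counter_usable(&c, 1, 2, 0)); /* 0 */
        printf("tid 1, cntr 3 usable: %d\n", counter_usable(&c, 1, 3, 0)); /* 1 */
        return 0;
}

Before the patch the same checks read states[tid] while the updates wrote
states[o_tid]; that was internally consistent but inverted, and flipping
both sides makes every access read naturally as "we write ours, we read
theirs".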