linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Stephane Eranian <eranian@google.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Vince Weaver <vincent.weaver@maine.edu>,
	Jiri Olsa <jolsa@redhat.com>, LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2 03/11] perf/x86: Correct local vs remote sibling state
Date: Tue, 26 May 2015 04:48:42 -0700	[thread overview]
Message-ID: <CABPqkBRyqJP6AaAmhzg+F0-UnwRe3m_bTiURJaowSyEAr4R0Jg@mail.gmail.com> (raw)
In-Reply-To: <20150522133135.545977325@infradead.org>

On Fri, May 22, 2015 at 6:29 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> For some obscure reason the current code accounts the current SMT
> thread's state on the remote thread and reads the remote's state on
> the local SMT thread.
>
> While internally consistent, and 'correct', it's pointless confusion we
> can do without.
>
> Flip them the right way around.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   79 +++++++++++++--------------------
>  1 file changed, 33 insertions(+), 46 deletions(-)
>
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1903,9 +1903,8 @@ static void
>  intel_start_scheduling(struct cpu_hw_events *cpuc)
>  {
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xl, *xlo;
> +       struct intel_excl_states *xl;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid; /* sibling thread */
>
>         /*
>          * nothing needed if in group validation mode
> @@ -1919,7 +1918,6 @@ intel_start_scheduling(struct cpu_hw_eve
>         if (!excl_cntrs)
>                 return;
>
> -       xlo = &excl_cntrs->states[o_tid];
>         xl = &excl_cntrs->states[tid];
>
>         xl->sched_started = true;
> @@ -1932,18 +1930,17 @@ intel_start_scheduling(struct cpu_hw_eve
>         raw_spin_lock(&excl_cntrs->lock);
>
>         /*
> -        * save initial state of sibling thread
> +        * Save a copy of our state to work on.
>          */
> -       memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
> +       memcpy(xl->init_state, xl->state, sizeof(xl->init_state));
>  }
>
>  static void
>  intel_stop_scheduling(struct cpu_hw_events *cpuc)
>  {
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xl, *xlo;
> +       struct intel_excl_states *xl;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid; /* sibling thread */
>
>         /*
>          * nothing needed if in group validation mode
> @@ -1956,13 +1953,12 @@ intel_stop_scheduling(struct cpu_hw_even
>         if (!excl_cntrs)
>                 return;
>
> -       xlo = &excl_cntrs->states[o_tid];
>         xl = &excl_cntrs->states[tid];
>
>         /*
> -        * make new sibling thread state visible
> +        * Commit the working state.
>          */
> -       memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
> +       memcpy(xl->state, xl->init_state, sizeof(xl->state));
>
>         xl->sched_started = false;
>         /*
> @@ -1977,10 +1973,9 @@ intel_get_excl_constraints(struct cpu_hw
>  {
>         struct event_constraint *cx;
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xl, *xlo;
> -       int is_excl, i;
> +       struct intel_excl_states *xlo;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid; /* alternate */
> +       int is_excl, i;
>
>         /*
>          * validating a group does not require
> @@ -1994,23 +1989,6 @@ intel_get_excl_constraints(struct cpu_hw
>          */
>         if (!excl_cntrs)
>                 return c;
> -       /*
> -        * event requires exclusive counter access
> -        * across HT threads
> -        */
> -       is_excl = c->flags & PERF_X86_EVENT_EXCL;
> -       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
> -               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
> -               if (!cpuc->n_excl++)
> -                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
> -       }
> -
> -       /*
> -        * xl = state of current HT
> -        * xlo = state of sibling HT
> -        */
> -       xl = &excl_cntrs->states[tid];
> -       xlo = &excl_cntrs->states[o_tid];
>
>         cx = c;
>
> @@ -2054,6 +2032,22 @@ intel_get_excl_constraints(struct cpu_hw
>          */
>
>         /*
> +        * state of sibling HT
> +        */
> +       xlo = &excl_cntrs->states[tid ^ 1];
> +
> +       /*
> +        * event requires exclusive counter access
> +        * across HT threads
> +        */
I think the comment needs to be changed to reflect what the
test is doing. I would say:
/*
 * account for exclusive counter usage. Needed to avoid the
 * cross-thread counter starvation problem with exclusive events.
 */
> +       is_excl = c->flags & PERF_X86_EVENT_EXCL;
> +       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
> +               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
> +               if (!cpuc->n_excl++)
> +                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
> +       }
> +
> +       /*
>          * Modify static constraint with current dynamic
>          * state of thread
>          *
> @@ -2067,14 +2061,14 @@ intel_get_excl_constraints(struct cpu_hw
>                  * our corresponding counter cannot be used
>                  * regardless of our event
>                  */
> -               if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
> +               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
>                         __clear_bit(i, cx->idxmsk);
>                 /*
>                  * if measuring an exclusive event, sibling
>                  * measuring non-exclusive, then counter cannot
>                  * be used
>                  */
> -               if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
> +               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
>                         __clear_bit(i, cx->idxmsk);
>         }
>
> @@ -2124,10 +2118,9 @@ static void intel_put_excl_constraints(s
>  {
>         struct hw_perf_event *hwc = &event->hw;
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> -       struct intel_excl_states *xlo, *xl;
> -       unsigned long flags = 0; /* keep compiler happy */
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid;
> +       struct intel_excl_states *xl;
> +       unsigned long flags = 0; /* keep compiler happy */
>
>         /*
>          * nothing needed if in group validation mode
> @@ -2141,7 +2134,6 @@ static void intel_put_excl_constraints(s
>                 return;
>
>         xl = &excl_cntrs->states[tid];
> -       xlo = &excl_cntrs->states[o_tid];
>         if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
>                 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
>                 if (!--cpuc->n_excl)
> @@ -2161,7 +2153,7 @@ static void intel_put_excl_constraints(s
>          * counter state as unused now
>          */
>         if (hwc->idx >= 0)
> -               xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
> +               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
>
>         if (!xl->sched_started)
>                 raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
> @@ -2200,16 +2192,12 @@ static void intel_commit_scheduling(stru
>  {
>         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
>         struct event_constraint *c = cpuc->event_constraint[idx];
> -       struct intel_excl_states *xlo, *xl;
> +       struct intel_excl_states *xl;
>         int tid = cpuc->excl_thread_id;
> -       int o_tid = 1 - tid;
> -       int is_excl;
>
>         if (cpuc->is_fake || !c)
>                 return;
>
> -       is_excl = c->flags & PERF_X86_EVENT_EXCL;
> -
>         if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
>                 return;
>
> @@ -2219,15 +2207,14 @@ static void intel_commit_scheduling(stru
>                 return;
>
>         xl = &excl_cntrs->states[tid];
> -       xlo = &excl_cntrs->states[o_tid];
>
>         WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
>
>         if (cntr >= 0) {
> -               if (is_excl)
> -                       xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
> +               if (c->flags & PERF_X86_EVENT_EXCL)
> +                       xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
>                 else
> -                       xlo->init_state[cntr] = INTEL_EXCL_SHARED;
> +                       xl->init_state[cntr] = INTEL_EXCL_SHARED;
>         }
>  }
>
>
>

  reply	other threads:[~2015-05-26 13:38 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-22 13:29 [PATCH v2 00/11] Various x86 pmu scheduling patches Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 01/11] perf,x86: Fix event/group validation Peter Zijlstra
2015-05-22 13:40   ` Peter Zijlstra
2015-05-26  9:24     ` Stephane Eranian
2015-05-26 10:12       ` Peter Zijlstra
2015-05-26 11:46         ` Stephane Eranian
2015-05-26 12:16           ` Peter Zijlstra
2015-05-26 12:25             ` Stephane Eranian
2015-05-26 13:22               ` Peter Zijlstra
2015-05-26 13:44                 ` Stephane Eranian
2015-05-22 13:29 ` [PATCH v2 02/11] perf/x86: Improve HT workaround GP counter constraint Peter Zijlstra
2015-05-22 13:42   ` Peter Zijlstra
2015-05-26  9:37   ` Stephane Eranian
2015-05-26 10:15     ` Peter Zijlstra
2015-05-26 11:47       ` Stephane Eranian
2015-05-26 13:19         ` Peter Zijlstra
2015-05-26 16:07           ` Peter Zijlstra
2015-05-27  9:01             ` Stephane Eranian
2015-05-27 10:11               ` Peter Zijlstra
2015-05-27 11:39                 ` Stephane Eranian
2015-05-27 10:13               ` Peter Zijlstra
2015-05-27 11:44                 ` Stephane Eranian
2015-05-26 23:33   ` Andi Kleen
2015-05-27  7:48     ` Peter Zijlstra
2015-05-27 14:00       ` Andi Kleen
2015-05-22 13:29 ` [PATCH v2 03/11] perf/x86: Correct local vs remote sibling state Peter Zijlstra
2015-05-26 11:48   ` Stephane Eranian [this message]
2015-05-22 13:29 ` [PATCH v2 04/11] perf/x86: Use lockdep Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 05/11] perf/x86: Simplify dynamic constraint code somewhat Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 06/11] perf/x86: Make WARNs consistent Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 07/11] perf/x86: Move intel_commit_scheduling() Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 08/11] perf/x86: Remove pointless tests Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 09/11] perf/x86: Remove intel_excl_states::init_state Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 10/11] perf,x86: Simplify logic Peter Zijlstra
2015-05-22 13:29 ` [PATCH v2 11/11] perf/x86: Simplify put_exclusive_constraints Peter Zijlstra
2015-05-22 13:38   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CABPqkBRyqJP6AaAmhzg+F0-UnwRe3m_bTiURJaowSyEAr4R0Jg@mail.gmail.com \
    --to=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=vincent.weaver@maine.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).