Date: Tue, 26 May 2015 18:07:12 +0200
From: Peter Zijlstra
To: Stephane Eranian
Cc: Ingo Molnar, Vince Weaver, Jiri Olsa, "Liang, Kan", LKML
Subject: Re: [PATCH v2 02/11] perf/x86: Improve HT workaround GP counter constraint
Message-ID: <20150526160712.GQ18673@twins.programming.kicks-ass.net>
References: <20150522132905.416122812@infradead.org>
 <20150522133135.447912500@infradead.org>
 <20150526101548.GL3644@twins.programming.kicks-ass.net>
 <20150526131950.GO3644@twins.programming.kicks-ass.net>
In-Reply-To: <20150526131950.GO3644@twins.programming.kicks-ass.net>

On Tue, May 26, 2015 at 03:19:50PM +0200, Peter Zijlstra wrote:
> On Tue, May 26, 2015 at 04:47:11AM -0700, Stephane Eranian wrote:
> > > I'm going to overhaul the whole get/put constraints stuff first.
> >
> > Ok, I think it would be good to balance the number of get/put. It would
> > avoid the confusion. Is that what you are thinking about?
>
> Yes, and remove the few associated modifications to events.

I have the below (in 4 patches); compile tested only so far.

---
 perf_event.c       | 51 +++++++++++++++++++++++++--------------------------
 perf_event.h       |  4 ++--
 perf_event_intel.c | 40 ++++------------------------------------
 3 files changed, 31 insertions(+), 64 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -810,9 +810,15 @@ int x86_schedule_events(struct cpu_hw_ev
 	x86_pmu.start_scheduling(cpuc);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		cpuc->event_constraint[i] = NULL;
-		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-		cpuc->event_constraint[i] = c;
+		/*
+		 * Only call get_event_constraints() once!
+		 */
+		c = cpuc->event_constraint[i];
+		if (!c) {
+			e = cpuc->event_list[i];
+			c = x86_pmu.get_event_constraints(cpuc, i, e);
+			cpuc->event_constraint[i] = c;
+		}
 
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
@@ -875,27 +881,23 @@ int x86_schedule_events(struct cpu_hw_ev
 	 * validate an event group (assign == NULL)
 	 */
 	if (!unsched && assign) {
-		for (i = 0; i < n; i++) {
-			e = cpuc->event_list[i];
-			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
-			if (x86_pmu.commit_scheduling)
+		if (x86_pmu.commit_scheduling) {
+			for (i = 0; i < n; i++) {
 				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+			}
 		}
-	} else {
-		for (i = 0; i < n; i++) {
-			e = cpuc->event_list[i];
-			/*
-			 * do not put_constraint() on comitted events,
-			 * because they are good to go
-			 */
-			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
-				continue;
+	} else if (x86_pmu.put_event_constraints) {
+		/* x86_pmu_add() will not yet have updated n_events */
+		i = cpuc->n_events;
+
+		/* x86_pmu_commit_txn() relies on n_txn */
+		if (cpuc->group_flag & PERF_EVENT_TXN)
+			i -= cpuc->n_txn;
 
-			/*
-			 * release events that failed scheduling
-			 */
-			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, e);
+		for (; i < n; i++) {
+			e = cpuc->event_list[i];
+			/* release events that failed scheduling */
+			x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
 
@@ -923,6 +925,7 @@ static int collect_events(struct cpu_hw_
 		if (n >= max_count)
 			return -EINVAL;
 		cpuc->event_list[n] = leader;
+		cpuc->event_constraint[n] = NULL;
 		n++;
 	}
 	if (!dogrp)
@@ -937,6 +940,7 @@ static int collect_events(struct cpu_hw_
 			return -EINVAL;
 
 		cpuc->event_list[n] = event;
+		cpuc->event_constraint[n] = NULL;
 		n++;
 	}
 	return n;
@@ -1295,11 +1299,6 @@ static void x86_pmu_del(struct perf_even
 	int i;
 
 	/*
-	 * event is descheduled
-	 */
-	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
-	/*
 	 * If we're called during a txn, we don't need to do anything.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -68,13 +68,13 @@ struct event_constraint {
 #define PERF_X86_EVENT_PEBS_LDLAT	0x0001 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x0002 /* st data address sampling */
 #define PERF_X86_EVENT_PEBS_ST_HSW	0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED	0x0008 /* event passed commit_txn */
+
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x0010 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x0020 /* haswell style datala, unknown */
 #define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
+
 #define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
 #define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */
 
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1955,14 +1955,6 @@ __intel_shared_reg_get_constraints(struc
 	unsigned long flags;
 	int idx = reg->idx;
 
-	/*
-	 * reg->alloc can be set due to existing state, so for fake cpuc we
-	 * need to ignore this, otherwise we might fail to allocate proper fake
-	 * state for this extra reg constraint. Also see the comment below.
- */ - if (reg->alloc && !cpuc->is_fake) - return NULL; /* call x86_get_event_constraint() */ - again: era = &cpuc->shared_regs->regs[idx]; /* @@ -1986,14 +1978,6 @@ __intel_shared_reg_get_constraints(struc if (!cpuc->is_fake) { if (idx != reg->idx) intel_fixup_er(event, idx); - - /* - * x86_schedule_events() can call get_event_constraints() - * multiple times on events in the case of incremental - * scheduling(). reg->alloc ensures we only do the ER - * allocation once. - */ - reg->alloc = 1; } /* lock in msr value */ @@ -2026,24 +2010,12 @@ __intel_shared_reg_put_constraints(struc { struct er_account *era; - /* - * Only put constraint if extra reg was actually allocated. Also takes - * care of event which do not use an extra shared reg. - * - * Also, if this is a fake cpuc we shouldn't touch any event state - * (reg->alloc) and we don't care about leaving inconsistent cpuc state - * either since it'll be thrown out. - */ - if (!reg->alloc || cpuc->is_fake) - return; + WARN_ON_ONCE(cpuc->is_fake); era = &cpuc->shared_regs->regs[reg->idx]; /* one fewer user */ atomic_dec(&era->ref); - - /* allocate again next time */ - reg->alloc = 0; } static struct event_constraint * @@ -2261,8 +2233,7 @@ intel_get_excl_constraints(struct cpu_hw * across HT threads */ is_excl = c->flags & PERF_X86_EVENT_EXCL; - if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { - event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; + if (is_excl) { if (!cpuc->n_excl++) WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); } @@ -2350,11 +2321,8 @@ static void intel_put_excl_constraints(s if (WARN_ON_ONCE(!excl_cntrs)) return; - if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { - hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; - if (!--cpuc->n_excl) - WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); - } + if ((hwc->flags & PERF_X86_EVENT_EXCL) && !--cpuc->n_excl) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); /* * If event was actually assigned, then mark the counter state as