Re: [PATCH v1 2/7] perf/x86/intel: Support PEBS output to PT

From: Peter Zijlstra <peterz@infradead.org>
To: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>,
	Ingo Molnar <mingo@redhat.com>,
	linux-kernel@vger.kernel.org, kan.liang@linux.intel.com
Subject: Re: [PATCH v1 2/7] perf/x86/intel: Support PEBS output to PT
Date: Mon, 29 Jul 2019 15:37:05 +0200	[thread overview]
Message-ID: <20190729133705.GC31381@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20190704160024.56600-3-alexander.shishkin@linux.intel.com>

On Thu, Jul 04, 2019 at 07:00:19PM +0300, Alexander Shishkin wrote:

> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index f0e4804515d8..a11924e20df3 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -869,6 +869,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
>  	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
>  	struct perf_event *e;
>  	int n0, i, wmin, wmax, unsched = 0;
> +	int n_pebs_ds, n_pebs_pt;
>  	struct hw_perf_event *hwc;
>  
>  	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
> @@ -884,6 +885,37 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
>  	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
>  		n0 -= cpuc->n_txn;
>  
> +	/*
> +	 * Check for PEBS->DS and PEBS->PT events.
> +	 * 1) They can't be scheduled simultaneously;
> +	 * 2) PEBS->PT events depend on a corresponding PT event
> +	 */
> +	for (i = 0, n_pebs_ds = 0, n_pebs_pt = 0; i < n; i++) {
> +		e = cpuc->event_list[i];
> +
> +		if (e->attr.precise_ip) {
> +			if (e->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT) {
> +				/*
> +				 * New PEBS->PT event, check ->aux_event; if
> +				 * it's NULL, the group has been broken down
> +				 * and this event can't schedule any more.
> +				 */
> +				if (!cpuc->is_fake && i >= n0 && !e->aux_event)
> +					return -EINVAL;

How can this happen? Is this an artifact if creating a group, and then
destroying the group leader (the PT event) and then getting a bunch of
unschedulable events as remains?

> +				n_pebs_pt++;
> +			} else {
> +				n_pebs_ds++;
> +			}
> +		}
> +	}

This makes for the 3rd i..n iteration in a row, now the first is over
cpuc->event_constraint[], this is the second and the third isn't
guaranteed to terminate but is over both cpuc->event_list[] and
->event_constraint[].

It just feels like we can do better.

> +
> +	/*
> +	 * Fail to add conflicting PEBS events. If this happens, rotation
> +	 * takes care that all events get to run.
> +	 */
> +	if (n_pebs_ds && n_pebs_pt)
> +		return -EINVAL;

This basically means we can rewrite the above like:

	u8 pebs_pt = 0;

	if (e->attr.precise_ip) {
		bool pt = is_pebs_pt(e);

		if (pebs_pt & (1 << !pt))
			return -EINVAL;

		pebs_pt |= 1 << pt;
	}

There's no need to finish the loop or to actually count how many there
are; all we need to know is there's only one type.

Then again, if you put these counters in cpuc, you can make
collect_events() reject the event before we ever get to scheduling and
avoid the whole iteration.

> +
>  	if (x86_pmu.start_scheduling)
>  		x86_pmu.start_scheduling(cpuc);
>  

> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index bda450ff51ee..6955d4f7e7aa 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c

> @@ -3814,6 +3821,17 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
>  	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
>  }
>  
> +static int intel_pmu_aux_source_match(struct perf_event *event)
> +{
> +	if (!x86_pmu.intel_cap.pebs_output_pt_available)
> +		return 0;
> +
> +	if (event->pmu->name && !strcmp(event->pmu->name, "intel_pt"))

Yuck, surely we can do something like:

	if (is_pt_event(event))

which is implemented in intel/pt.c and does something like:

	return event->pmu == &pt_pmu.pmu;

> +		return 1;
> +
> +	return 0;
> +}
> +
>  PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
>  
>  PMU_FORMAT_ATTR(ldlat, "config1:0-15");

> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 7acc526b4ad2..9c59462f38a3 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c

> +static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct debug_store *ds = cpuc->ds;
> +
> +	if (!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT))
> +		return;
> +
> +	/*
> +	 * In case there's a mix of PEBS->PT and PEBS->DS, fall back
> +	 * to DS.
> +	 */

I thought we disallowed that from happening !?

> +	if (cpuc->n_pebs != cpuc->n_pebs_via_pt) {
> +		/* PEBS-to-DS events present, fall back to DS */
> +		intel_pmu_pebs_via_pt_disable(event);
> +		return;
> +	}
> +
> +	if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
> +		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
> +
> +	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
> +
> +	wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
> +}
> +