linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: kan.liang@linux.intel.com
Cc: mingo@redhat.com, linux-kernel@vger.kernel.org,
	ak@linux.intel.com, kim.phillips@amd.com
Subject: Re: [PATCH] perf/x86/intel: Fix n_metric for the canceled group
Date: Fri, 2 Oct 2020 13:02:58 +0200	[thread overview]
Message-ID: <20201002110258.GV2628@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20200930142935.13482-1-kan.liang@linux.intel.com>

On Wed, Sep 30, 2020 at 07:29:35AM -0700, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> When a group that has TopDown members is failed to be scheduled, any
> later TopDown groups will not return valid values.
> 
> Here is an example.
> 
> A background perf that occupies all the GP counters and the fixed
> counter 1.
>  $perf stat -e "{cycles,cycles,cycles,cycles,cycles,cycles,cycles,
>                  cycles,cycles}:D" -a
> 
> A user monitors a TopDown group. It works well, because the fixed
> counter 3 and the PERF_METRICS are available.
>  $perf stat -x, --topdown -- ./workload
>    retiring,bad speculation,frontend bound,backend bound,
>    18.0,16.1,40.4,25.5,
> 
> Then the user tries to monitor a group that has TopDown members.
> Because of the cycles event, the group is failed to be scheduled.
>  $perf stat -x, -e '{slots,topdown-retiring,topdown-be-bound,
>                      topdown-fe-bound,topdown-bad-spec,cycles}'
>                      -- ./workload
>     <not counted>,,slots,0,0.00,,
>     <not counted>,,topdown-retiring,0,0.00,,
>     <not counted>,,topdown-be-bound,0,0.00,,
>     <not counted>,,topdown-fe-bound,0,0.00,,
>     <not counted>,,topdown-bad-spec,0,0.00,,
>     <not counted>,,cycles,0,0.00,,
> 
> The user tries to monitor a TopDown group again. It doesn't work anymore.
>  $perf stat -x, --topdown -- ./workload
> 
>     ,,,,,
> 
> In a txn, cancel_txn() is to truncate the event_list for a canceled
> group and update the number of events added in this transaction.
> However, the number of TopDown events added in this transaction is not
> updated. The kernel will probably fail to add new Topdown events.
> 
> Check if the canceled group has Topdown events. If so, subtract the
> TopDown events from n_metric accordingly.
> 
> Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics")
> Reported-by: Andi Kleen <ak@linux.intel.com>
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>  arch/x86/events/core.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 0f3d01562ded..4cb3ccbe2d62 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -2017,6 +2017,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
>  {
>  	unsigned int txn_flags;
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	int i;
>  
>  	WARN_ON_ONCE(!cpuc->txn_flags);	/* no txn in flight */
>  
> @@ -2031,6 +2032,15 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
>  	 */
>  	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
>  	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
> +
> +	/* Subtract Topdown events in the canceled group from n_metric */
> +	if (x86_pmu.intel_cap.perf_metrics && cpuc->n_metric) {
> +		for (i = 0; i < cpuc->n_txn; i++) {
> +			if (is_metric_event(cpuc->event_list[i + cpuc->n_events]))
> +				__this_cpu_dec(cpu_hw_events.n_metric);
> +		}
> +		WARN_ON_ONCE(__this_cpu_read(cpu_hw_events.n_metric) < 0);
> +	}
>  	perf_pmu_enable(pmu);
>  }


Urgh, I'd much rather we add n_txn_metric. But also, while looking at
this, don't we have the same problem with n_pair ?

Something like this perhaps...

---
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 757e49755e7c..9b7792c0b6fb 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1066,6 +1066,7 @@ static int add_nr_metric_event(struct cpu_hw_events *cpuc,
 		if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
 			return -EINVAL;
 		cpuc->n_metric++;
+		cpuc->n_txn_metric++;
 	}
 
 	return 0;
@@ -1089,8 +1090,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
 		return -EINVAL;
 
 	cpuc->event_list[n] = event;
-	if (is_counter_pair(&event->hw))
+	if (is_counter_pair(&event->hw)) {
 		cpuc->n_pair++;
+		cpuc->n_txn_pair++;
+	}
 
 	return 0;
 }
@@ -2062,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 
 	perf_pmu_disable(pmu);
 	__this_cpu_write(cpu_hw_events.n_txn, 0);
+	__this_cpu_write(cpu_hw_events.n_txn_metric, 0);
+	__this_cpu_write(cpu_hw_events.n_txn_pair, 0);
 }
 
 /*
@@ -2087,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
 	 */
 	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
 	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+	__this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
+	__this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
 	perf_pmu_enable(pmu);
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 345442410a4d..6348105b6d30 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -235,6 +235,8 @@ struct cpu_hw_events {
 					     they've never been enabled yet */
 	int			n_txn;    /* the # last events in the below arrays;
 					     added in the current transaction */
+	int			n_txn_metric;
+	int			n_txn_pair;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 

  reply	other threads:[~2020-10-02 11:03 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-30 14:29 [PATCH] perf/x86/intel: Fix n_metric for the canceled group kan.liang
2020-10-02 11:02 ` Peter Zijlstra [this message]
2020-10-02 13:16   ` Liang, Kan
2020-10-02 21:10     ` Kim Phillips
2020-10-05  8:25       ` [PATCH] perf/x86: Fix n_pair for cancelled txn Peter Zijlstra
2020-10-07 16:04         ` [tip: perf/core] " tip-bot2 for Peter Zijlstra
2020-10-05  8:26     ` [PATCH] perf/x86: Fix n_metric " Peter Zijlstra
2020-10-07 16:04       ` [tip: perf/core] " tip-bot2 for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201002110258.GV2628@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=ak@linux.intel.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kim.phillips@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).