From: kan.liang@linux.intel.com
To: peterz@infradead.org, acme@redhat.com, mingo@kernel.org,
linux-kernel@vger.kernel.org
Cc: tglx@linutronix.de, jolsa@kernel.org, eranian@google.com,
alexander.shishkin@linux.intel.com, ak@linux.intel.com,
Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH V5 RESEND 07/14] perf/x86/intel: Support per thread RDPMC TopDown metrics
Date: Mon, 6 Jan 2020 12:29:12 -0800 [thread overview]
Message-ID: <20200106202919.2943-8-kan.liang@linux.intel.com> (raw)
In-Reply-To: <20200106202919.2943-1-kan.liang@linux.intel.com>
From: Kan Liang <kan.liang@linux.intel.com>
With Ice Lake CPUs, the TopDown metrics are directly available as fixed
counters and do not require generic counters, which makes it possible to
measure TopDown per thread/process instead of only per core.
The metrics and slots values have to be saved/restored during context
switching.
The saved values are also used as previous values to calculate the
delta.
The PERF_METRICS MSR value will be returned when a metrics event is read via RDPMC.
Re-use last_period and period_left, which are unused sampling fields,
for saved_metric and saved_slots.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
Changes since V4:
- Re-use last_period and period_left for saved_metric and saved_slots.
arch/x86/events/core.c | 5 +-
arch/x86/events/intel/core.c | 103 +++++++++++++++++++++++++++++------
include/linux/perf_event.h | 29 ++++++----
3 files changed, 108 insertions(+), 29 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index bfa5e8286eed..333541c05815 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2204,7 +2204,10 @@ static int x86_pmu_event_idx(struct perf_event *event)
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
- if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+ /* Return PERF_METRICS MSR value for metrics event */
+ if (is_metric_idx(idx))
+ idx = 1 << 29;
+ else if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
idx -= INTEL_PMC_IDX_FIXED;
idx |= 1 << 30;
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d7aecfe03372..0d1a327c18fc 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2244,7 +2244,13 @@ static int icl_set_topdown_event_period(struct perf_event *event)
if (left == x86_pmu.max_period) {
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
wrmsrl(MSR_PERF_METRICS, 0);
- local64_set(&hwc->period_left, 0);
+ hwc->saved_slots = 0;
+ hwc->saved_metric = 0;
+ }
+
+ if ((hwc->saved_slots) && is_slots_event(event)) {
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
}
perf_event_update_userpage(event);
@@ -2265,7 +2271,7 @@ static u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
return mul_u64_u32_div(slots, val, 0xff);
}
-static void __icl_update_topdown_event(struct perf_event *event,
+static u64 icl_get_topdown_value(struct perf_event *event,
u64 slots, u64 metrics)
{
int idx = event->hw.idx;
@@ -2276,7 +2282,50 @@ static void __icl_update_topdown_event(struct perf_event *event,
else
delta = slots;
- local64_add(delta, &event->count);
+ return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+ u64 slots, u64 metrics,
+ u64 last_slots, u64 last_metrics)
+{
+ u64 delta, last = 0;
+
+ delta = icl_get_topdown_value(event, slots, metrics);
+ if (last_slots)
+ last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+ /*
+ * The 8-bit integer fraction of a metric may not be accurate,
+ * especially when the change is very small.
+ * For example, if only a few bad_spec happens, the fraction
+ * may be reduced from 1 to 0. If so, the bad_spec event value
+ * will be 0 which is definitely less than the last value.
+ * Avoid update event->count for this case.
+ */
+ if (delta > last) {
+ delta -= last;
+ local64_add(delta, &event->count);
+ }
+}
+
+static void update_saved_topdown_regs(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ int idx;
+
+ event->hw.saved_slots = slots;
+ event->hw.saved_metric = metrics;
+
+ for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ other->hw.saved_slots = slots;
+ other->hw.saved_metric = metrics;
+ }
}
/*
@@ -2290,6 +2339,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *other;
u64 slots, metrics;
+ bool reset = true;
int idx;
/* read Fixed counter 3 */
@@ -2304,25 +2354,45 @@ static u64 icl_update_topdown_event(struct perf_event *event)
if (!is_topdown_idx(idx))
continue;
other = cpuc->events[idx];
- __icl_update_topdown_event(other, slots, metrics);
+ __icl_update_topdown_event(other, slots, metrics,
+ event ? event->hw.saved_slots : 0,
+ event ? event->hw.saved_metric : 0);
}
/*
* Check and update this event, which may have been cleared
* in active_mask e.g. x86_pmu_stop()
*/
- if (event && !test_bit(event->hw.idx, cpuc->active_mask))
- __icl_update_topdown_event(event, slots, metrics);
+ if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+ __icl_update_topdown_event(event, slots, metrics,
+ event->hw.saved_slots,
+ event->hw.saved_metric);
- /*
- * Software is recommended to periodically clear both registers
- * in order to maintain accurate measurements, which is required for
- * certain scenarios that involve sampling metrics at high rates.
- * Software should always write fixed counter 3 before write to
- * PERF_METRICS.
- */
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
- wrmsrl(MSR_PERF_METRICS, 0);
+ /*
+ * In x86_pmu_stop(), the event is cleared in active_mask first,
+ * then drain the delta, which indicates context switch for
+ * counting.
+ * Save metric and slots for context switch.
+ * Don't need to reset the PERF_METRICS and Fixed counter 3.
+ * Because the values will be restored in next schedule in.
+ */
+ update_saved_topdown_regs(event, slots, metrics);
+ reset = false;
+ }
+
+ if (reset) {
+ /*
+ * Software is recommended to periodically clear both registers
+ * in order to maintain accurate measurements, which is required
+ * for certain scenarios that involve sampling metrics at high
+ * rates. Software should always write fixed counter 3 before
+ * write to PERF_METRICS.
+ */
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrl(MSR_PERF_METRICS, 0);
+ if (event)
+ update_saved_topdown_regs(event, 0, 0);
+ }
return slots;
}
@@ -3515,9 +3585,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->attr.config1 = event->hw.config &
X86_ALL_EVENT_FLAGS;
event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
-
- if (is_metric_event(event))
- event->hw.flags &= ~PERF_X86_EVENT_RDPMC_ALLOWED;
}
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 011dcbdbccc2..3f58414e4a91 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -200,17 +200,26 @@ struct hw_perf_event {
*/
u64 sample_period;
- /*
- * The period we started this sample with.
- */
- u64 last_period;
+ union {
+ struct { /* Sampling */
+ /*
+ * The period we started this sample with.
+ */
+ u64 last_period;
- /*
- * However much is left of the current period; note that this is
- * a full 64bit value and allows for generation of periods longer
- * than hardware might allow.
- */
- local64_t period_left;
+ /*
+ * However much is left of the current period;
+ * note that this is a full 64bit value and
+ * allows for generation of periods longer
+ * than hardware might allow.
+ */
+ local64_t period_left;
+ };
+ struct { /* Topdown events counting for context switch */
+ u64 saved_metric;
+ u64 saved_slots;
+ };
+ };
/*
* State for throttling the event, see __perf_event_overflow() and
--
2.17.1
next prev parent reply other threads:[~2020-01-06 20:30 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-06 20:29 [PATCH V5 RESEND 00/14] TopDown metrics support for Icelake kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 01/14] perf/x86/intel: Introduce the fourth fixed counter kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 02/14] perf/x86/intel: Set correct mask for TOPDOWN.SLOTS kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 03/14] perf/x86/intel: Move BTS index to 47 kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 04/14] perf/x86/intel: Basic support for metrics counters kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 05/14] perf/x86/intel: Fix the name of perf capabilities for perf METRICS kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 06/14] perf/x86/intel: Support hardware TopDown metrics kan.liang
2020-01-06 20:29 ` kan.liang [this message]
2020-01-06 20:29 ` [PATCH V5 RESEND 08/14] perf/x86/intel: Export TopDown events for Icelake kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 09/14] perf/x86/intel: Disable sampling read slots and topdown kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 10/14] perf/x86/intel: Name global status bit in NMI handler kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 11/14] perf/x86: Use event_base_rdpmc for RDPMC userspace support kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 12/14] perf, tools, stat: Support new per thread TopDown metrics kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 13/14] perf, tools, stat: Check Topdown Metric group kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 14/14] perf, tools: Add documentation for topdown metrics kan.liang
2020-01-10 13:17 ` [PATCH V5 RESEND 00/14] TopDown metrics support for Icelake Peter Zijlstra
2020-04-20 16:00 ` Stephane Eranian
2020-04-20 17:02 ` Peter Zijlstra
-- strict thread matches above, loose matches on Subject: below --
2019-12-03 14:11 kan.liang
2019-12-03 14:12 ` [PATCH V5 RESEND 07/14] perf/x86/intel: Support per thread RDPMC TopDown metrics kan.liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200106202919.2943-8-kan.liang@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=eranian@google.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).