[PATCH 6/7] perf/x86/amd/core: Add PerfMonV2 overflow handling

From: Sandipan Das <sandipan.das@amd.com>
To: <linux-kernel@vger.kernel.org>,
	<linux-perf-users@vger.kernel.org>, <x86@kernel.org>
Cc: <peterz@infradead.org>, <bp@alien8.de>,
	<dave.hansen@linux.intel.com>, <acme@kernel.org>,
	<mark.rutland@arm.com>, <alexander.shishkin@linux.intel.com>,
	<namhyung@kernel.org>, <jolsa@kernel.org>, <tglx@linutronix.de>,
	<mingo@redhat.com>, <pbonzini@redhat.com>, <jmattson@google.com>,
	<like.xu.linux@gmail.com>, <eranian@google.com>,
	<ananth.narayan@amd.com>, <ravi.bangoria@amd.com>,
	<santosh.shukla@amd.com>, <sandipan.das@amd.com>
Subject: [PATCH 6/7] perf/x86/amd/core: Add PerfMonV2 overflow handling
Date: Thu, 17 Mar 2022 11:58:35 +0530	[thread overview]
Message-ID: <7d43b4ba8a7c3c0833495f3fabfcfc6df8db3732.1647498015.git.sandipan.das@amd.com> (raw)
In-Reply-To: <cover.1647498015.git.sandipan.das@amd.com>

If AMD Performance Monitoring Version 2 (PerfMonV2) is
supported, use a new scheme to process Core PMC overflows
in the NMI handler using the new global control and status
registers. This will be bypassed on unsupported hardware
(x86_pmu.version < 2).

In x86_pmu_handle_irq(), overflows are detected by testing
the contents of the PERF_CTR register for each active PMC in
a loop. The new scheme instead inspects the overflow bits of
the global status register.

The Performance Counter Global Status (PerfCntrGlobalStatus)
register has overflow (PerfCntrOvfl) bits for each PMC. This
is, however, a read-only MSR. To acknowledge that overflows
have been processed, the NMI handler must clear the bits by
writing to the PerfCntrGlobalStatusClr register.

In x86_pmu_handle_irq(), PMCs counting the same event that
are started and stopped at the same time record slightly
different counts due to delays in between reads from the
PERF_CTR registers. This is fixed by stopping and starting
all the PMCs at the same time, with a single write to the
Performance Counter Global Control (PerfCntrGlobalCtl) upon
entering and before exiting the NMI handler.

Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
 arch/x86/events/amd/core.c | 125 +++++++++++++++++++++++++++++++++++--
 1 file changed, 121 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 532e9bd76bf1..fbbba981d0bd 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -7,6 +7,7 @@
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 #include <asm/apicdef.h>
+#include <asm/apic.h>
 #include <asm/nmi.h>
 
 #include "../perf_event.h"
@@ -601,6 +602,45 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
 	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
 }
 
+static inline u64 amd_pmu_get_global_overflow(void)
+{
+	u64 status;
+
+	/* Read-only MSR; the result is limited to amd_pmu_global_cntr_mask */
+	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+
+	return status & amd_pmu_global_cntr_mask;
+}
+
+static inline void amd_pmu_ack_global_overflow(u64 status)
+{
+	/*
+	 * @status: bits to acknowledge. PerfCntrGlobalStatus is read-only,
+	 * so overflows are acknowledged by writing 1 to the matching bit
+	 * in PerfCntrGlobalStatusClr, which clears it in PerfCntrGlobalStatus
+	 */
+
+	/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
+	status &= amd_pmu_global_cntr_mask;
+	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+}
+
+static bool amd_pmu_legacy_has_overflow(int idx)
+{
+	u64 counter;
+
+	/* Top bit (cntval_bits - 1) clear is treated as an overflow */
+	rdmsrl(x86_pmu_event_addr(idx), counter);
+	return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
+}
+
+static bool amd_pmu_global_has_overflow(int idx)
+{
+	return !!(amd_pmu_get_global_overflow() & BIT_ULL(idx));
+}
+
+DEFINE_STATIC_CALL(amd_pmu_has_overflow, amd_pmu_legacy_has_overflow);
+
 /*
  * When a PMC counter overflows, an NMI is used to process the event and
  * reset the counter. NMI latency can result in the counter being updated
@@ -613,7 +653,6 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
 static void amd_pmu_wait_on_overflow(int idx)
 {
 	unsigned int i;
-	u64 counter;
 
 	/*
 	 * Wait for the counter to be reset if it has overflowed. This loop
@@ -621,8 +660,7 @@ static void amd_pmu_wait_on_overflow(int idx)
 	 * forever...
 	 */
 	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
-		rdmsrl(x86_pmu_event_addr(idx), counter);
-		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+		if (!static_call(amd_pmu_has_overflow)(idx))
 			break;
 
 		/* Might be in IRQ context, so can't sleep */
@@ -718,6 +756,83 @@ static void amd_pmu_enable_event(struct perf_event *event)
 	static_call(amd_pmu_enable_event)(event);
 }
 
+/*
+ * PerfMonV2 overflow handler: with all PMCs stopped, process every active
+ * PMC flagged in PerfCntrGlobalStatus. Returns the number of overflows seen.
+ */
+static int amd_pmu_global_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct hw_perf_event *hwc;
+	struct perf_event *event;
+	u64 status, mask;
+	int handled = 0, idx;
+
+	status = amd_pmu_get_global_overflow();
+
+	/* Check if any overflows are pending */
+	if (!status)
+		return 0;
+
+	/* Stop counting */
+	amd_pmu_global_disable_all();
+
+	cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was
+	 * pushed into all the nmi handlers.
+	 * N.B. Taken from x86_pmu_handle_irq()
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		event = cpuc->events[idx];
+		hwc = &event->hw;
+		x86_perf_event_update(event);
+		mask = BIT_ULL(idx);
+
+		if (!(status & mask))
+			continue;
+
+		/* Event overflow */
+		handled++;
+		/* Mark handled now so the bit is acked even on early exit */
+		status &= ~mask;
+		perf_sample_data_init(&data, 0, hwc->last_period);
+
+		if (!x86_perf_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			x86_pmu_stop(event, 0);
+	}
+
+	/*
+	 * It should never be the case that some overflows are not handled as
+	 * the corresponding PMCs are expected to be inactive according to the
+	 * active_mask
+	 */
+	WARN_ON_ONCE(status > 0);
+
+	/* Clear overflow bits */
+	amd_pmu_ack_global_overflow(~status);
+
+	inc_irq_stat(apic_perf_irqs);
+
+	/* Resume counting */
+	amd_pmu_global_enable_all(0);
+
+	return handled;
+}
+
+DEFINE_STATIC_CALL(amd_pmu_handle_irq, x86_pmu_handle_irq);
+
 /*
  * Because of NMI latency, if multiple PMC counters are active or other sources
  * of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -741,7 +856,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	int handled;
 
 	/* Process any counter overflows */
-	handled = x86_pmu_handle_irq(regs);
+	handled = static_call(amd_pmu_handle_irq)(regs);
 
 	/*
 	 * If a counter was handled, record a timestamp such that un-handled
@@ -1041,6 +1156,8 @@ static int __init amd_core_pmu_init(void)
 		static_call_update(amd_pmu_enable_all, amd_pmu_global_enable_all);
 		static_call_update(amd_pmu_disable_all, amd_pmu_global_disable_all);
 		static_call_update(amd_pmu_enable_event, amd_pmu_global_enable_event);
+		static_call_update(amd_pmu_has_overflow, amd_pmu_global_has_overflow);
+		static_call_update(amd_pmu_handle_irq, amd_pmu_global_handle_irq);
 	}
 
 	/*
-- 
2.32.0