* [PATCH 1/2] perf/x86/intel: Fix PEBS memory access info encoding for ADL
@ 2022-04-21 13:26 kan.liang
2022-04-21 13:26 ` [PATCH 2/2] perf/x86/intel: Fix PEBS data source " kan.liang
From: kan.liang @ 2022-04-21 13:26 UTC (permalink / raw)
To: peterz, mingo, linux-kernel; +Cc: alexander.shishkin, ak, Kan Liang
From: Kan Liang <kan.liang@linux.intel.com>
The PEBS memory access latency encoding for the e-core is slightly
different from the p-core: bit 4 is Lock, while bit 5 is TLB access.
Add a flag, pebs_adl_atom_dse, to indicate this case. Swap the
ld_stlb_miss and ld_locked bits for the e-core of ADL.
Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/intel/core.c | 1 +
arch/x86/events/intel/ds.c | 32 ++++++++++++++++++++++++++++----
arch/x86/events/perf_event.h | 3 ++-
3 files changed, 31 insertions(+), 5 deletions(-)
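For reference, a minimal sketch of the layout difference described in the
changelog above. The struct and function names below are simplified
stand-ins for illustration only, not the kernel's actual definitions:

	/*
	 * Illustrative sketch only: simplified stand-ins for the bit fields of
	 * union intel_x86_pebs_dse, showing why the two bits must be swapped
	 * before the common decode runs on the ADL e-core.
	 */
	#include <stdbool.h>
	#include <stdint.h>

	struct mem_info {
		bool stlb_miss;
		bool locked;
	};

	/* p-core layout: bit 4 = 2nd level TLB miss, bit 5 = locked access */
	static struct mem_info decode_big_core(uint64_t dse)
	{
		return (struct mem_info){
			.stlb_miss	= (dse >> 4) & 1,
			.locked		= (dse >> 5) & 1,
		};
	}

	/* ADL e-core layout: bit 4 = locked access, bit 5 = 2nd level TLB miss */
	static struct mem_info decode_atom_core(uint64_t dse)
	{
		return (struct mem_info){
			.locked		= (dse >> 4) & 1,
			.stlb_miss	= (dse >> 5) & 1,
		};
	}

Swapping ld_stlb_miss and ld_locked up front, as pebs_dse_adl() does in the
patch, lets the existing p-core decode path stay unchanged.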
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fc7f458eb3de..ee3ee4b5c53a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6233,6 +6233,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
+ x86_pmu.pebs_adl_atom_dse = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 376cc3d66094..98370650525d 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -171,7 +171,24 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
return dse.val;
}
-static u64 load_latency_data(u64 status)
+static inline void pebs_dse_adl(struct perf_event *event,
+ union intel_x86_pebs_dse *dse)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->cpu_type == hybrid_big)
+ return;
+ /*
+ * For the atom core on ADL, bit 4: lock, bit 5: TLB access.
+ * Swap the ld_stlb_miss and ld_locked.
+ */
+ if (dse->ld_stlb_miss != dse->ld_locked) {
+ dse->ld_stlb_miss = !dse->ld_stlb_miss;
+ dse->ld_locked = !dse->ld_locked;
+ }
+}
+
+static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
@@ -190,6 +207,10 @@ static u64 load_latency_data(u64 status)
val |= P(TLB, NA) | P(LOCK, NA);
return val;
}
+
+ if (x86_pmu.pebs_adl_atom_dse)
+ pebs_dse_adl(event, &dse);
+
/*
* bit 4: TLB access
* 0 = did not miss 2nd level TLB
@@ -233,7 +254,7 @@ static u64 load_latency_data(u64 status)
return val;
}
-static u64 store_latency_data(u64 status)
+static u64 store_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
@@ -245,6 +266,9 @@ static u64 store_latency_data(u64 status)
*/
val = pebs_data_source[dse.st_lat_dse];
+ if (x86_pmu.pebs_adl_atom_dse)
+ pebs_dse_adl(event, &dse);
+
/*
* bit 4: TLB access
* 0 = did not miss 2nd level TLB
@@ -1443,9 +1467,9 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
if (fl & PERF_X86_EVENT_PEBS_LDLAT)
- val = load_latency_data(aux);
+ val = load_latency_data(event, aux);
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
- val = store_latency_data(aux);
+ val = store_latency_data(event, aux);
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
val = precise_datala_hsw(event, aux);
else if (fst)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 150261d929b9..0ed1413b0eb7 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -815,7 +815,8 @@ struct x86_pmu {
pebs_prec_dist :1,
pebs_no_tlb :1,
pebs_no_isolation :1,
- pebs_block :1;
+ pebs_block :1,
+ pebs_adl_atom_dse :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
--
2.35.1
* [PATCH 2/2] perf/x86/intel: Fix PEBS data source encoding for ADL
2022-04-21 13:26 [PATCH 1/2] perf/x86/intel: Fix PEBS memory access info encoding for ADL kan.liang
@ 2022-04-21 13:26 ` kan.liang
From: kan.liang @ 2022-04-21 13:26 UTC (permalink / raw)
To: peterz, mingo, linux-kernel; +Cc: alexander.shishkin, ak, Kan Liang
From: Kan Liang <kan.liang@linux.intel.com>
The PEBS data source encoding for the e-core is different from the
p-core.
Add a pebs_data_source[] array to struct x86_hybrid_pmu to store the
data source encoding for each type of core.
Add intel_pmu_pebs_data_source_grt() for the e-core.
The data source encoding of the p-core is unchanged; it still reuses
intel_pmu_pebs_data_source_skl().
Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/intel/core.c | 2 +-
arch/x86/events/intel/ds.c | 41 +++++++++++++++++++++++++++++-------
arch/x86/events/perf_event.h | 6 ++++++
3 files changed, 40 insertions(+), 9 deletions(-)
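For reference, a minimal sketch of the per-PMU table lookup this patch
enables. The names below are simplified stand-ins for illustration only,
not the kernel's exact code:

	/*
	 * Illustrative sketch only: each hybrid PMU keeps its own 16-entry data
	 * source table; bits 0-3 of the PEBS dse field index into it, so the
	 * same raw value can decode differently on the p-core and the e-core.
	 */
	#include <stdint.h>
	#include <string.h>

	#define DATA_SOURCE_MAX	0x10

	struct hybrid_pmu_model {
		uint64_t pebs_data_source[DATA_SOURCE_MAX];
	};

	/*
	 * Boot-time setup: copy the common table, then let the caller override
	 * the entries that differ on this core type (as the grt/skl helpers do).
	 */
	static void init_pmu_table(struct hybrid_pmu_model *pmu,
				   const uint64_t *common)
	{
		memcpy(pmu->pebs_data_source, common,
		       sizeof(uint64_t) * DATA_SOURCE_MAX);
	}

	/*
	 * Sample decode: the low 4 bits of the dse field select the entry from
	 * the table belonging to the PMU the event was scheduled on.
	 */
	static uint64_t lookup_data_source(const struct hybrid_pmu_model *pmu,
					   uint64_t dse)
	{
		return pmu->pebs_data_source[dse & 0xf];
	}

This mirrors what the hybrid_var(event->pmu, pebs_data_source) lookup in the
patch achieves: the decode follows the PMU the event belongs to instead of a
single global table.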
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ee3ee4b5c53a..66342a176a7d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6240,7 +6240,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_skl(false);
+ intel_pmu_pebs_data_source_adl();
x86_pmu.num_topdown_events = 8;
x86_pmu.update_topdown_event = adl_update_topdown_event;
x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 98370650525d..c58660589502 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -94,15 +94,40 @@ void __init intel_pmu_pebs_data_source_nhm(void)
pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}
-void __init intel_pmu_pebs_data_source_skl(bool pmem)
+static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
{
u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
- pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
- pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
- pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
- pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
- pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
+ data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+ data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
+ data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+ data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
+ data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
+}
+
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+ __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
+}
+
+static void __init intel_pmu_pebs_data_source_grt(u64 *data_source)
+{
+ data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
+}
+
+void __init intel_pmu_pebs_data_source_adl(void)
+{
+ u64 *data_source;
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(u64) * PERF_PEBS_DATA_SOURCE_MAX);
+ __intel_pmu_pebs_data_source_skl(false, data_source);
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(u64) * PERF_PEBS_DATA_SOURCE_MAX);
+ intel_pmu_pebs_data_source_grt(data_source);
}
static u64 precise_store_data(u64 status)
@@ -198,7 +223,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status)
/*
* use the mapping table for bit 0-3
*/
- val = pebs_data_source[dse.ld_dse];
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
/*
* Nehalem models do not support TLB, Lock infos
@@ -264,7 +289,7 @@ static u64 store_latency_data(struct perf_event *event, u64 status)
/*
* use the mapping table for bit 0-3
*/
- val = pebs_data_source[dse.st_lat_dse];
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
if (x86_pmu.pebs_adl_atom_dse)
pebs_dse_adl(event, &dse);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 0ed1413b0eb7..63e54f911d2c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -635,6 +635,8 @@ enum {
x86_lbr_exclusive_max,
};
+#define PERF_PEBS_DATA_SOURCE_MAX 0x10
+
struct x86_hybrid_pmu {
struct pmu pmu;
const char *name;
@@ -662,6 +664,8 @@ struct x86_hybrid_pmu {
unsigned int late_ack :1,
mid_ack :1,
enabled_ack :1;
+
+ u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
};
static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -1407,6 +1411,8 @@ void intel_pmu_pebs_data_source_nhm(void);
void intel_pmu_pebs_data_source_skl(bool pmem);
+void intel_pmu_pebs_data_source_adl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
--
2.35.1