linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
@ 2023-05-22 11:30 kan.liang
  2023-05-22 11:30 ` [PATCH V2 2/6] perf: Add branch stack extension kan.liang
                   ` (7 more replies)
  0 siblings, 8 replies; 26+ messages in thread
From: kan.liang @ 2023-05-22 11:30 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang,
	Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
both have Crestmont core. From the core PMU's perspective, they are
similar to the e-core of MTL. The only difference is the LBR event
logging feature, which will be implemented in the following patches.

Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

No changes since V1.

 arch/x86/events/intel/core.c | 52 +++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/ds.c   |  9 +++++--
 arch/x86/events/perf_event.h |  2 ++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a3fb996a86a1..ba2a971e6b8a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2119,6 +2119,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(topdown-retiring,       td_retiring_cmt,        "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec,       td_bad_spec_cmt,        "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+	EVENT_PTR(td_fe_bound_tnt),
+	EVENT_PTR(td_retiring_cmt),
+	EVENT_PTR(td_bad_spec_cmt),
+	EVENT_PTR(td_be_bound_tnt),
+	NULL
+};
+
 static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@@ -4830,6 +4841,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
 
 PMU_FORMAT_ATTR(frontend, "config1:0-23");
 
+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -4860,6 +4873,13 @@ static struct attribute *slm_format_attr[] = {
 	NULL
 };
 
+static struct attribute *cmt_format_attr[] = {
+	&format_attr_offcore_rsp.attr,
+	&format_attr_ldlat.attr,
+	&format_attr_snoop_rsp.attr,
+	NULL
+};
+
 static struct attribute *skl_format_attr[] = {
 	&format_attr_frontend.attr,
 	NULL,
@@ -5630,7 +5650,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
 	NULL
 };
 
-PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
 FORMAT_ATTR_HYBRID(snoop_rsp,	hybrid_small);
 
 static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@@ -6178,6 +6197,37 @@ __init int intel_pmu_init(void)
 		name = "gracemont";
 		break;
 
+	case INTEL_FAM6_GRANDRIDGE:
+	case INTEL_FAM6_SIERRAFOREST_X:
+		x86_pmu.mid_ack = true;
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_cmt_extra_regs;
+
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.pebs_block = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+		intel_pmu_pebs_data_source_cmt();
+		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.get_event_constraints = cmt_get_event_constraints;
+		x86_pmu.limit_period = spr_limit_period;
+		td_attr = cmt_events_attrs;
+		mem_attr = grt_mem_attrs;
+		extra_attr = cmt_format_attr;
+		pr_cont("Crestmont events, ");
+		name = "crestmont";
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a2e566e53076..608e220e46aa 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
 	__intel_pmu_pebs_data_source_grt(data_source);
 }
 
-static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
 {
 	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
 
 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
-	intel_pmu_pebs_data_source_cmt(data_source);
+	__intel_pmu_pebs_data_source_cmt(data_source);
+}
+
+void __init intel_pmu_pebs_data_source_cmt(void)
+{
+	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
 }
 
 static u64 precise_store_data(u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index d6de4487348c..c8ba2be7585d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
 
 void intel_pmu_pebs_data_source_mtl(void);
 
+void intel_pmu_pebs_data_source_cmt(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V2 2/6] perf: Add branch stack extension
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
@ 2023-05-22 11:30 ` kan.liang
  2023-05-23  6:03   ` Sandipan Das
  2023-08-02 21:58   ` Peter Zijlstra
  2023-05-22 11:30 ` [PATCH V2 3/6] perf: Support branch events kan.liang
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 26+ messages in thread
From: kan.liang @ 2023-05-22 11:30 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang,
	Kan Liang, Sandipan Das, Ravi Bangoria, Athira Rajeev

From: Kan Liang <kan.liang@linux.intel.com>

Currently, the extra information of a branch entry is stored in a u64
space. With more and more information added, the space is running out.
For example, the information of occurrences of events will be added for
each branch.

Add an extension space to record the new information for each branch
entry. The space is appended after the struct perf_branch_stack.

Add a bit in struct perf_branch_entry to indicate whether the extra
information is included.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
---

New patch
- Introduce a generic extension space which can be used to
  store the LBR event information for Intel. It can also be used by
  other ARCHs for the other purpose.
- Add a new bit in struct perf_branch_entry to indicate whether the
  extra information is included.

 arch/powerpc/perf/core-book3s.c |  2 +-
 arch/x86/events/amd/core.c      |  2 +-
 arch/x86/events/intel/core.c    |  2 +-
 arch/x86/events/intel/ds.c      |  4 ++--
 include/linux/perf_event.h      | 18 +++++++++++++++++-
 include/uapi/linux/perf_event.h |  4 +++-
 kernel/events/core.c            |  5 +++++
 7 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 8c1f7def596e..3c14596bbfaf 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2313,7 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			struct cpu_hw_events *cpuhw;
 			cpuhw = this_cpu_ptr(&cpu_hw_events);
 			power_pmu_bhrb_read(event, cpuhw);
-			perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+			perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
 		}
 
 		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index bccea57dee81..facee84aeecb 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -930,7 +930,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 			continue;
 
 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ba2a971e6b8a..21566f66bfd8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3048,7 +3048,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 608e220e46aa..3f16e95e99dd 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1747,7 +1747,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 		setup_pebs_time(event, data, pebs->tsc);
 
 	if (has_branch_stack(event))
-		perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
 }
 
 static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1904,7 +1904,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 
 		if (has_branch_stack(event)) {
 			intel_pmu_store_pebs_lbrs(lbr);
-			perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
 		}
 	}
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d5628a7b5eaa..e2e04dc39199 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -126,6 +126,16 @@ struct perf_branch_stack {
 	struct perf_branch_entry	entries[];
 };
 
+/*
+ * The extension space is appended after the struct perf_branch_stack.
+ * It is used to store the extra data of each branch, e.g.,
+ * the occurrences of events since the last branch entry for Intel LBR.
+ */
+struct perf_branch_stack_ext {
+	__u64				nr;
+	__u64				data[];
+};
+
 struct task_struct;
 
 /*
@@ -1161,6 +1171,7 @@ struct perf_sample_data {
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
+	struct perf_branch_stack_ext	*br_stack_ext;
 	union perf_sample_weight	weight;
 	union  perf_mem_data_src	data_src;
 	u64				txn;
@@ -1237,7 +1248,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
 
 static inline void perf_sample_save_brstack(struct perf_sample_data *data,
 					    struct perf_event *event,
-					    struct perf_branch_stack *brs)
+					    struct perf_branch_stack *brs,
+					    struct perf_branch_stack_ext *brs_ext)
 {
 	int size = sizeof(u64); /* nr */
 
@@ -1245,7 +1257,11 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
 		size += sizeof(u64);
 	size += brs->nr * sizeof(struct perf_branch_entry);
 
+	if (brs_ext)
+		size += (1 + brs_ext->nr) * sizeof(u64);
+
 	data->br_stack = brs;
+	data->br_stack_ext = brs_ext;
 	data->dyn_size += size;
 	data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 37675437b768..1b3b90965b6b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1410,6 +1410,7 @@ union perf_mem_data_src {
  *    cycles: cycles from last branch (or 0 if not supported)
  *      type: branch type
  *      spec: branch speculation info (or 0 if not supported)
+ *       ext: has extension space for extra info (or 0 if not supported)
  */
 struct perf_branch_entry {
 	__u64	from;
@@ -1423,7 +1424,8 @@ struct perf_branch_entry {
 		spec:2,     /* branch speculation info */
 		new_type:4, /* additional branch type */
 		priv:3,     /* privilege level */
-		reserved:31;
+		ext:1,      /* has extension */
+		reserved:30;
 };
 
 union perf_sample_weight {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 435815d3be3f..dfd6703139a1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7324,6 +7324,11 @@ void perf_output_sample(struct perf_output_handle *handle,
 			if (branch_sample_hw_index(event))
 				perf_output_put(handle, data->br_stack->hw_idx);
 			perf_output_copy(handle, data->br_stack->entries, size);
+			if (data->br_stack_ext) {
+				size = data->br_stack_ext->nr * sizeof(u64);
+				perf_output_put(handle, data->br_stack_ext->nr);
+				perf_output_copy(handle, data->br_stack_ext->data, size);
+			}
 		} else {
 			/*
 			 * we always store at least the value of nr
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V2 3/6] perf: Support branch events
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
  2023-05-22 11:30 ` [PATCH V2 2/6] perf: Add branch stack extension kan.liang
@ 2023-05-22 11:30 ` kan.liang
  2023-05-22 11:30 ` [PATCH V2 4/6] perf/x86/intel: Support LBR event logging kan.liang
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: kan.liang @ 2023-05-22 11:30 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang,
	Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

With the cycle time information between branches, stalls can be easily
observed. But it's difficult to explain what causes the long delay.

Add a new branch sample type to indicate whether include occurrences of
events in branch info. The information will be stored in the branch
stack extension space.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V1:
- Rename to PERF_SAMPLE_BRANCH_EVT_CNTRS
- Drop the event ID sample type

 include/linux/perf_event.h      | 4 ++++
 include/uapi/linux/perf_event.h | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e2e04dc39199..823c6779a96d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1137,6 +1137,10 @@ static inline bool branch_sample_priv(const struct perf_event *event)
 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
 }
 
+static inline bool branch_sample_evt_cntrs(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_EVT_CNTRS;
+}
 
 struct perf_sample_data {
 	/*
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1b3b90965b6b..3911cf000e8a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
 
 	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
 
+	PERF_SAMPLE_BRANCH_EVT_CNTRS_SHIFT	= 19, /* save occurrences of events on a branch */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
 
 	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
 
+	PERF_SAMPLE_BRANCH_EVT_CNTRS	= 1U << PERF_SAMPLE_BRANCH_EVT_CNTRS_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V2 4/6] perf/x86/intel: Support LBR event logging
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
  2023-05-22 11:30 ` [PATCH V2 2/6] perf: Add branch stack extension kan.liang
  2023-05-22 11:30 ` [PATCH V2 3/6] perf: Support branch events kan.liang
@ 2023-05-22 11:30 ` kan.liang
  2023-05-22 11:30 ` [PATCH V2 5/6] tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel kan.liang
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: kan.liang @ 2023-05-22 11:30 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang,
	Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

The LBR event logging introduces a per-counter indication of precise
event occurrences in LBRs. It can provide a means to attribute exposed
retirement latency to combinations of events across a block of
instructions. It also provides a means of attributing Timed LBR
latencies to events.

The feature is first introduced on SRF/GRR. It is an enhancement of the
ARCH LBR. It adds new fields in the LBR_INFO MSRs to log the occurrences
of events on the GP counters. The information is displayed by the order
of counters.

The design proposed in this patch requires that the events which are
logged must be in a group with the event that has LBR. If there are
more than one LBR group, the event logging information only from the
current group (overflowed) is stored for the perf tool, otherwise the
perf tool cannot know which and when other groups are scheduled
especially when multiplexing is triggered. The user can ensure it uses
the maximum number of counters that support LBR info (4 by now) by
making the group large enough.

The HW only logs events by the order of counters. The order may be
different from the order of enabling which the perf tool can understand.
When parsing the information of each branch entry, convert the counter
order to the enabled order, and store the enabled order in the extension
space.

Unconditionally reset LBRs for an LBR event group when it's deleted. The
logged events' occurrences information is only valid for the current LBR
group. If another LBR group is scheduled later, the information from the
stale LBRs would be otherwise wrongly interpreted.

Add a sanity check in intel_pmu_hw_config(). Disable the feature if other
counter filters (inv, cmask, edge, in_tx) are set or LBR call stack mode
is enabled. (For the LBR call stack mode, we cannot simply flush the
LBR, since it will break the call stack. Also, there is no obvious usage
with the call stack mode for now.)

Add a new event kernel flag, PERF_X86_EVENT_LBR_EVENT, to indicate
the event whose occurrences information is recorded in the branch
information in the kernel. The event is marked by the
PERF_SAMPLE_BRANCH_EVT_CNTRS bit in the attr struct. Any perf sample
branch type will trigger a branch stack setup. But the event itself
doesn't require a branch stack setup. When initializing the event, clear
the PERF_SAMPLE_BRANCH_EVT_CNTRS bit to avoid a branch stack setup.

Add a SW bit LBR_EVENT_LOG_BIT to indicate a LBR event logging group.
Users may not want to record the event occurrences of the event which
collect LBR, e.g., -e "{cpu/E1,branch_type=any/,cpu/E2,branch_type=event/}".
The PERF_X86_EVENT_LBR_EVENT may not be set for the LBR event. When
saving the LBRs, a LBR flag is required to tell whether storing the
event occurrences information into the extension space.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V1:
- Using the enabling order. The kernel will change the order of counters
  to the order of enabling.
- Remove the PERF_MAX_BRANCH_EVENTS. The max value should read from the
  CPUID enumeration.
- The enabled order may be undetermined (multiple LBR groups). Only keep
  the event logging information for the current group.
- Add a new event kernel flag, PERF_X86_EVENT_LBR_EVENT, to indicate
  the event whose occurrences information is recorded in the branch
  information in kernel.
- Add a new LBR flag, LBR_EVENT_LOG_BIT, to indicate a LBR event logging
  group.

 arch/x86/events/core.c             |   2 +-
 arch/x86/events/intel/core.c       |  41 +++++++-
 arch/x86/events/intel/ds.c         |   2 +-
 arch/x86/events/intel/lbr.c        | 162 ++++++++++++++++++++++++++++-
 arch/x86/events/perf_event.h       |  17 +++
 arch/x86/events/perf_event_flags.h |   1 +
 arch/x86/include/asm/msr-index.h   |   2 +
 arch/x86/include/asm/perf_event.h  |   4 +
 include/linux/perf_event.h         |   5 +
 9 files changed, 228 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d096b04bf80e..2f1b161cb2bc 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -603,7 +603,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		}
 	}
 
-	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+	if (branch_sample_call_stack(event))
 		event->attach_state |= PERF_ATTACH_TASK_DATA;
 
 	/*
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 21566f66bfd8..ec3939fe9098 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2788,6 +2788,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 
 static void intel_pmu_enable_event(struct perf_event *event)
 {
+	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -2796,8 +2797,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
 
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
+		if (log_event_in_branch(event))
+			enable_mask |= ARCH_PERFMON_EVENTSEL_LBR_LOG;
 		intel_set_masks(event, idx);
-		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -3048,7 +3051,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+			intel_pmu_lbr_save_brstack(&data, cpuc, event);
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
@@ -3613,6 +3616,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	if (cpuc->excl_cntrs)
 		return intel_get_excl_constraints(cpuc, event, idx, c2);
 
+	/* The LBR event logging is only available for some counters. */
+	if (log_event_in_branch(event)) {
+		c2 = dyn_constraint(cpuc, c2, idx);
+		c2->idxmsk64 &= x86_pmu.lbr_events;
+		c2->weight = hweight64(c2->idxmsk64);
+	}
+
 	return c2;
 }
 
@@ -3871,6 +3881,17 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
 	return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
 }
 
+static bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+	/* NO LBR setup for a counting event */
+	if (!is_sampling_event(event)) {
+		event->attr.branch_sample_type = 0;
+		return false;
+	}
+
+	return needs_branch_stack(event);
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -3898,7 +3919,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
 			x86_pmu.pebs_aliases(event);
 	}
 
-	if (needs_branch_stack(event)) {
+	if (branch_sample_evt_cntrs(event)) {
+		if (!(x86_pmu.flags & PMU_FL_LBR_EVENT) ||
+		    (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+			return -EINVAL;
+
+		ret = intel_pmu_setup_lbr_event(event);
+		if (ret)
+			return ret;
+
+		event->hw.flags  |= PERF_X86_EVENT_LBR_EVENT;
+	}
+
+	if (intel_pmu_needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
 			return ret;
@@ -4549,7 +4582,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 			goto err;
 	}
 
-	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_LBR_EVENT)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3f16e95e99dd..47c0ecbc301d 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1904,7 +1904,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 
 		if (has_branch_stack(event)) {
 			intel_pmu_store_pebs_lbrs(lbr);
-			perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+			intel_pmu_lbr_save_brstack(data, cpuc, event);
 		}
 	}
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c3b0d15a9841..6ee9d9e88586 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -29,7 +29,13 @@
  * the actual MSR. But it helps the constraint perf code to understand
  * that this is a separate configuration.
  */
-#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */
+#define LBR_NO_INFO_BIT		63 /* don't read LBR_INFO. */
+/*
+ * Indicate a LBR event logging group.
+ * The event logging feature is only available for the ARCH LBR,
+ * while the NO INFO is only applied for the legacy LBR. Reuse the bit.
+ */
+#define LBR_EVENT_LOG_BIT	63
 
 #define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
 #define LBR_USER	(1 << LBR_USER_BIT)
@@ -42,6 +48,7 @@
 #define LBR_FAR		(1 << LBR_FAR_BIT)
 #define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
 #define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)
+#define LBR_EVENT_LOG	(1ULL << LBR_EVENT_LOG_BIT)
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
@@ -676,6 +683,21 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
 	perf_sched_cb_dec(event->pmu);
+
+	/*
+	 * The logged occurrences information is only valid for the
+	 * current LBR group. If another LBR group is scheduled in
+	 * later, the information from the stale LBRs will be wrongly
+	 * interpreted. Reset the LBRs here.
+	 * For the context switch, the LBR will be unconditionally
+	 * flushed when a new task is scheduled in. If both the new task
+	 * and the old task are monitored by a LBR event group, the
+	 * reset here is redundant. But the extra reset doesn't impact
+	 * the functionality. It's hard to distinguish the above case.
+	 * Keep the unconditional reset for a LBR event group for now.
+	 */
+	if (intel_pmu_lbr_has_event_log(cpuc))
+		intel_pmu_lbr_reset();
 }
 
 static inline bool vlbr_exclude_host(void)
@@ -866,6 +888,18 @@ static __always_inline u16 get_lbr_cycles(u64 info)
 	return cycles;
 }
 
+static __always_inline void get_lbr_events(struct cpu_hw_events *cpuc,
+					   int i, u64 info)
+{
+	/*
+	 * The later code will decide what content can be disclosed
+	 * to the perf tool. It's not harmful to unconditionally update
+	 * the cpuc->lbr_events.
+	 * Please see intel_pmu_lbr_event_reorder()
+	 */
+	cpuc->lbr_events[i] = info & LBR_INFO_EVENTS;
+}
+
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 				struct lbr_entry *entries)
 {
@@ -898,11 +932,73 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->abort	= !!(info & LBR_INFO_ABORT);
 		e->cycles	= get_lbr_cycles(info);
 		e->type		= get_lbr_br_type(info);
+
+		get_lbr_events(cpuc, i, info);
 	}
 
 	cpuc->lbr_stack.nr = i;
 }
 
+#define ARCH_LBR_EVENT_LOG_WIDTH	2
+#define ARCH_LBR_EVENT_LOG_MASK		0x3
+
+static __always_inline void intel_pmu_update_lbr_event(u64 *lbr_events, int idx, int pos)
+{
+	u64 logs = *lbr_events >> (LBR_INFO_EVENTS_OFFSET +
+				   idx * ARCH_LBR_EVENT_LOG_WIDTH);
+
+	logs &= ARCH_LBR_EVENT_LOG_MASK;
+	*lbr_events |= logs << (pos * ARCH_LBR_EVENT_LOG_WIDTH);
+}
+
+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_events with the enabled order.
+ */
+static void intel_pmu_lbr_event_reorder(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	int i, j, pos = 0, enabled[X86_PMC_IDX_MAX];
+	struct perf_event *leader, *sibling;
+
+	leader = event->group_leader;
+	if (log_event_in_branch(event))
+		enabled[pos++] = leader->hw.idx;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!log_event_in_branch(sibling))
+			continue;
+		enabled[pos++] = sibling->hw.idx;
+	}
+
+	if (!pos) {
+		cpuc->lbr_stack_ext.nr = 0;
+		return;
+	}
+
+	cpuc->lbr_stack_ext.nr = cpuc->lbr_stack.nr;
+	for (i = 0; i < cpuc->lbr_stack_ext.nr; i++) {
+		cpuc->lbr_entries[i].ext = true;
+
+		for (j = 0; j < pos; j++)
+			intel_pmu_update_lbr_event(&cpuc->lbr_events[i], enabled[j], j);
+
+		/* Clear the original counter order */
+		cpuc->lbr_events[i] &= ~LBR_INFO_EVENTS;
+	}
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	if (!intel_pmu_lbr_has_event_log(cpuc))
+		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+
+	intel_pmu_lbr_event_reorder(cpuc, event);
+	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, &cpuc->lbr_stack_ext);
+}
+
 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
 {
 	intel_pmu_store_lbr(cpuc, NULL);
@@ -1045,6 +1141,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 		 * Enable the branch type by default for the Arch LBR.
 		 */
 		reg->reg |= X86_BR_TYPE_SAVE;
+
+		if (log_event_in_branch(event))
+			reg->config |= LBR_EVENT_LOG;
+
 		return 0;
 	}
 
@@ -1091,6 +1191,54 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	return ret;
 }
 
+bool intel_pmu_lbr_has_event_log(struct cpu_hw_events *cpuc)
+{
+	return cpuc->lbr_sel && (cpuc->lbr_sel->config & LBR_EVENT_LOG);
+}
+
+int intel_pmu_setup_lbr_event(struct perf_event *event)
+{
+	struct perf_event *leader, *sibling;
+
+	/*
+	 * The event logging is not supported in the call stack mode
+	 * yet, since we cannot simply flush the LBR during e.g.,
+	 * multiplexing. Also, there is no obvious usage with the call
+	 * stack mode. Simply forbid it for now.
+	 *
+	 * If any events in the group which enable the LBR event logging
+	 * feature, mark it as a LBR event logging group.
+	 */
+	leader = event->group_leader;
+	if (branch_sample_call_stack(leader))
+		return -EINVAL;
+	if (leader->hw.branch_reg.idx == EXTRA_REG_LBR)
+		leader->hw.branch_reg.config |= LBR_EVENT_LOG;
+
+	for_each_sibling_event(sibling, leader) {
+		if (branch_sample_call_stack(sibling))
+			return -EINVAL;
+		if (sibling->hw.branch_reg.idx == EXTRA_REG_LBR)
+			sibling->hw.branch_reg.config |= LBR_EVENT_LOG;
+	}
+
+	/*
+	 *  The PERF_SAMPLE_BRANCH_EVT_CNTRS is used to mark an event
+	 *  whose occurrences information should be recorded in the
+	 *  branch information.
+	 *  Only applying the PERF_SAMPLE_BRANCH_EVT_CNTRS doesn't
+	 *  require any branch stack setup. Clear the bit to avoid
+	 *  any branch stack setup.
+	 */
+	if (event->attr.branch_sample_type &
+	    ~(PERF_SAMPLE_BRANCH_EVT_CNTRS | PERF_SAMPLE_BRANCH_PLM_ALL))
+		event->attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_EVT_CNTRS;
+	else
+		event->attr.branch_sample_type = 0;
+
+	return 0;
+}
+
 enum {
 	ARCH_LBR_BR_TYPE_JCC			= 0,
 	ARCH_LBR_BR_TYPE_NEAR_IND_JMP		= 1,
@@ -1173,14 +1321,20 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
 		if (!cpuc->lbr_entries[i].from) {
 			j = i;
-			while (++j < cpuc->lbr_stack.nr)
+			while (++j < cpuc->lbr_stack.nr) {
 				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+				if (cpuc->lbr_stack_ext.nr)
+					cpuc->lbr_events[j-1] = cpuc->lbr_events[j];
+			}
 			cpuc->lbr_stack.nr--;
 			if (!cpuc->lbr_entries[i].from)
 				continue;
 		}
 		i++;
 	}
+
+	if (cpuc->lbr_stack_ext.nr)
+		cpuc->lbr_stack_ext.nr = cpuc->lbr_stack.nr;
 }
 
 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
@@ -1525,8 +1679,12 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
 	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+	x86_pmu.lbr_events = ecx.split.lbr_events;
 	x86_pmu.lbr_nr = lbr_nr;
 
+	if (!!x86_pmu.lbr_events)
+		x86_pmu.flags |= PMU_FL_LBR_EVENT;
+
 	if (x86_pmu.lbr_mispred)
 		static_branch_enable(&x86_lbr_mispred);
 	if (x86_pmu.lbr_timed_lbr)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index c8ba2be7585d..70207ac8e193 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -283,6 +283,8 @@ struct cpu_hw_events {
 	int				lbr_pebs_users;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct perf_branch_stack_ext	lbr_stack_ext;
+	u64				lbr_events[MAX_LBR_ENTRIES];
 	union {
 		struct er_account		*lbr_sel;
 		struct er_account		*lbr_ctl;
@@ -881,6 +883,7 @@ struct x86_pmu {
 	unsigned int	lbr_mispred:1;
 	unsigned int	lbr_timed_lbr:1;
 	unsigned int	lbr_br_type:1;
+	unsigned int	lbr_events:4;
 
 	void		(*lbr_reset)(void);
 	void		(*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1005,6 +1008,7 @@ do {									\
 #define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
 #define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_LBR_EVENT	0x400 /* Support LBR event logging */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1457,6 +1461,11 @@ static __always_inline void __intel_pmu_lbr_disable(void)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
+static __always_inline bool log_event_in_branch(struct perf_event *event)
+{
+	return event->hw.flags & PERF_X86_EVENT_LBR_EVENT;
+}
+
 int intel_pmu_save_and_restart(struct perf_event *event);
 
 struct event_constraint *
@@ -1545,6 +1554,14 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
 
 void intel_ds_init(void);
 
+bool intel_pmu_lbr_has_event_log(struct cpu_hw_events *cpuc);
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event);
+
+int intel_pmu_setup_lbr_event(struct perf_event *event);
+
 void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
 				 struct perf_event_pmu_context *next_epc);
 
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9b4426..03e27af123af 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
 PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
 PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
 PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(LBR_EVENT,		0x40000) /* log the event in LBR */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ad35355ee43e..b845eeb527ef 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -222,6 +222,8 @@
 #define LBR_INFO_CYCLES			0xffff
 #define LBR_INFO_BR_TYPE_OFFSET		56
 #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_EVENTS_OFFSET		32
+#define LBR_INFO_EVENTS			(0xffull << LBR_INFO_EVENTS_OFFSET)
 
 #define MSR_ARCH_LBR_CTL		0x000014ce
 #define ARCH_LBR_CTL_LBREN		BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc15ed5e60b..2ae60c378e3a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -31,6 +31,7 @@
 #define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_LBR_LOG			(1ULL << 35)
 
 #define HSW_IN_TX					(1ULL << 32)
 #define HSW_IN_TX_CHECKPOINTED				(1ULL << 33)
@@ -203,6 +204,9 @@ union cpuid28_ecx {
 		unsigned int    lbr_timed_lbr:1;
 		/* Branch Type Field Supported */
 		unsigned int    lbr_br_type:1;
+		unsigned int	reserved:13;
+		/* Event Logging Supported */
+		unsigned int	lbr_events:4;
 	} split;
 	unsigned int            full;
 };
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 823c6779a96d..618c0d8ce88f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1142,6 +1142,11 @@ static inline bool branch_sample_evt_cntrs(const struct perf_event *event)
 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_EVT_CNTRS;
 }
 
+static inline bool branch_sample_call_stack(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
 struct perf_sample_data {
 	/*
 	 * Fields set by perf_sample_data_init() unconditionally,
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V2 5/6] tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
                   ` (2 preceding siblings ...)
  2023-05-22 11:30 ` [PATCH V2 4/6] perf/x86/intel: Support LBR event logging kan.liang
@ 2023-05-22 11:30 ` kan.liang
  2023-05-22 11:30 ` [PATCH V2 6/6] perf tools: Add branch event knob kan.liang
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: kan.liang @ 2023-05-22 11:30 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang,
	Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

Sync the new sample type and extension bit for the branch event feature.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V1:
- Rename to PERF_SAMPLE_BRANCH_EVT_CNTRS
- Drop the event ID sample type
- Add extension bit

 tools/include/uapi/linux/perf_event.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 37675437b768..3911cf000e8a 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
 
 	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
 
+	PERF_SAMPLE_BRANCH_EVT_CNTRS_SHIFT	= 19, /* save occurrences of events on a branch */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
 
 	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
 
+	PERF_SAMPLE_BRANCH_EVT_CNTRS	= 1U << PERF_SAMPLE_BRANCH_EVT_CNTRS_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -1410,6 +1414,7 @@ union perf_mem_data_src {
  *    cycles: cycles from last branch (or 0 if not supported)
  *      type: branch type
  *      spec: branch speculation info (or 0 if not supported)
+ *       ext: has extension space for extra info (or 0 if not supported)
  */
 struct perf_branch_entry {
 	__u64	from;
@@ -1423,7 +1428,8 @@ struct perf_branch_entry {
 		spec:2,     /* branch speculation info */
 		new_type:4, /* additional branch type */
 		priv:3,     /* privilege level */
-		reserved:31;
+		ext:1,      /* has extension */
+		reserved:30;
 };
 
 union perf_sample_weight {
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V2 6/6] perf tools: Add branch event knob
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
                   ` (3 preceding siblings ...)
  2023-05-22 11:30 ` [PATCH V2 5/6] tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel kan.liang
@ 2023-05-22 11:30 ` kan.liang
  2023-05-22 20:26 ` [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest Peter Zijlstra
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: kan.liang @ 2023-05-22 11:30 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang,
	Kan Liang

From: Kan Liang <kan.liang@linux.intel.com>

Add a new branch filter, "event", for the branch event option. It is
used to mark the events which should be logged in the branch. If it is
applied with the -j option, all the events should be logged in the
branch. If the legacy kernel doesn't support the new branch sample type,
switch off the branch event filter.

The new extension space of each branch is dumped right after the regular
branch stack information via perf report -D.

Usage examples:

perf record -e "{branch-instructions,branch-misses}:S" -j any,event

Only the first event, branch-instructions, collects the LBR. Both
branch-instructions and branch-misses are marked as logged events.
Their occurrence information can be found in the branch stack
extension space of each branch.

perf record -e "{cpu/branch-instructions,branch_type=any/,
cpu/branch-misses,branch_type=event/}"

Only the first event, branch-instructions, collects the LBR. Only the
branch-misses event is marked as a logged event.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Notes: Since the new interfaces are still under review and may be
changed later, the perf tool patch provides only minimal support for the
current version. Once the interfaces are finalized, a more complete perf
tool patch can be expected.

Changes since V1:
- Drop the support of the event ID sample type
- Support the new branch stack extension

 tools/perf/Documentation/perf-record.txt  |  4 +++
 tools/perf/util/branch.h                  |  8 ++++-
 tools/perf/util/evsel.c                   | 39 ++++++++++++++++++++---
 tools/perf/util/evsel.h                   |  6 ++++
 tools/perf/util/parse-branch-options.c    |  1 +
 tools/perf/util/perf_event_attr_fprintf.c |  1 +
 tools/perf/util/sample.h                  |  1 +
 tools/perf/util/session.c                 |  8 +++++
 8 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ff815c2f67e8..9183d9c414de 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -402,6 +402,10 @@ following filters are defined:
 		     4th-Gen Xeon+ server), the save branch type is unconditionally enabled
 		     when the taken branch stack sampling is enabled.
 	- priv: save privilege state during sampling in case binary is not available later
+	- event: save occurrences of the event since the last branch entry. Currently, the
+		 feature is only supported by a newer CPU, e.g., Intel Sierra Forest and
+		 later platforms. An error out is expected if it's used on the unsupported
+		 kernel or CPUs.
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index e41bfffe2217..f765b05bbe5f 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -25,7 +25,8 @@ struct branch_flags {
 			u64 spec:2;
 			u64 new_type:4;
 			u64 priv:3;
-			u64 reserved:31;
+			u64 ext:1;
+			u64 reserved:30;
 		};
 	};
 };
@@ -50,6 +51,11 @@ struct branch_stack {
 	struct branch_entry	entries[];
 };
 
+struct branch_stack_ext {
+	u64			nr;
+	u64			data[];
+};
+
 /*
  * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
  * Otherwise, the output format of a sample with branch stack is
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 51e8ce6edddc..19cc9272b669 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1850,6 +1850,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
+	if (perf_missing_features.branch_event)
+		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_EVT_CNTRS;
 	if (perf_missing_features.read_lost)
 		evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
 	if (perf_missing_features.weight_struct) {
@@ -1903,7 +1905,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
 	 */
-	if (!perf_missing_features.read_lost &&
+	if (!perf_missing_features.branch_event &&
+	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_EVT_CNTRS)) {
+		perf_missing_features.branch_event = true;
+		pr_debug2("switching off branch event support\n");
+		return true;
+	} else if (!perf_missing_features.read_lost &&
 	    (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
 		perf_missing_features.read_lost = true;
 		pr_debug2("switching off PERF_FORMAT_LOST support\n");
@@ -2339,7 +2346,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
 		new_val |= bitfield_swap(value, 24, 2);
 		new_val |= bitfield_swap(value, 26, 4);
 		new_val |= bitfield_swap(value, 30, 3);
-		new_val |= bitfield_swap(value, 33, 31);
+		new_val |= bitfield_swap(value, 33, 1);
+		new_val |= bitfield_swap(value, 34, 30);
 	} else {
 		new_val = bitfield_swap(value, 63, 1);
 		new_val |= bitfield_swap(value, 62, 1);
@@ -2350,7 +2358,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
 		new_val |= bitfield_swap(value, 38, 2);
 		new_val |= bitfield_swap(value, 34, 4);
 		new_val |= bitfield_swap(value, 31, 3);
-		new_val |= bitfield_swap(value, 0, 31);
+		new_val |= bitfield_swap(value, 30, 1);
+		new_val |= bitfield_swap(value, 0, 30);
 	}
 
 	return new_val;
@@ -2550,7 +2559,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 	if (type & PERF_SAMPLE_BRANCH_STACK) {
 		const u64 max_branch_nr = UINT64_MAX /
 					  sizeof(struct branch_entry);
-		struct branch_entry *e;
+		struct branch_entry *e, *e0;
+		bool has_ext = false;
 		unsigned int i;
 
 		OVERFLOW_CHECK_u64(array);
@@ -2571,7 +2581,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 			 */
 			e = (struct branch_entry *)&data->branch_stack->hw_idx;
 		}
-
+		e0 = e;
 		if (swapped) {
 			/*
 			 * struct branch_flag does not have endian
@@ -2589,6 +2599,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 
 		OVERFLOW_CHECK(array, sz, max_size);
 		array = (void *)array + sz;
+
+		for (i = 0, e = e0; i < data->branch_stack->nr; i++, e++) {
+			if (e->flags.ext) {
+				has_ext = true;
+				break;
+			}
+		}
+
+		if (has_ext) {
+			OVERFLOW_CHECK_u64(array);
+
+			data->branch_stack_ext = (struct branch_stack_ext *)array++;
+			if (data->branch_stack_ext->nr > max_branch_nr)
+				return -EFAULT;
+			sz = data->branch_stack_ext->nr * sizeof(u64);
+
+			OVERFLOW_CHECK(array, sz, max_size);
+			array = (void *)array + sz;
+		}
 	}
 
 	if (type & PERF_SAMPLE_REGS_USER) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 24cb807ef6ce..aa666e24f8e6 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -187,6 +187,7 @@ struct perf_missing_features {
 	bool code_page_size;
 	bool weight_struct;
 	bool read_lost;
+	bool branch_event;
 };
 
 extern struct perf_missing_features perf_missing_features;
@@ -473,6 +474,11 @@ static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel)
 	return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
 }
 
+static inline bool evsel__has_branch_evt_cntrs(const struct evsel *evsel)
+{
+	return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_EVT_CNTRS;
+}
+
 static inline bool evsel__has_callchain(const struct evsel *evsel)
 {
 	/*
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index fd67d204d720..ab5d6dabe659 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
 	BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
 	BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
+	BRANCH_OPT("event", PERF_SAMPLE_BRANCH_EVT_CNTRS),
 	BRANCH_END
 };
 
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 7e5e7b30510d..3133a4f003eb 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -53,6 +53,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
 		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
 		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
 		bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
+		bit_name(EVT_CNTRS),
 		{ .name = NULL, }
 	};
 #undef bit_name
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 33b08e0ac746..62abae1c9cd3 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -101,6 +101,7 @@ struct perf_sample {
 	void *raw_data;
 	struct ip_callchain *callchain;
 	struct branch_stack *branch_stack;
+	struct branch_stack_ext *branch_stack_ext;
 	struct regs_dump  user_regs;
 	struct regs_dump  intr_regs;
 	struct stack_dump user_stack;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 749d5b5c135b..a1e303c2eaa8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1159,6 +1159,7 @@ static void callchain__printf(struct evsel *evsel,
 static void branch_stack__printf(struct perf_sample *sample, bool callstack)
 {
 	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	struct branch_stack_ext *branch_stack_ext = sample->branch_stack_ext;
 	uint64_t i;
 
 	if (!callstack) {
@@ -1200,6 +1201,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
 			}
 		}
 	}
+
+	if (branch_stack_ext) {
+		printf("... branch stack ext: nr:%" PRIu64 "\n", sample->branch_stack_ext->nr);
+		for (i = 0; i < branch_stack_ext->nr; i++) {
+			printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_ext->data[i]);
+		}
+	}
 }
 
 static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
                   ` (4 preceding siblings ...)
  2023-05-22 11:30 ` [PATCH V2 6/6] perf tools: Add branch event knob kan.liang
@ 2023-05-22 20:26 ` Peter Zijlstra
  2023-05-22 20:42   ` Luck, Tony
  2023-06-07 21:43   ` Luck, Tony
  2023-06-06 12:42 ` Liang, Kan
  2023-08-09 20:04 ` [tip: perf/core] perf/x86/intel: Add Crestmont PMU tip-bot2 for Kan Liang
  7 siblings, 2 replies; 26+ messages in thread
From: Peter Zijlstra @ 2023-05-22 20:26 UTC (permalink / raw)
  To: kan.liang
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang, tony.luck

On Mon, May 22, 2023 at 04:30:35AM -0700, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
> both have Crestmont core. From the core PMU's perspective, they are
> similar to the e-core of MTL. The only difference is the LBR event
> logging feature, which will be implemented in the following patches.
> 
> Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Moo... Tony, did you sneak product names instead of uarch names in the
intel-family thing again?

That is; I'm thinking we want the below, no?

---

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index b3af2d45bbbb..0e804d189e63 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -116,16 +116,16 @@
 #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_N		0xBE
 
-#define INTEL_FAM6_RAPTORLAKE		0xB7
+#define INTEL_FAM6_RAPTORLAKE		0xB7	/* Raptor Cove / Enhanced Gracemont */
 #define INTEL_FAM6_RAPTORLAKE_P		0xBA
 #define INTEL_FAM6_RAPTORLAKE_S		0xBF
 
-#define INTEL_FAM6_METEORLAKE		0xAC
+#define INTEL_FAM6_METEORLAKE		0xAC	/* Redwood Cove / Crestmont */
 #define INTEL_FAM6_METEORLAKE_L		0xAA
 
-#define INTEL_FAM6_LUNARLAKE_M		0xBD
+#define INTEL_FAM6_ARROWLAKE		0xC6	/* Lion Cove / Skymont */
 
-#define INTEL_FAM6_ARROWLAKE		0xC6
+#define INTEL_FAM6_LUNARLAKE_M		0xBD
 
 /* "Small Core" Processors (Atom/E-Core) */
 
@@ -154,9 +154,8 @@
 #define INTEL_FAM6_ATOM_TREMONT		0x96 /* Elkhart Lake */
 #define INTEL_FAM6_ATOM_TREMONT_L	0x9C /* Jasper Lake */
 
-#define INTEL_FAM6_SIERRAFOREST_X	0xAF
-
-#define INTEL_FAM6_GRANDRIDGE		0xB6
+#define INTEL_FAM6_ATOM_CRESTMONT_X	0xAF /* Sierra Forest */
+#define INTEL_FAM6_ATOM_CRESTMONT	0xB6 /* Grand Ridge */
 
 /* Xeon Phi */
 

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-05-22 20:26 ` [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest Peter Zijlstra
@ 2023-05-22 20:42   ` Luck, Tony
  2023-05-22 20:48     ` Peter Zijlstra
  2023-06-07 21:43   ` Luck, Tony
  1 sibling, 1 reply; 26+ messages in thread
From: Luck, Tony @ 2023-05-22 20:42 UTC (permalink / raw)
  To: Peter Zijlstra, kan.liang
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, Hunter, Adrian, ak, Eranian, Stephane,
	alexey.v.bayduraev, Zhang, Tinghao

> Moo... Tony, did you sneak product names instead of uarch names in the
> intel-family thing again?

The difficult part is that I've finally got Intel to release product names reasonable
time ahead of launch. But details like which core architecture is used by each
lags behind.

But I think you just announced the Crestmont core.

-Tony
 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-05-22 20:42   ` Luck, Tony
@ 2023-05-22 20:48     ` Peter Zijlstra
  0 siblings, 0 replies; 26+ messages in thread
From: Peter Zijlstra @ 2023-05-22 20:48 UTC (permalink / raw)
  To: Luck, Tony
  Cc: kan.liang, mingo, acme, linux-kernel, mark.rutland,
	alexander.shishkin, jolsa, namhyung, irogers, Hunter, Adrian, ak,
	Eranian, Stephane, alexey.v.bayduraev, Zhang, Tinghao

On Mon, May 22, 2023 at 08:42:14PM +0000, Luck, Tony wrote:
> > Moo... Tony, did you sneak product names instead of uarch names in the
> > intel-family thing again?
> 
> The difficult part is that I've finally got Intel to release product names reasonable
> time ahead of launch. But details like which core architecture is used by each
> lags behind.
> 
> But I think you just announced the Crestmont core.

Crestmont was in Kan's original Changelog, also:

  https://en.wikichip.org/wiki/intel/microarchitectures/meteor_lake

has: Core Names	Redwood Cove, Crestmont

And:

  https://wccftech.com/intel-next-gen-arrow-lake-lunar-lake-cpus-get-preliminary-support-in-aida64/

has a giant list of names too.


Basically, I never can remember these things and I use Google because I
can't be bothered to learn how the intraweb muck works.



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 2/6] perf: Add branch stack extension
  2023-05-22 11:30 ` [PATCH V2 2/6] perf: Add branch stack extension kan.liang
@ 2023-05-23  6:03   ` Sandipan Das
  2023-05-23 13:08     ` Liang, Kan
  2023-08-02 21:58   ` Peter Zijlstra
  1 sibling, 1 reply; 26+ messages in thread
From: Sandipan Das @ 2023-05-23  6:03 UTC (permalink / raw)
  To: kan.liang
  Cc: linux-kernel, peterz, mingo, acme, mark.rutland,
	alexander.shishkin, jolsa, namhyung, irogers, adrian.hunter, ak,
	eranian, alexey.v.bayduraev, tinghao.zhang, Ravi Bangoria,
	Athira Rajeev

Hi Kan,

On 5/22/2023 5:00 PM, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> Currently, the extra information of a branch entry is stored in a u64
> space. With more and more information added, the space is running out.
> For example, the information of occurrences of events will be added for
> each branch.
> 
> Add an extension space to record the new information for each branch
> entry. The space is appended after the struct perf_branch_stack.
> 
> Add a bit in struct perf_branch_entry to indicate whether the extra
> information is included.
> 
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> Cc: Sandipan Das <sandipan.das@amd.com>
> Cc: Ravi Bangoria <ravi.bangoria@amd.com>
> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> ---
> 
> New patch
> - Introduce a generic extension space which can be used to
>   store the LBR event information for Intel. It can also be used by
>   other ARCHs for the other purpose.
> - Add a new bit in struct perf_branch_entry to indicate whether the
>   extra information is included.
> 
>  arch/powerpc/perf/core-book3s.c |  2 +-
>  arch/x86/events/amd/core.c      |  2 +-
>  arch/x86/events/intel/core.c    |  2 +-
>  arch/x86/events/intel/ds.c      |  4 ++--
>  include/linux/perf_event.h      | 18 +++++++++++++++++-
>  include/uapi/linux/perf_event.h |  4 +++-
>  kernel/events/core.c            |  5 +++++
>  7 files changed, 30 insertions(+), 7 deletions(-)
> 

This seems to be missing the following:

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6310fc5c9f52..b6739f63dc34 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1704,7 +1704,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
                perf_sample_data_init(&data, 0, event->hw.last_period);

                if (has_branch_stack(event))
-                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);


Otherwise, the changes look good to me.

Reviewed-by: Sandipan Das <sandipan.das@amd.com>


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 2/6] perf: Add branch stack extension
  2023-05-23  6:03   ` Sandipan Das
@ 2023-05-23 13:08     ` Liang, Kan
  0 siblings, 0 replies; 26+ messages in thread
From: Liang, Kan @ 2023-05-23 13:08 UTC (permalink / raw)
  To: Sandipan Das, Peter Zijlstra
  Cc: linux-kernel, mingo, acme, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang, Ravi Bangoria, Athira Rajeev



On 2023-05-23 2:03 a.m., Sandipan Das wrote:
> Hi Kan,
> 
> On 5/22/2023 5:00 PM, kan.liang@linux.intel.com wrote:
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Currently, the extra information of a branch entry is stored in a u64
>> space. With more and more information added, the space is running out.
>> For example, the information of occurrences of events will be added for
>> each branch.
>>
>> Add an extension space to record the new information for each branch
>> entry. The space is appended after the struct perf_branch_stack.
>>
>> Add a bit in struct perf_branch_entry to indicate whether the extra
>> information is included.
>>
>> Reviewed-by: Andi Kleen <ak@linux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Cc: Sandipan Das <sandipan.das@amd.com>
>> Cc: Ravi Bangoria <ravi.bangoria@amd.com>
>> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> ---
>>
>> New patch
>> - Introduce a generic extension space which can be used to
>>   store the LBR event information for Intel. It can also be used by
>>   other ARCHs for the other purpose.
>> - Add a new bit in struct perf_branch_entry to indicate whether the
>>   extra information is included.
>>
>>  arch/powerpc/perf/core-book3s.c |  2 +-
>>  arch/x86/events/amd/core.c      |  2 +-
>>  arch/x86/events/intel/core.c    |  2 +-
>>  arch/x86/events/intel/ds.c      |  4 ++--
>>  include/linux/perf_event.h      | 18 +++++++++++++++++-
>>  include/uapi/linux/perf_event.h |  4 +++-
>>  kernel/events/core.c            |  5 +++++
>>  7 files changed, 30 insertions(+), 7 deletions(-)
>>
> 
> This seems to be missing the following:

Oh, right, the patch set based on the perf/core branch which doesn't
include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
update on AMD BRS").

Peter,

Could you please backport the 90befef5a9e8 to your perf/core branch?
Then I will fold the below change into V3.
Or should I rebase the patch set on top of perf/urgent?


> 
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 6310fc5c9f52..b6739f63dc34 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -1704,7 +1704,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
>                 perf_sample_data_init(&data, 0, event->hw.last_period);
> 
>                 if (has_branch_stack(event))
> -                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
> +                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
> 
>                 if (perf_event_overflow(event, &data, regs))
>                         x86_pmu_stop(event, 0);
> 
> 
> Otherwise, the changes look good to me.
> 
> Reviewed-by: Sandipan Das <sandipan.das@amd.com>


Thanks!

Kan


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
                   ` (5 preceding siblings ...)
  2023-05-22 20:26 ` [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest Peter Zijlstra
@ 2023-06-06 12:42 ` Liang, Kan
  2023-06-06 13:24   ` Peter Zijlstra
  2023-08-09 20:04 ` [tip: perf/core] perf/x86/intel: Add Crestmont PMU tip-bot2 for Kan Liang
  7 siblings, 1 reply; 26+ messages in thread
From: Liang, Kan @ 2023-06-06 12:42 UTC (permalink / raw)
  To: peterz, mingo, acme, linux-kernel
  Cc: mark.rutland, alexander.shishkin, jolsa, namhyung, irogers,
	adrian.hunter, ak, eranian, alexey.v.bayduraev, tinghao.zhang

Hi Peter,

On 2023-05-22 7:30 a.m., kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
> both have Crestmont core. From the core PMU's perspective, they are
> similar to the e-core of MTL. The only difference is the LBR event
> logging feature, which will be implemented in the following patches.
> 
> Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
> 
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
> 


Gentle ping.

Do you have any comments for the patch set?

The patch set is based on the perf/core branch, which doesn't
include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
update on AMD BRS").
https://lore.kernel.org/lkml/2f09023a-cccb-35df-da0a-d245ee5238be@linux.intel.com/

Should I rebase it on the perf/urgent and send the V3?


Thanks,
Kan


> No changes since V1.
> 
>  arch/x86/events/intel/core.c | 52 +++++++++++++++++++++++++++++++++++-
>  arch/x86/events/intel/ds.c   |  9 +++++--
>  arch/x86/events/perf_event.h |  2 ++
>  3 files changed, 60 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index a3fb996a86a1..ba2a971e6b8a 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2119,6 +2119,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
>  	EVENT_EXTRA_END
>  };
>  
> +EVENT_ATTR_STR(topdown-retiring,       td_retiring_cmt,        "event=0x72,umask=0x0");
> +EVENT_ATTR_STR(topdown-bad-spec,       td_bad_spec_cmt,        "event=0x73,umask=0x0");
> +
> +static struct attribute *cmt_events_attrs[] = {
> +	EVENT_PTR(td_fe_bound_tnt),
> +	EVENT_PTR(td_retiring_cmt),
> +	EVENT_PTR(td_bad_spec_cmt),
> +	EVENT_PTR(td_be_bound_tnt),
> +	NULL
> +};
> +
>  static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
>  	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
>  	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
> @@ -4830,6 +4841,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
>  
>  PMU_FORMAT_ATTR(frontend, "config1:0-23");
>  
> +PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
> +
>  static struct attribute *intel_arch3_formats_attr[] = {
>  	&format_attr_event.attr,
>  	&format_attr_umask.attr,
> @@ -4860,6 +4873,13 @@ static struct attribute *slm_format_attr[] = {
>  	NULL
>  };
>  
> +static struct attribute *cmt_format_attr[] = {
> +	&format_attr_offcore_rsp.attr,
> +	&format_attr_ldlat.attr,
> +	&format_attr_snoop_rsp.attr,
> +	NULL
> +};
> +
>  static struct attribute *skl_format_attr[] = {
>  	&format_attr_frontend.attr,
>  	NULL,
> @@ -5630,7 +5650,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
>  	NULL
>  };
>  
> -PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
>  FORMAT_ATTR_HYBRID(snoop_rsp,	hybrid_small);
>  
>  static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
> @@ -6178,6 +6197,37 @@ __init int intel_pmu_init(void)
>  		name = "gracemont";
>  		break;
>  
> +	case INTEL_FAM6_GRANDRIDGE:
> +	case INTEL_FAM6_SIERRAFOREST_X:
> +		x86_pmu.mid_ack = true;
> +		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
> +		       sizeof(hw_cache_event_ids));
> +		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
> +		       sizeof(hw_cache_extra_regs));
> +		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
> +
> +		x86_pmu.event_constraints = intel_slm_event_constraints;
> +		x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
> +		x86_pmu.extra_regs = intel_cmt_extra_regs;
> +
> +		x86_pmu.pebs_aliases = NULL;
> +		x86_pmu.pebs_prec_dist = true;
> +		x86_pmu.lbr_pt_coexist = true;
> +		x86_pmu.pebs_block = true;
> +		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
> +		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
> +
> +		intel_pmu_pebs_data_source_cmt();
> +		x86_pmu.pebs_latency_data = mtl_latency_data_small;
> +		x86_pmu.get_event_constraints = cmt_get_event_constraints;
> +		x86_pmu.limit_period = spr_limit_period;
> +		td_attr = cmt_events_attrs;
> +		mem_attr = grt_mem_attrs;
> +		extra_attr = cmt_format_attr;
> +		pr_cont("Crestmont events, ");
> +		name = "crestmont";
> +		break;
> +
>  	case INTEL_FAM6_WESTMERE:
>  	case INTEL_FAM6_WESTMERE_EP:
>  	case INTEL_FAM6_WESTMERE_EX:
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index a2e566e53076..608e220e46aa 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
>  	__intel_pmu_pebs_data_source_grt(data_source);
>  }
>  
> -static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
> +static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
>  {
>  	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
>  	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
> @@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
>  
>  	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
>  	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
> -	intel_pmu_pebs_data_source_cmt(data_source);
> +	__intel_pmu_pebs_data_source_cmt(data_source);
> +}
> +
> +void __init intel_pmu_pebs_data_source_cmt(void)
> +{
> +	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
>  }
>  
>  static u64 precise_store_data(u64 status)
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index d6de4487348c..c8ba2be7585d 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
>  
>  void intel_pmu_pebs_data_source_mtl(void);
>  
> +void intel_pmu_pebs_data_source_cmt(void);
> +
>  int intel_pmu_setup_lbr_filter(struct perf_event *event);
>  
>  void intel_pt_interrupt(void);

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-06 12:42 ` Liang, Kan
@ 2023-06-06 13:24   ` Peter Zijlstra
  2023-06-06 16:16     ` Liang, Kan
  0 siblings, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2023-06-06 13:24 UTC (permalink / raw)
  To: Liang, Kan
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang

On Tue, Jun 06, 2023 at 08:42:42AM -0400, Liang, Kan wrote:
> Hi Peter,
> 
> On 2023-05-22 7:30 a.m., kan.liang@linux.intel.com wrote:
> > From: Kan Liang <kan.liang@linux.intel.com>
> > 
> > The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
> > both have Crestmont core. From the core PMU's perspective, they are
> > similar to the e-core of MTL. The only difference is the LBR event
> > logging feature, which will be implemented in the following patches.
> > 
> > Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
> > 
> > Reviewed-by: Andi Kleen <ak@linux.intel.com>
> > Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> > ---
> > 
> 
> 
> Gentle ping.
> 
> Do you have any comments for the patch set?
> 
> The patch set based on the perf/core branch which doesn't
> include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
> update on AMD BRS").
> https://lore.kernel.org/lkml/2f09023a-cccb-35df-da0a-d245ee5238be@linux.intel.com/
> 
> Should I rebase it on the perf/urgent and send the V3?
> 

I can pull urgent into perf/core, but:

> > +	case INTEL_FAM6_GRANDRIDGE:
> > +	case INTEL_FAM6_SIERRAFOREST_X:
                        ^^^^^^^^^^^^^^^

Those are just plain wrong; please fix up the intel-family.h thing like
suggested earlier in this thread.

And Tony, please no more of that platform name nonsense.. we want uarch
names for a reason, so that enums like the above become something
sensible like:

	case INTEL_FAM6_ATOM_CRESTMONT:
	case INTEL_FAM6_ATOM_CRESTMONT_X:

and now it's super obvious why they're grouped.

> > +		pr_cont("Crestmont events, ");

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-06 13:24   ` Peter Zijlstra
@ 2023-06-06 16:16     ` Liang, Kan
  2023-06-06 18:17       ` Peter Zijlstra
  0 siblings, 1 reply; 26+ messages in thread
From: Liang, Kan @ 2023-06-06 16:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang



On 2023-06-06 9:24 a.m., Peter Zijlstra wrote:
> On Tue, Jun 06, 2023 at 08:42:42AM -0400, Liang, Kan wrote:
>> Hi Peter,
>>
>> On 2023-05-22 7:30 a.m., kan.liang@linux.intel.com wrote:
>>> From: Kan Liang <kan.liang@linux.intel.com>
>>>
>>> The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
>>> both have Crestmont core. From the core PMU's perspective, they are
>>> similar to the e-core of MTL. The only difference is the LBR event
>>> logging feature, which will be implemented in the following patches.
>>>
>>> Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
>>>
>>> Reviewed-by: Andi Kleen <ak@linux.intel.com>
>>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>>> ---
>>>
>>
>>
>> Gentle ping.
>>
>> Do you have any comments for the patch set?
>>
>> The patch set based on the perf/core branch which doesn't
>> include the latest fix, 90befef5a9e8 ("perf/x86: Fix missing sample size
>> update on AMD BRS").
>> https://lore.kernel.org/lkml/2f09023a-cccb-35df-da0a-d245ee5238be@linux.intel.com/
>>
>> Should I rebase it on the perf/urgent and send the V3?
>>
> 
> I can pull urgent into perf/core, but:

Thanks.

> 
>>> +	case INTEL_FAM6_GRANDRIDGE:
>>> +	case INTEL_FAM6_SIERRAFOREST_X:
>                         ^^^^^^^^^^^^^^^
> 
> Those are just plain wrong; please fix up the intel-family.h thing like
> suggested earlier in this thread.
>> And Tony, please no more of that platform name nonsense.. we want uarch
> names for a reason, so that enums like the above become something
> sensible like:
> 
> 	case INTEL_FAM6_ATOM_CRESTMONT:
> 	case INTEL_FAM6_ATOM_CRESTMONT_X:
> 
> and now it's super obvious why they're grouped.
> 
>>> +		pr_cont("Crestmont events, ");

The Sierra Forest should not be a platform name. I think it's the code
name of the processor.

The problem is that the uarch name doesn't work for the hybrid, since it
has different uarchs in the same processors. To make the naming rules
consistent among big core, atom, and hybrid, maybe we should use the
code name of the processor in intel-family.h.

I will propose a patch to update the rules of using the processor name.
I think we may want to have further discussion there.

Thanks,
Kan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-06 16:16     ` Liang, Kan
@ 2023-06-06 18:17       ` Peter Zijlstra
  2023-06-06 18:34         ` Liang, Kan
  0 siblings, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2023-06-06 18:17 UTC (permalink / raw)
  To: Liang, Kan
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang

On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:

> > names for a reason, so that enums like the above become something
> > sensible like:
> > 
> > 	case INTEL_FAM6_ATOM_CRESTMONT:
> > 	case INTEL_FAM6_ATOM_CRESTMONT_X:
> > 
> > and now it's super obvious why they're grouped.
> > 
> >>> +		pr_cont("Crestmont events, ");
> 
> The Sierra Forest should not be a platform name. I think it's the code
> name of the processor.
> 
> The problem is that the uarch name doesn't work for the hybrid, since it
> has different uarchs in the same processors. To make the naming rules
> consistent among big core, atom, and hybrid, maybe we should use the
> code name of the processor in intel-family.h.

I obviously disagree; these are not hybrid and calling them both
CRESTMONT makes *far* more sense than the random gibberish they're
called now.

Yes, hybrid made a complete mess of things (in many ways), but that is no
reason not to do the obvious correct thing where we can.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-06 18:17       ` Peter Zijlstra
@ 2023-06-06 18:34         ` Liang, Kan
  2023-06-06 19:37           ` Peter Zijlstra
  0 siblings, 1 reply; 26+ messages in thread
From: Liang, Kan @ 2023-06-06 18:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang



On 2023-06-06 2:17 p.m., Peter Zijlstra wrote:
> On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:
> 
>>> names for a reason, so that enums like the above become something
>>> sensible like:
>>>
>>> 	case INTEL_FAM6_ATOM_CRESTMONT:
>>> 	case INTEL_FAM6_ATOM_CRESTMONT_X:
>>>
>>> and now it's super obvious why they're grouped.
>>>
>>>>> +		pr_cont("Crestmont events, ");
>>
>> The Sierra Forest should not be a platform name. I think it's the code
>> name of the processor.
>>
>> The problem is that the uarch name doesn't work for the hybrid, since it
>> has different uarchs in the same processors. To make the naming rules
>> consistent among big core, atom, and hybrid, maybe we should use the
>> code name of the processor in intel-family.h.
> 
> I obviously disagree; these are not hybrid and calling them both
> CRESTMONT makes *far* more sense than the random gibberish they're
> called now.
> 
> Yes, hybrid made a complete mess of things (in many ways), but we should
> then not do the obvious correct thing for when we can.

Besides hybrid, it seems there is a bigger problem for the big core.

The big core has used the processor code name since Ice Lake. In the perf
code, we also use the processor code name for the big core.
	pr_cont("Icelake events, ");

Is it OK to leave the big core as is (using processor code name), but
only change the name for Grand Ridge and Sierra Forest?

Thanks,
Kan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-06 18:34         ` Liang, Kan
@ 2023-06-06 19:37           ` Peter Zijlstra
  2023-06-06 19:54             ` Liang, Kan
  0 siblings, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2023-06-06 19:37 UTC (permalink / raw)
  To: Liang, Kan
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang

On Tue, Jun 06, 2023 at 02:34:47PM -0400, Liang, Kan wrote:
> 
> 
> On 2023-06-06 2:17 p.m., Peter Zijlstra wrote:
> > On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:
> > 
> >>> names for a reason, so that enums like the above become something
> >>> sensible like:
> >>>
> >>> 	case INTEL_FAM6_ATOM_CRESTMONT:
> >>> 	case INTEL_FAM6_ATOM_CRESTMONT_X:
> >>>
> >>> and now it's super obvious why they're grouped.
> >>>
> >>>>> +		pr_cont("Crestmont events, ");
> >>
> >> The Sierra Forest should not be a platform name. I think it's the code
> >> name of the processor.
> >>
> >> The problem is that the uarch name doesn't work for the hybrid, since it
> >> has different uarchs in the same processors. To make the naming rules
> >> consistent among big core, atom, and hybrid, maybe we should use the
> >> code name of the processor in intel-family.h.
> > 
> > I obviously disagree; these are not hybrid and calling them both
> > CRESTMONT makes *far* more sense than the random gibberish they're
> > called now.
> > 
> > Yes, hybrid made a complete mess of things (in many ways), but we should
> > then not do the obvious correct thing for when we can.
> 
> Besides hybrid, it seems there is a bigger problem for the big core.
> 
> The big core uses the processor code name since Ice Lake. In the perf
> code, we also uses the processor code name for the big core.
> 	pr_cont("Icelake events, ");

Yeah, it's a mess :/ Ideally that would print "Sunny Cove", but I think
there's userspace looking at that string :-(

> Is it OK to leave the big core as is (using processor code name), but
> only change the name for Grand Ridge and Sierra Forest?

Arguably we should also rename ALDERLAKE_N to ATOM_GRACEMONT


We should also do something about that whole hybrid init thing, the
meteorlake stuff is quite a mess as well. Perhaps we can add hybrid_pmu
next to intel_pmu to have a better start in life for x86_pmu.

Anyway, we should really try not to make a bigger mess and try and clean
up where we can.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-06 19:37           ` Peter Zijlstra
@ 2023-06-06 19:54             ` Liang, Kan
  0 siblings, 0 replies; 26+ messages in thread
From: Liang, Kan @ 2023-06-06 19:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang



On 2023-06-06 3:37 p.m., Peter Zijlstra wrote:
> On Tue, Jun 06, 2023 at 02:34:47PM -0400, Liang, Kan wrote:
>>
>>
>> On 2023-06-06 2:17 p.m., Peter Zijlstra wrote:
>>> On Tue, Jun 06, 2023 at 12:16:29PM -0400, Liang, Kan wrote:
>>>
>>>>> names for a reason, so that enums like the above become something
>>>>> sensible like:
>>>>>
>>>>> 	case INTEL_FAM6_ATOM_CRESTMONT:
>>>>> 	case INTEL_FAM6_ATOM_CRESTMONT_X:
>>>>>
>>>>> and now it's super obvious why they're grouped.
>>>>>
>>>>>>> +		pr_cont("Crestmont events, ");
>>>>
>>>> The Sierra Forest should not be a platform name. I think it's the code
>>>> name of the processor.
>>>>
>>>> The problem is that the uarch name doesn't work for the hybrid, since it
>>>> has different uarchs in the same processors. To make the naming rules
>>>> consistent among big core, atom, and hybrid, maybe we should use the
>>>> code name of the processor in intel-family.h.
>>>
>>> I obviously disagree; these are not hybrid and calling them both
>>> CRESTMONT makes *far* more sense than the random gibberish they're
>>> called now.
>>>
>>> Yes, hybrid made a complete mess of things (in many ways), but we should
>>> then not do the obvious correct thing for when we can.
>>
>> Besides hybrid, it seems there is a bigger problem for the big core.
>>
>> The big core uses the processor code name since Ice Lake. In the perf
>> code, we also uses the processor code name for the big core.
>> 	pr_cont("Icelake events, ");
> 
> Yeah, it's a mess :/ Ideally that would print "Sunny Cove", but I think
> there's userspace looking at that string :-(

Yes, for the existing names, we probably cannot change it. I will try to
only use the micro-architecture name for the future platforms in perf.

> 
>> Is it OK to leave the big core as is (using processor code name), but
>> only change the name for Grand Ridge and Sierra Forest?
> 
> Arguably we should also rename ALDERLAKE_N to ATOM_GRACEMONT
> 
> 
> We should also do something about that whole hybrid init thing, the
> meteorlake stuff is quite a mess as well. Perhaps we can add hybrid_pmu
> next to intel_pmu to have a better start in life for x86_pmu.
>

I will think about it and try to clean up the hybrid init.

> Anyway, we should really try not to make a bigger mess and try and clean
> up where we can.

Sure.

Thanks,
Kan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-05-22 20:26 ` [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest Peter Zijlstra
  2023-05-22 20:42   ` Luck, Tony
@ 2023-06-07 21:43   ` Luck, Tony
  2023-06-08  7:24     ` Peter Zijlstra
  1 sibling, 1 reply; 26+ messages in thread
From: Luck, Tony @ 2023-06-07 21:43 UTC (permalink / raw)
  To: Peter Zijlstra, kan.liang
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, Hunter, Adrian, ak, Eranian, Stephane,
	alexey.v.bayduraev, Zhang, Tinghao

> > Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
>
> Moo... Tony, did you sneak product names instead of uarch names in the
> intel-family thing again?
>
> That is; I'm thinking we want the below, no?
>
> -#define INTEL_FAM6_SIERRAFOREST_X    0xAF
> -
> -#define INTEL_FAM6_GRANDRIDGE                0xB6
> +#define INTEL_FAM6_ATOM_CRESTMONT_X  0xAF /* Sierra Forest */
> +#define INTEL_FAM6_ATOM_CRESTMONT    0xB6 /* Grand Ridge */

I don't think that's really any more helpful.

Using the code name of the model makes it easy to look things
up in ark.intel.com. Using the "core" name doesn't even work for
hybrid CPU models which have more than one core type.
So I'd like to keep it as it is.

But if you want to change to the core name, then please just
do it now.

There are folks internally worried that all upstream work for
these two CPU models is going to be blocked while this
is discussed.

-Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-07 21:43   ` Luck, Tony
@ 2023-06-08  7:24     ` Peter Zijlstra
  2023-06-08 16:20       ` Luck, Tony
  0 siblings, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2023-06-08  7:24 UTC (permalink / raw)
  To: Luck, Tony
  Cc: kan.liang, mingo, acme, linux-kernel, mark.rutland,
	alexander.shishkin, jolsa, namhyung, irogers, Hunter, Adrian, ak,
	Eranian, Stephane, alexey.v.bayduraev, Zhang, Tinghao

On Wed, Jun 07, 2023 at 09:43:57PM +0000, Luck, Tony wrote:
> > > Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.
> >
> > Moo... Tony, did you sneak product names instead of uarch names in the
> > intel-family thing again?
> >
> > That is; I'm thinking we want the below, no?
> >
> > -#define INTEL_FAM6_SIERRAFOREST_X    0xAF
> > -
> > -#define INTEL_FAM6_GRANDRIDGE                0xB6
> > +#define INTEL_FAM6_ATOM_CRESTMONT_X  0xAF /* Sierra Forest */
> > +#define INTEL_FAM6_ATOM_CRESTMONT    0xB6 /* Grand Ridge */
> 
> I don't think that's really any more helpful.

Well, it clearly shows why these two are grouped together. They are the
same bloody uarch. OTOH 'Sierra Forest' and 'Grand Ridge' is just random
gibberish that nobody can relate without looking up some website.

> Using the code name of the model makes it easy to look things
> up in ark.intel.com. Using the "core" name doesn't even work for
> hybrid CPU models which have more than one core type.
> So I'd like to keep it as it is.

ark.intel.com is a travesty anyway... if I get it as a result to a
Google query I will almost always avoid it because it is not useful.

Wikipedia and Wikichip are by far more useful.

> But if you want to change to the core name, then please just
> do it now.
> 
> There are folks internally worried that all upstream work for
> these two CPU models is going to be blocked while this
> is discussed.

Then I'm hoping their take-away is that random gibberish names don't
help anybody. The whole Intel naming scheme is impenetrable crap.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-08  7:24     ` Peter Zijlstra
@ 2023-06-08 16:20       ` Luck, Tony
  2023-06-29 22:39         ` Tony Luck
  2023-08-02 15:01         ` Peter Zijlstra
  0 siblings, 2 replies; 26+ messages in thread
From: Luck, Tony @ 2023-06-08 16:20 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kan.liang, mingo, acme, linux-kernel, mark.rutland,
	alexander.shishkin, jolsa, namhyung, irogers, Hunter, Adrian, ak,
	Eranian, Stephane, alexey.v.bayduraev, Zhang, Tinghao

> Then I'm hoping their take-away is that random gibberish names don't
> help anybody. The whole Intel naming scheme is impenetrable crap.

> > +#define INTEL_FAM6_ATOM_CRESTMONT_X  0xAF /* Sierra Forest */
> > +#define INTEL_FAM6_ATOM_CRESTMONT    0xB6 /* Grand Ridge */

This just adds another layer of confusion. Sure, these two models are based
on the same core. But giving the illusion that they are somehow the same will
lead to tears before bedtime:

1) They each took a snapshot of that core design on different dates, so there
   are logic differences.
2) Feature fuses will be different
3) Microcode will be different
4) BIOS will be different
5) "uncore" is different, so anything implemented outside of the core
    will be different.

-Tony



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-08 16:20       ` Luck, Tony
@ 2023-06-29 22:39         ` Tony Luck
  2023-08-02 15:01         ` Peter Zijlstra
  1 sibling, 0 replies; 26+ messages in thread
From: Tony Luck @ 2023-06-29 22:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kan.liang, mingo, acme, linux-kernel, mark.rutland,
	alexander.shishkin, jolsa, namhyung, irogers, Hunter, Adrian, ak,
	Eranian, Stephane, alexey.v.bayduraev, Zhang, Tinghao

On Thu, Jun 08, 2023 at 04:20:22PM +0000, Luck, Tony wrote:
> > Then I'm hoping their take-away is that random gibberish names don't
> > help anybody. The whole Intel naming scheme is impenetrable crap.
> 
> > > +#define INTEL_FAM6_ATOM_CRESTMONT_X  0xAF /* Sierra Forest */
> > > +#define INTEL_FAM6_ATOM_CRESTMONT    0xB6 /* Grand Ridge */
> 
> This just adds another layer of confusion. Sure, these two models are based
> on the same core. But giving the illusion that they are somehow the same will
> lead to tears before bedtime:
> 
> 1) They each took a snapshot of that core design on different dates, so there
>    are logic differences.
> 2) Feature fuses will be different
> 3) Microcode will be different
> 4) BIOS will be different
> 5) "uncore" is different, so anything implemented outside of the core
>     will be different.

This thread stalled. But the internal conversation continued. There
seems to be a strong argument that enough things changed when Xeon-izing
the core to go into Sierra Forest that using Crestmont will cause
confusion in more places than it helps. There seem to be some internal
folks using an entirely different name for this core (which I won't
repeat here, but some of the usual external sites have mentions of
this other name).

Can we just keep:

#define INTEL_FAM6_SIERRAFOREST_X       0xAF

and move on to more interesting things?

-Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest
  2023-06-08 16:20       ` Luck, Tony
  2023-06-29 22:39         ` Tony Luck
@ 2023-08-02 15:01         ` Peter Zijlstra
  1 sibling, 0 replies; 26+ messages in thread
From: Peter Zijlstra @ 2023-08-02 15:01 UTC (permalink / raw)
  To: Luck, Tony
  Cc: kan.liang, mingo, acme, linux-kernel, mark.rutland,
	alexander.shishkin, jolsa, namhyung, irogers, Hunter, Adrian, ak,
	Eranian, Stephane, alexey.v.bayduraev, Zhang, Tinghao

On Thu, Jun 08, 2023 at 04:20:22PM +0000, Luck, Tony wrote:
> > Then I'm hoping their take-away is that random gibberish names don't
> > help anybody. The whole Intel naming scheme is impenetrable crap.
> 
> > > +#define INTEL_FAM6_ATOM_CRESTMONT_X  0xAF /* Sierra Forest */
> > > +#define INTEL_FAM6_ATOM_CRESTMONT    0xB6 /* Grand Ridge */
> 
> This just adds another layer of confusion. Sure, these two models are based
> on the same core. But giving the illusion that they are somehow the same will
> lead to tears before bedtime:
> 
> 1) They each took a snapshot of that core design on different dates, so there
>    are logic differences.
> 2) Feature fuses will be different
> 3) Microcode will be different
> 4) BIOS will be different
> 5) "uncore" is different, so anything implemented outside of the core
>     will be different.

All those things are true for INTEL_FAM6_SKYLAKE vs INTEL_FAM6_SKYLAKE_X
and all the other pre-hybrid desktop/server parts.

And we used to do the same with the previous ATOM things, see GOLDMONT /
GOLDMONT_D, TREMONT / TREMONT_D etc..

So why should this atom be treated differently? We get a server atom and
a client atom; yes, they differ in all the usual ways, but they are still
more similar to one another than to any other random chip we have.

In short, we used to have this for core parts, we used to have this for
atom parts, but now we magically need to break from it?

Anyway, let me do the rename and squish everything into a git tree.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 2/6] perf: Add branch stack extension
  2023-05-22 11:30 ` [PATCH V2 2/6] perf: Add branch stack extension kan.liang
  2023-05-23  6:03   ` Sandipan Das
@ 2023-08-02 21:58   ` Peter Zijlstra
  2023-08-03 14:22     ` Liang, Kan
  1 sibling, 1 reply; 26+ messages in thread
From: Peter Zijlstra @ 2023-08-02 21:58 UTC (permalink / raw)
  To: kan.liang
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang, Sandipan Das, Ravi Bangoria,
	Athira Rajeev

On Mon, May 22, 2023 at 04:30:36AM -0700, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> Currently, the extra information of a branch entry is stored in a u64
> space. With more and more information added, the space is running out.
> For example, the information of occurrences of events will be added for
> each branch.
> 
> Add an extension space to record the new information for each branch
> entry. The space is appended after the struct perf_branch_stack.
> 
> Add a bit in struct perf_branch_entry to indicate whether the extra
> information is included.
> 
> Reviewed-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> Cc: Sandipan Das <sandipan.das@amd.com>
> Cc: Ravi Bangoria <ravi.bangoria@amd.com>
> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> ---
> 
> New patch
> - Introduce a generic extension space which can be used to
>   store the LBR event information for Intel. It can also be used by
>   other ARCHs for the other purpose.
> - Add a new bit in struct perf_branch_entry to indicate whether the
>   extra information is included.

Bah.. I don't like this, also the actual format isn't clear to me.

The uapi part is severely lacking, it just adds the ext:1 thing, but
doesn't describe what if anything happens when it's set.

The internal perf_branch_stack_ext thing is just that, internal.
Additionally it contains a nr member, which seems to suggest it can be
different from the number of entries in the branch-stack itself -- which
would be odd indeed.

So we have an 'ext' bit per branch entry to indicate the existence of
this extra data; this again suggests no 1:1 correspondence, but at most
one extra entry per set bit.

Parsing this will be pretty horrible, no?

So what we have now is:

	{ u64			nr;
	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
	  { u64 from, to, flags; } lbr[nr];
	} && PERF_SAMPLE_BRANCH_STACK

and AFAICT you're doing:

	{ u64			nr;
	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
	  { u64 from, to, flags; } lbr[nr];
+	  { u64	nr2;
+	    { u64 extra; } extra[nr2];
+         } && OR_i{lbr[i].flags.ext}
	} && PERF_SAMPLE_BRANCH_STACK

Which is pretty horrific, no? The straight forward:

	{ u64			nr;
	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
	  { u64 from, to, flags; } lbr[nr];
+	  { u64 extra; } ext[nr] && SOMETHING
	} && PERF_SAMPLE_BRANCH_STACK

Or perhaps even:

	{ u64			nr;
	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
	  { u64 from, to, flags; 
+	    u64 extra; && SOMETHING
	  } lbr[nr];
	} && PERF_SAMPLE_BRANCH_STACK

With the obvious question what 'SOMETHING' should be. I suppose
PERF_SAMPLE_BRANCH_EXTRA was considered and discarded?

Implementing the last suggestion wouldn't even be too bad, since having
PERF_SAMPLE_BRANCH_EXTRA set, we know to allocate and cast the existing
perf_sample_data::br_stack to a convenient new type, something like:

struct perf_branch_entry_ext {
	__u64	from;
	__u64	to;
	__u64	mispred:1,  /* target mispredicted */
		predicted:1,/* target predicted */
		in_tx:1,    /* in transaction */
		abort:1,    /* transaction abort */
		cycles:16,  /* cycle count to last branch */
		type:4,     /* branch type */
		spec:2,     /* branch speculation info */
		new_type:4, /* additional branch type */
		priv:3,     /* privilege level */
		reserved:31;
	__u64	extra;
};

Except at that point I think I would suggest doing s/EXTRA/COUNTERS/g
and making it something like:

	union {
		__u64	counters;
		__u8 	c[8];
	};

Or something daft like that.

Wouldn't all that make *MUCH* more sense?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V2 2/6] perf: Add branch stack extension
  2023-08-02 21:58   ` Peter Zijlstra
@ 2023-08-03 14:22     ` Liang, Kan
  0 siblings, 0 replies; 26+ messages in thread
From: Liang, Kan @ 2023-08-03 14:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, acme, linux-kernel, mark.rutland, alexander.shishkin,
	jolsa, namhyung, irogers, adrian.hunter, ak, eranian,
	alexey.v.bayduraev, tinghao.zhang, Sandipan Das, Ravi Bangoria,
	Athira Rajeev



On 2023-08-02 5:58 p.m., Peter Zijlstra wrote:
> On Mon, May 22, 2023 at 04:30:36AM -0700, kan.liang@linux.intel.com wrote:
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Currently, the extra information of a branch entry is stored in a u64
>> space. With more and more information added, the space is running out.
>> For example, the information of occurrences of events will be added for
>> each branch.
>>
>> Add an extension space to record the new information for each branch
>> entry. The space is appended after the struct perf_branch_stack.
>>
>> Add a bit in struct perf_branch_entry to indicate whether the extra
>> information is included.
>>
>> Reviewed-by: Andi Kleen <ak@linux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Cc: Sandipan Das <sandipan.das@amd.com>
>> Cc: Ravi Bangoria <ravi.bangoria@amd.com>
>> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> ---
>>
>> New patch
>> - Introduce a generic extension space which can be used to
>>   store the LBR event information for Intel. It can also be used by
>>   other ARCHs for the other purpose.
>> - Add a new bit in struct perf_branch_entry to indicate whether the
>>   extra information is included.
> 
> Bah.. I don't like this, also the actual format isn't clear to me.
> 
> The uapi part is severely lacking, it just adds the ext:1 thing, but
> doesn't describe what if anything happens when it's set.
> 
> The internal perf_branch_stack_ext thing is just that, internal.
> Additionally it contains a nr member, which seems to suggest it can be
> different from the number of entries in the branch-stack itself -- which
> would be odd indeed.
> 
> So we have an 'ext' bit per branch entry to indicate the existence of
> this extra data, this again suggests no 1:1 correspondence, but at most
> one extra entry per set bit.
> 
> Parsing this will be pretty horrible, no?
> 
> So what we have now is:
> 
> 	{ u64			nr;
> 	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
> 	  { u64 from, to, flags; } lbr[nr];
> 	} && PERF_SAMPLE_BRANCH_STACK
> 
> and AFAICT you're doing:
> 
> 	{ u64			nr;
> 	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
> 	  { u64 from, to, flags; } lbr[nr];
> +	  { u64	nr2;
> +	    { u64 extra; } extra[nr2];
> +         } && OR_i{lbr[i].flags.ext}
> 	} && PERF_SAMPLE_BRANCH_STACK
> 
> Which is pretty horrific, no? The straight forward:

I just tried to make the interface more flexible, since I had no idea
how other ARCHs would use the extra space. But it seems such flexibility
is not necessary. It is indeed not easy to parse.

> 
> 	{ u64			nr;
> 	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
> 	  { u64 from, to, flags; } lbr[nr];
> +	  { u64 extra; } ext[nr] && SOMETHING
> 	} && PERF_SAMPLE_BRANCH_STACK
> 
> Or perhaps even:
> 
> 	{ u64			nr;
> 	  { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
> 	  { u64 from, to, flags; 
> +	    u64 extra; && SOMETHING
> 	  } lbr[nr];
> 	} && PERF_SAMPLE_BRANCH_STACK
> 
> With the obvious question what 'SOMETHING' should be. I suppose
> PERF_SAMPLE_BRANCH_EXTRA was considered and discarded?

Yes, it's considered. I once tried to reuse the existing space/structure
as much as possible. So it's dropped.

Other than that, using a new sample type as an indicator should be a
better way and much more straightforward. I will use it in V3.

> 
> Implementing the last suggestion wouldn't even be too bad, since having
> PERF_SAMPLE_BRANCH_EXTRA set, we know to allocate and cast the existing
> perf_sample_data::br_stack to a convenient new type, something like:
> 
> struct perf_branch_entry_ext {
> 	__u64	from;
> 	__u64	to;
> 	__u64	mispred:1,  /* target mispredicted */
> 		predicted:1,/* target predicted */
> 		in_tx:1,    /* in transaction */
> 		abort:1,    /* transaction abort */
> 		cycles:16,  /* cycle count to last branch */
> 		type:4,     /* branch type */
> 		spec:2,     /* branch speculation info */
> 		new_type:4, /* additional branch type */
> 		priv:3,     /* privilege level */
> 		reserved:31;
> 	__u64	extra;
> };
> 
> Except at that point I think I would suggest doing s/EXTRA/COUNTERS/g
> and making it something like:
> 
> 	union {
> 		__u64	counters;
> 		__u8 	c[8];
> 	};
> 

It's good enough for this feature and Intel LBR.
My only concern is that it's only a 64 bit extra space. If we need more
space later, we have to keep adding perf_branch_entry_ext2 and
PERF_SAMPLE_BRANCH_EXTRA2. But I don't have such use case now. Maybe I'm
just too paranoid. :)

I will use the suggested structure in V3. If anyone has other concerns,
we can discuss them from there.

Thanks,
Kan

> Or something daft like that.
> 
> Wouldn't all that make *MUCH* more sense?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [tip: perf/core] perf/x86/intel: Add Crestmont PMU
  2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
                   ` (6 preceding siblings ...)
  2023-06-06 12:42 ` Liang, Kan
@ 2023-08-09 20:04 ` tip-bot2 for Kan Liang
  7 siblings, 0 replies; 26+ messages in thread
From: tip-bot2 for Kan Liang @ 2023-08-09 20:04 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Kan Liang, Peter Zijlstra (Intel), Andi Kleen, x86, linux-kernel

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     a430021faad6b4fa86c820fc3e7f8dbfc2f14fb4
Gitweb:        https://git.kernel.org/tip/a430021faad6b4fa86c820fc3e7f8dbfc2f14fb4
Author:        Kan Liang <kan.liang@linux.intel.com>
AuthorDate:    Mon, 22 May 2023 04:30:35 -07:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 09 Aug 2023 21:51:07 +02:00

perf/x86/intel: Add Crestmont PMU

The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
both have Crestmont core. From the core PMU's perspective, they are
similar to the e-core of MTL. The only difference is the LBR event
logging feature, which will be implemented in the following patches.

Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/20230522113040.2329924-1-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 52 ++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/ds.c   |  9 ++++--
 arch/x86/events/perf_event.h |  2 +-
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c7e7ed6..64a3533 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(topdown-retiring,       td_retiring_cmt,        "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec,       td_bad_spec_cmt,        "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+	EVENT_PTR(td_fe_bound_tnt),
+	EVENT_PTR(td_retiring_cmt),
+	EVENT_PTR(td_bad_spec_cmt),
+	EVENT_PTR(td_be_bound_tnt),
+	NULL
+};
+
 static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@@ -4840,6 +4851,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
 
 PMU_FORMAT_ATTR(frontend, "config1:0-23");
 
+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -4870,6 +4883,13 @@ static struct attribute *slm_format_attr[] = {
 	NULL
 };
 
+static struct attribute *cmt_format_attr[] = {
+	&format_attr_offcore_rsp.attr,
+	&format_attr_ldlat.attr,
+	&format_attr_snoop_rsp.attr,
+	NULL
+};
+
 static struct attribute *skl_format_attr[] = {
 	&format_attr_frontend.attr,
 	NULL,
@@ -5649,7 +5669,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
 	NULL
 };
 
-PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
 FORMAT_ATTR_HYBRID(snoop_rsp,	hybrid_small);
 
 static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@@ -6197,6 +6216,37 @@ __init int intel_pmu_init(void)
 		name = "gracemont";
 		break;
 
+	case INTEL_FAM6_ATOM_CRESTMONT:
+	case INTEL_FAM6_ATOM_CRESTMONT_X:
+		x86_pmu.mid_ack = true;
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_cmt_extra_regs;
+
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.pebs_block = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+		intel_pmu_pebs_data_source_cmt();
+		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.get_event_constraints = cmt_get_event_constraints;
+		x86_pmu.limit_period = spr_limit_period;
+		td_attr = cmt_events_attrs;
+		mem_attr = grt_mem_attrs;
+		extra_attr = cmt_format_attr;
+		pr_cont("Crestmont events, ");
+		name = "crestmont";
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index df88576..eb8dd8b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
 	__intel_pmu_pebs_data_source_grt(data_source);
 }
 
-static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
 {
 	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
 
 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
-	intel_pmu_pebs_data_source_cmt(data_source);
+	__intel_pmu_pebs_data_source_cmt(data_source);
+}
+
+void __init intel_pmu_pebs_data_source_cmt(void)
+{
+	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
 }
 
 static u64 precise_store_data(u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index d6de448..c8ba2be 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
 
 void intel_pmu_pebs_data_source_mtl(void);
 
+void intel_pmu_pebs_data_source_cmt(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2023-08-09 20:05 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-22 11:30 [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest kan.liang
2023-05-22 11:30 ` [PATCH V2 2/6] perf: Add branch stack extension kan.liang
2023-05-23  6:03   ` Sandipan Das
2023-05-23 13:08     ` Liang, Kan
2023-08-02 21:58   ` Peter Zijlstra
2023-08-03 14:22     ` Liang, Kan
2023-05-22 11:30 ` [PATCH V2 3/6] perf: Support branch events kan.liang
2023-05-22 11:30 ` [PATCH V2 4/6] perf/x86/intel: Support LBR event logging kan.liang
2023-05-22 11:30 ` [PATCH V2 5/6] tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel kan.liang
2023-05-22 11:30 ` [PATCH V2 6/6] perf tools: Add branch event knob kan.liang
2023-05-22 20:26 ` [PATCH V2 1/6] perf/x86/intel: Add Grand Ridge and Sierra Forest Peter Zijlstra
2023-05-22 20:42   ` Luck, Tony
2023-05-22 20:48     ` Peter Zijlstra
2023-06-07 21:43   ` Luck, Tony
2023-06-08  7:24     ` Peter Zijlstra
2023-06-08 16:20       ` Luck, Tony
2023-06-29 22:39         ` Tony Luck
2023-08-02 15:01         ` Peter Zijlstra
2023-06-06 12:42 ` Liang, Kan
2023-06-06 13:24   ` Peter Zijlstra
2023-06-06 16:16     ` Liang, Kan
2023-06-06 18:17       ` Peter Zijlstra
2023-06-06 18:34         ` Liang, Kan
2023-06-06 19:37           ` Peter Zijlstra
2023-06-06 19:54             ` Liang, Kan
2023-08-09 20:04 ` [tip: perf/core] perf/x86/intel: Add Crestmont PMU tip-bot2 for Kan Liang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).