Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/4] Add support for ARMv8.3-SPE
@ 2020-07-24  9:16 Wei Li
  2020-07-24  9:16 ` [PATCH 1/4] drivers/perf: " Wei Li
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: Wei Li @ 2020-07-24  9:16 UTC (permalink / raw)
  To: Adrian Hunter, Alexander Shishkin, Arnaldo Carvalho de Melo,
	Catalin Marinas, James Clark, Jiri Olsa, Leo Yan, Mark Rutland,
	Namhyung Kim, Suzuki K Poulose, Will Deacon, zhangshaokun
  Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, linux-arm-kernel, guohanjun

The ARMv8.3-SPE adds an Alignment Flag in the Events packet and filtering
on this event using PMSEVFR_EL1, together with support for the profiling
of Scalable Vector Extension operations.

Patch 1: Update the kernel driver, mainly for PMSEVFR_EL1.

Patch 2: Update the decode process of Events packet and Operation Type
  packet in perf tool.

Patch 3-4: Synthesize unaligned address access events and partial/empty
  predicated SVE events, also add two itrace options for filtering.

Wei Li (4):
  drivers/perf: Add support for ARMv8.3-SPE
  perf: arm-spe: Add support for ARMv8.3-SPE
  perf auxtrace: Add new itrace options for ARMv8.3-SPE
  perf: arm-spe: Synthesize new events for ARMv8.3-SPE

 arch/arm64/include/asm/sysreg.h               |  4 +-
 drivers/perf/arm_spe_pmu.c                    | 18 +++--
 tools/perf/Documentation/itrace.txt           |  2 +
 .../util/arm-spe-decoder/arm-spe-decoder.c    | 11 +++
 .../util/arm-spe-decoder/arm-spe-decoder.h    |  3 +
 .../arm-spe-decoder/arm-spe-pkt-decoder.c     | 69 ++++++++++++++++++-
 tools/perf/util/arm-spe.c                     | 61 ++++++++++++++++
 tools/perf/util/auxtrace.c                    |  8 +++
 tools/perf/util/auxtrace.h                    |  4 ++
 9 files changed, 173 insertions(+), 7 deletions(-)

-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-24  9:16 [PATCH 0/4] Add support for ARMv8.3-SPE Wei Li
@ 2020-07-24  9:16 ` Wei Li
  2020-07-28 12:27   ` Leo Yan
  2020-07-29  9:12   ` Suzuki K Poulose
  2020-07-24  9:16 ` [PATCH 2/4] perf: arm-spe: " Wei Li
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 19+ messages in thread
From: Wei Li @ 2020-07-24  9:16 UTC (permalink / raw)
  To: Adrian Hunter, Alexander Shishkin, Arnaldo Carvalho de Melo,
	Catalin Marinas, James Clark, Jiri Olsa, Leo Yan, Mark Rutland,
	Namhyung Kim, Suzuki K Poulose, Will Deacon, zhangshaokun
  Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, linux-arm-kernel, guohanjun

Armv8.3 extends the SPE by adding:
- Alignment field in the Events packet, and filtering on this event
  using PMSEVFR_EL1.
- Support for the Scalable Vector Extension (SVE).

The main additions for SVE are:
- Recording the vector length for SVE operations in the Operation Type
  packet. It is not possible to filter on vector length.
- Incomplete predicate and empty predicate fields in the Events packet,
  and filtering on these events using PMSEVFR_EL1.

Update the check of pmsevfr for empty/partial predicated SVE and
alignment event in kernel driver.

Signed-off-by: Wei Li <liwei391@huawei.com>
---
 arch/arm64/include/asm/sysreg.h |  4 +++-
 drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 463175f80341..be4c44ccdb56 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -281,7 +281,6 @@
 #define SYS_PMSFCR_EL1_ST_SHIFT		18
 
 #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
-#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
 
 #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
 #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
@@ -769,6 +768,9 @@
 #define ID_AA64DFR0_PMUVER_8_5		0x6
 #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
 
+#define ID_AA64DFR0_PMSVER_8_2		0x1
+#define ID_AA64DFR0_PMSVER_8_3		0x2
+
 #define ID_DFR0_PERFMON_SHIFT		24
 
 #define ID_DFR0_PERFMON_8_1		0x4
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index e51ddb6d63ed..5ec7ee0c8fa1 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -54,7 +54,7 @@ struct arm_spe_pmu {
 	struct hlist_node			hotplug_node;
 
 	int					irq; /* PPI */
-
+	int					pmuver;
 	u16					min_period;
 	u16					counter_sz;
 
@@ -80,6 +80,15 @@ struct arm_spe_pmu {
 /* Keep track of our dynamic hotplug state */
 static enum cpuhp_state arm_spe_pmu_online;
 
+static u64 sys_pmsevfr_el1_mask[] = {
+	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
+		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
+		BIT_ULL(1),
+	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
+		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
+		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
+};
+
 enum arm_spe_pmu_buf_fault_action {
 	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
 	SPE_PMU_BUF_FAULT_ACT_FATAL,
@@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
 	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
 		return -ENOENT;
 
-	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
+	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
 		return -EOPNOTSUPP;
 
 	if (attr->exclude_idle)
@@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
 			fld, smp_processor_id());
 		return;
 	}
+	spe_pmu->pmuver = fld;
 
 	/* Read PMBIDR first to determine whether or not we have access */
 	reg = read_sysreg_s(SYS_PMBIDR_EL1);
@@ -1027,8 +1037,8 @@ static void __arm_spe_pmu_dev_probe(void *info)
 	}
 
 	dev_info(dev,
-		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
-		 cpumask_pr_args(&spe_pmu->supported_cpus),
+		 "v%d probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
+		 spe_pmu->pmuver, cpumask_pr_args(&spe_pmu->supported_cpus),
 		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
 
 	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 2/4] perf: arm-spe: Add support for ARMv8.3-SPE
  2020-07-24  9:16 [PATCH 0/4] Add support for ARMv8.3-SPE Wei Li
  2020-07-24  9:16 ` [PATCH 1/4] drivers/perf: " Wei Li
@ 2020-07-24  9:16 ` Wei Li
  2020-07-29  6:29   ` Leo Yan
  2020-07-24  9:16 ` [PATCH 3/4] perf auxtrace: Add new itrace options " Wei Li
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 19+ messages in thread
From: Wei Li @ 2020-07-24  9:16 UTC (permalink / raw)
  To: Adrian Hunter, Alexander Shishkin, Arnaldo Carvalho de Melo,
	Catalin Marinas, James Clark, Jiri Olsa, Leo Yan, Mark Rutland,
	Namhyung Kim, Suzuki K Poulose, Will Deacon, zhangshaokun
  Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, linux-arm-kernel, guohanjun

Armv8.3 extends the SPE by adding:
- Alignment field in the Events packet, and filtering on this event
  using PMSEVFR_EL1.
- Support for the Scalable Vector Extension (SVE).

The main additions for SVE are:
- Recording the vector length for SVE operations in the Operation Type
  packet. It is not possible to filter on vector length.
- Incomplete predicate and empty predicate fields in the Events packet,
  and filtering on these events using PMSEVFR_EL1.

Add the corresponding decode process of Events packet and Operation Type
packet in perf tool.

Signed-off-by: Wei Li <liwei391@huawei.com>
---
 .../arm-spe-decoder/arm-spe-pkt-decoder.c     | 69 ++++++++++++++++++-
 1 file changed, 67 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index b94001b756c7..10a3692839de 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -347,6 +347,24 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 				blen -= ret;
 			}
 		}
+		if (idx > 2) {
+			if (payload & 0x800) {
+				ret = snprintf(buf, buf_len, " ALIGNMENT");
+				buf += ret;
+				blen -= ret;
+			}
+			if (payload & 0x20000) {
+				ret = snprintf(buf, buf_len, " SVE-PRED-PARTIAL");
+				buf += ret;
+				blen -= ret;
+			}
+			if (payload & 0x40000) {
+				ret = snprintf(buf, buf_len, " SVE-PRED-EMPTY");
+				buf += ret;
+				blen -= ret;
+			}
+		}
+
 		if (ret < 0)
 			return ret;
 		blen -= ret;
@@ -354,8 +372,38 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 	}
 	case ARM_SPE_OP_TYPE:
 		switch (idx) {
-		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
-					"COND-SELECT" : "INSN-OTHER");
+		case 0:	{
+			if (payload & 0x8) {
+				size_t blen = buf_len;
+
+				ret = snprintf(buf, buf_len, "SVE-OTHER");
+				buf += ret;
+				blen -= ret;
+				if (payload & 0x2) {
+					ret = snprintf(buf, buf_len, " FP");
+					buf += ret;
+					blen -= ret;
+				}
+				if (payload & 0x4) {
+					ret = snprintf(buf, buf_len, " PRED");
+					buf += ret;
+					blen -= ret;
+				}
+				if (payload & 0x70) {
+					ret = snprintf(buf, buf_len, " EVL %d",
+						32 << ((payload & 0x70) >> 4));
+					buf += ret;
+					blen -= ret;
+				}
+				if (ret < 0)
+					return ret;
+				blen -= ret;
+				return buf_len - blen;
+			} else {
+				return snprintf(buf, buf_len, "%s", payload & 0x1 ?
+						"COND-SELECT" : "INSN-OTHER");
+			}
+		}
 		case 1:	{
 			size_t blen = buf_len;
 
@@ -385,6 +433,23 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 				ret = snprintf(buf, buf_len, " SIMD-FP");
 				buf += ret;
 				blen -= ret;
+			} else if (payload & 0x8) {
+				if (payload & 0x4) {
+					ret = snprintf(buf, buf_len, " PRED");
+					buf += ret;
+					blen -= ret;
+				}
+				if (payload & 0x70) {
+					ret = snprintf(buf, buf_len, " EVL %d",
+						32 << ((payload & 0x70) >> 4));
+					buf += ret;
+					blen -= ret;
+				}
+				if (payload & 0x80) {
+					ret = snprintf(buf, buf_len, " SG");
+					buf += ret;
+					blen -= ret;
+				}
 			}
 			if (ret < 0)
 				return ret;
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 3/4] perf auxtrace: Add new itrace options for ARMv8.3-SPE
  2020-07-24  9:16 [PATCH 0/4] Add support for ARMv8.3-SPE Wei Li
  2020-07-24  9:16 ` [PATCH 1/4] drivers/perf: " Wei Li
  2020-07-24  9:16 ` [PATCH 2/4] perf: arm-spe: " Wei Li
@ 2020-07-24  9:16 ` Wei Li
  2020-07-29  6:51   ` Leo Yan
  2020-07-24  9:16 ` [PATCH 4/4] perf: arm-spe: Synthesize new events " Wei Li
  2020-07-28 12:06 ` [PATCH 0/4] Add support " Arnaldo Carvalho de Melo
  4 siblings, 1 reply; 19+ messages in thread
From: Wei Li @ 2020-07-24  9:16 UTC (permalink / raw)
  To: Adrian Hunter, Alexander Shishkin, Arnaldo Carvalho de Melo,
	Catalin Marinas, James Clark, Jiri Olsa, Leo Yan, Mark Rutland,
	Namhyung Kim, Suzuki K Poulose, Will Deacon, zhangshaokun
  Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, linux-arm-kernel, guohanjun

This patch adds two options to synthesize events, which are
described below:

 'u': synthesize unaligned address access events
 'v': synthesize partial/empty predicated SVE events

These two options will be used by ARM SPE as their first consumer.

Signed-off-by: Wei Li <liwei391@huawei.com>
---
 tools/perf/Documentation/itrace.txt | 2 ++
 tools/perf/util/auxtrace.c          | 8 ++++++++
 tools/perf/util/auxtrace.h          | 4 ++++
 3 files changed, 14 insertions(+)

diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index e817179c5027..25bcf3622709 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -13,6 +13,8 @@
 		m	synthesize last level cache events
 		t	synthesize TLB events
 		a	synthesize remote access events
+		u	synthesize unaligned address access events
+		v	synthesize partial/empty predicated SVE events
 		g	synthesize a call chain (use with i or x)
 		G	synthesize a call chain on existing event records
 		l	synthesize last branch entries (use with i or x)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 25c639ac4ad4..2033eb3708ec 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1334,6 +1334,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
 	synth_opts->llc = true;
 	synth_opts->tlb = true;
 	synth_opts->remote_access = true;
+	synth_opts->alignment = true;
+	synth_opts->sve = true;
 
 	if (no_sample) {
 		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
@@ -1507,6 +1509,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 		case 'a':
 			synth_opts->remote_access = true;
 			break;
+		case 'u':
+			synth_opts->alignment = true;
+			break;
+		case 'v':
+			synth_opts->sve = true;
+			break;
 		case ' ':
 		case ',':
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 142ccf7d34df..972df7b06b0d 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -116,6 +116,8 @@ struct itrace_synth_opts {
 	bool			llc;
 	bool			tlb;
 	bool			remote_access;
+	bool			alignment;
+	bool			sve;
 	unsigned int		callchain_sz;
 	unsigned int		last_branch_sz;
 	unsigned long long	period;
@@ -617,6 +619,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
 "				m:	    		synthesize last level cache events\n" \
 "				t:	    		synthesize TLB events\n" \
 "				a:	    		synthesize remote access events\n" \
+"				u:	    		synthesize unaligned address access events\n" \
+"				v:	    		synthesize partial/empty predicated SVE events\n" \
 "				g[len]:     		synthesize a call chain (use with i or x)\n" \
 "				l[len]:     		synthesize last branch entries (use with i or x)\n" \
 "				sNUMBER:    		skip initial number of events\n"		\
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 4/4] perf: arm-spe: Synthesize new events for ARMv8.3-SPE
  2020-07-24  9:16 [PATCH 0/4] Add support for ARMv8.3-SPE Wei Li
                   ` (2 preceding siblings ...)
  2020-07-24  9:16 ` [PATCH 3/4] perf auxtrace: Add new itrace options " Wei Li
@ 2020-07-24  9:16 ` Wei Li
  2020-07-28 12:06 ` [PATCH 0/4] Add support " Arnaldo Carvalho de Melo
  4 siblings, 0 replies; 19+ messages in thread
From: Wei Li @ 2020-07-24  9:16 UTC (permalink / raw)
  To: Adrian Hunter, Alexander Shishkin, Arnaldo Carvalho de Melo,
	Catalin Marinas, James Clark, Jiri Olsa, Leo Yan, Mark Rutland,
	Namhyung Kim, Suzuki K Poulose, Will Deacon, zhangshaokun
  Cc: Peter Zijlstra, Ingo Molnar, linux-kernel, linux-arm-kernel, guohanjun

Synthesize unaligned address access events and partial/empty
predicated SVE operation events introduced by ARMv8.3-SPE.

They can be filtered by itrace options when reporting.

Signed-off-by: Wei Li <liwei391@huawei.com>
---
 .../util/arm-spe-decoder/arm-spe-decoder.c    | 11 ++++
 .../util/arm-spe-decoder/arm-spe-decoder.h    |  3 +
 tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
 3 files changed, 75 insertions(+)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 93e063f22be5..fac8102c0149 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -197,6 +197,17 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
 			if (payload & BIT(EV_MISPRED))
 				decoder->record.type |= ARM_SPE_BRANCH_MISS;
 
+			if ((idx == 4 || idx == 8) &&
+			    (payload & BIT(EV_ALIGNMENT)))
+				decoder->record.type |= ARM_SPE_ALIGNMENT;
+
+			if ((idx == 4 || idx == 8) &&
+			    (payload & BIT(EV_PARTIAL_PREDICATE)))
+				decoder->record.type |= ARM_SPE_PARTIAL_PREDICATE;
+
+			if ((idx == 4 || idx == 8) &&
+			    (payload & BIT(EV_EMPTY_PREDICATE)))
+				decoder->record.type |= ARM_SPE_EMPTY_PREDICATE;
 			break;
 		case ARM_SPE_DATA_SOURCE:
 			break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index a5111a8d4360..d165418fcc13 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -39,6 +39,9 @@ enum arm_spe_sample_type {
 	ARM_SPE_TLB_MISS	= 1 << 5,
 	ARM_SPE_BRANCH_MISS	= 1 << 6,
 	ARM_SPE_REMOTE_ACCESS	= 1 << 7,
+	ARM_SPE_ALIGNMENT	= 1 << 8,
+	ARM_SPE_PARTIAL_PREDICATE	= 1 << 9,
+	ARM_SPE_EMPTY_PREDICATE	= 1 << 10,
 };
 
 struct arm_spe_record {
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 3882a5360ada..e36d6eea269b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -53,6 +53,8 @@ struct arm_spe {
 	u8				sample_tlb;
 	u8				sample_branch;
 	u8				sample_remote_access;
+	u8				sample_alignment;
+	u8				sample_sve;
 
 	u64				l1d_miss_id;
 	u64				l1d_access_id;
@@ -62,6 +64,9 @@ struct arm_spe {
 	u64				tlb_access_id;
 	u64				branch_miss_id;
 	u64				remote_access_id;
+	u64				alignment_id;
+	u64				epred_sve_id;
+	u64				ppred_sve_id;
 
 	u64				kernel_start;
 
@@ -344,6 +349,30 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 			return err;
 	}
 
+	if (spe->sample_alignment &&
+	    (record->type & ARM_SPE_ALIGNMENT)) {
+		err = arm_spe_synth_spe_events_sample(speq,
+						      spe->alignment_id);
+		if (err)
+			return err;
+	}
+
+	if (spe->sample_sve) {
+		if (record->type & ARM_SPE_EMPTY_PREDICATE) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->epred_sve_id);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_PARTIAL_PREDICATE) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->ppred_sve_id);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 }
 
@@ -907,6 +936,38 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 		id += 1;
 	}
 
+	if (spe->synth_opts.alignment) {
+		spe->sample_alignment = true;
+
+		/* Alignment */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->alignment_id = id;
+		arm_spe_set_event_name(evlist, id, "alignment");
+		id += 1;
+	}
+
+	if (spe->synth_opts.sve) {
+		spe->sample_sve = true;
+
+		/* Empty predicated SVE */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->epred_sve_id = id;
+		arm_spe_set_event_name(evlist, id, "sve-pred-empty");
+		id += 1;
+
+		/* Partial predicated SVE */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->ppred_sve_id = id;
+		arm_spe_set_event_name(evlist, id, "sve-pred-partial");
+		id += 1;
+	}
+
 	return 0;
 }
 
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 0/4] Add support for ARMv8.3-SPE
  2020-07-24  9:16 [PATCH 0/4] Add support for ARMv8.3-SPE Wei Li
                   ` (3 preceding siblings ...)
  2020-07-24  9:16 ` [PATCH 4/4] perf: arm-spe: Synthesize new events " Wei Li
@ 2020-07-28 12:06 ` Arnaldo Carvalho de Melo
  2020-07-28 12:41   ` Leo Yan
  4 siblings, 1 reply; 19+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-07-28 12:06 UTC (permalink / raw)
  To: Wei Li
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, guohanjun, Adrian Hunter, linux-kernel,
	zhangshaokun, Peter Zijlstra, Ingo Molnar, James Clark, Leo Yan,
	Namhyung Kim, Jiri Olsa, linux-arm-kernel

Em Fri, Jul 24, 2020 at 05:16:03PM +0800, Wei Li escreveu:
> The ARMv8.3-SPE adds an Alignment Flag in the Events packet and filtering
> on this event using PMSEVFR_EL1, together with support for the profiling
> of Scalable Vector Extension operations.

Leo, Matthieu, have you had the time to review this? I can take the
tools/ part, the kernel bits should go via the ARM tree or via PeterZ,

- Arnaldo
 
> Patch 1: Update the kernel driver, mainly for PMSEVFR_EL1.
> 
> Patch 2: Update the decode process of Events packet and Operation Type
>   packet in perf tool.
> 
> Patch 3-4: Synthesize unaligned address access events and partial/empty
>   predicated SVE events, also add two itrace options for filtering.
> 
> Wei Li (4):
>   drivers/perf: Add support for ARMv8.3-SPE
>   perf: arm-spe: Add support for ARMv8.3-SPE
>   perf auxtrace: Add new itrace options for ARMv8.3-SPE
>   perf: arm-spe: Synthesize new events for ARMv8.3-SPE
> 
>  arch/arm64/include/asm/sysreg.h               |  4 +-
>  drivers/perf/arm_spe_pmu.c                    | 18 +++--
>  tools/perf/Documentation/itrace.txt           |  2 +
>  .../util/arm-spe-decoder/arm-spe-decoder.c    | 11 +++
>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  3 +
>  .../arm-spe-decoder/arm-spe-pkt-decoder.c     | 69 ++++++++++++++++++-
>  tools/perf/util/arm-spe.c                     | 61 ++++++++++++++++
>  tools/perf/util/auxtrace.c                    |  8 +++
>  tools/perf/util/auxtrace.h                    |  4 ++
>  9 files changed, 173 insertions(+), 7 deletions(-)
> 
> -- 
> 2.17.1
> 

-- 

- Arnaldo

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-24  9:16 ` [PATCH 1/4] drivers/perf: " Wei Li
@ 2020-07-28 12:27   ` Leo Yan
  2020-07-28 13:24     ` liwei (GF)
  2020-07-29  9:12   ` Suzuki K Poulose
  1 sibling, 1 reply; 19+ messages in thread
From: Leo Yan @ 2020-07-28 12:27 UTC (permalink / raw)
  To: Wei Li
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

Hi Wei,

On Fri, Jul 24, 2020 at 05:16:04PM +0800, Wei Li wrote:
> Armv8.3 extends the SPE by adding:
> - Alignment field in the Events packet, and filtering on this event
>   using PMSEVFR_EL1.
> - Support for the Scalable Vector Extension (SVE).
> 
> The main additions for SVE are:
> - Recording the vector length for SVE operations in the Operation Type
>   packet. It is not possible to filter on vector length.
> - Incomplete predicate and empty predicate fields in the Events packet,
>   and filtering on these events using PMSEVFR_EL1.
> 
> Update the check of pmsevfr for empty/partial predicated SVE and
> alignment event in kernel driver.
> 
> Signed-off-by: Wei Li <liwei391@huawei.com>
> ---
>  arch/arm64/include/asm/sysreg.h |  4 +++-
>  drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
>  2 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 463175f80341..be4c44ccdb56 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -281,7 +281,6 @@
>  #define SYS_PMSFCR_EL1_ST_SHIFT		18
>  
>  #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
> -#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
>  
>  #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
>  #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
> @@ -769,6 +768,9 @@
>  #define ID_AA64DFR0_PMUVER_8_5		0x6
>  #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
>  
> +#define ID_AA64DFR0_PMSVER_8_2		0x1
> +#define ID_AA64DFR0_PMSVER_8_3		0x2
> +
>  #define ID_DFR0_PERFMON_SHIFT		24
>  
>  #define ID_DFR0_PERFMON_8_1		0x4
> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> index e51ddb6d63ed..5ec7ee0c8fa1 100644
> --- a/drivers/perf/arm_spe_pmu.c
> +++ b/drivers/perf/arm_spe_pmu.c
> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
>  	struct hlist_node			hotplug_node;
>  
>  	int					irq; /* PPI */
> -
> +	int					pmuver;

Since the version number is only 4 bits wide, 'u16' would be enough
to record the SPE version number.

>  	u16					min_period;
>  	u16					counter_sz;
>  
> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
>  /* Keep track of our dynamic hotplug state */
>  static enum cpuhp_state arm_spe_pmu_online;
>  
> +static u64 sys_pmsevfr_el1_mask[] = {
> +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> +		BIT_ULL(1),
> +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
> +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
> +};

Seems to me, the definitions for Aarch64 system registers should be
placed into the file 'arch/arm64/include/asm/sysreg.h'.  Like below
two macros:

  #define SYS_PMSEVFR_EL1_RES0_8_2		0x0000ffff00ff0f55UL
  #define SYS_PMSEVFR_EL1_RES0_8_3		...

Let's wait for Will's or Mark Rutland's comments on this, in case I am
misleading you here.

> +
>  enum arm_spe_pmu_buf_fault_action {
>  	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
>  	SPE_PMU_BUF_FAULT_ACT_FATAL,
> @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>  	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
>  		return -ENOENT;
>  
> -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
> +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
>  		return -EOPNOTSUPP;
>  
>  	if (attr->exclude_idle)
> @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
>  			fld, smp_processor_id());
>  		return;
>  	}
> +	spe_pmu->pmuver = fld;
>  
>  	/* Read PMBIDR first to determine whether or not we have access */
>  	reg = read_sysreg_s(SYS_PMBIDR_EL1);
> @@ -1027,8 +1037,8 @@ static void __arm_spe_pmu_dev_probe(void *info)
>  	}
>  
>  	dev_info(dev,
> -		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
> -		 cpumask_pr_args(&spe_pmu->supported_cpus),
> +		 "v%d probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",

Let's output explicit info, like:

  "probed for CPUs %*pbl [pmuver %d, max_record_sz %u, align %u, features 0x%llx]\n",

Thanks,
Leo

> +		 spe_pmu->pmuver, cpumask_pr_args(&spe_pmu->supported_cpus),
>  		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
>  
>  	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
> -- 
> 2.17.1
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 0/4] Add support for ARMv8.3-SPE
  2020-07-28 12:06 ` [PATCH 0/4] Add support " Arnaldo Carvalho de Melo
@ 2020-07-28 12:41   ` Leo Yan
  0 siblings, 0 replies; 19+ messages in thread
From: Leo Yan @ 2020-07-28 12:41 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, linux-kernel, zhangshaokun,
	Peter Zijlstra, Ingo Molnar, James Clark, guohanjun,
	Namhyung Kim, Jiri Olsa, linux-arm-kernel, Wei Li

Hi Arnaldo,

On Tue, Jul 28, 2020 at 09:06:23AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Jul 24, 2020 at 05:16:03PM +0800, Wei Li escreveu:
> > The ARMv8.3-SPE adds an Alignment Flag in the Events packet and filtering
> > on this event using PMSEVFR_EL1, together with support for the profiling
> > of Scalable Vector Extension operations.
> 
> Leo, Matthieu, have you had the time to review this? I can take the
> tools/ part, the kernel bits should go via the ARM tree or via PeterZ,

Yes, I will study this patch set and try to give some comments on it
today or tomorrow.

Thanks for reminding,
Leo

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-28 12:27   ` Leo Yan
@ 2020-07-28 13:24     ` liwei (GF)
  2020-07-29  7:08       ` Leo Yan
  0 siblings, 1 reply; 19+ messages in thread
From: liwei (GF) @ 2020-07-28 13:24 UTC (permalink / raw)
  To: Leo Yan, Will Deacon
  Cc: Mark Rutland, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

Hi Leo,

On 2020/7/28 20:27, Leo Yan wrote:
> Hi Wei,
> 
> On Fri, Jul 24, 2020 at 05:16:04PM +0800, Wei Li wrote:
>> Armv8.3 extends the SPE by adding:
>> - Alignment field in the Events packet, and filtering on this event
>>   using PMSEVFR_EL1.
>> - Support for the Scalable Vector Extension (SVE).
>>
>> The main additions for SVE are:
>> - Recording the vector length for SVE operations in the Operation Type
>>   packet. It is not possible to filter on vector length.
>> - Incomplete predicate and empty predicate fields in the Events packet,
>>   and filtering on these events using PMSEVFR_EL1.
>>
>> Update the check of pmsevfr for empty/partial predicated SVE and
>> alignment event in kernel driver.
>>
>> Signed-off-by: Wei Li <liwei391@huawei.com>
>> ---
>>  arch/arm64/include/asm/sysreg.h |  4 +++-
>>  drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
>>  2 files changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index 463175f80341..be4c44ccdb56 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -281,7 +281,6 @@
>>  #define SYS_PMSFCR_EL1_ST_SHIFT		18
>>  
>>  #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
>> -#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
>>  
>>  #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
>>  #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
>> @@ -769,6 +768,9 @@
>>  #define ID_AA64DFR0_PMUVER_8_5		0x6
>>  #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
>>  
>> +#define ID_AA64DFR0_PMSVER_8_2		0x1
>> +#define ID_AA64DFR0_PMSVER_8_3		0x2
>> +
>>  #define ID_DFR0_PERFMON_SHIFT		24
>>  
>>  #define ID_DFR0_PERFMON_8_1		0x4
>> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
>> index e51ddb6d63ed..5ec7ee0c8fa1 100644
>> --- a/drivers/perf/arm_spe_pmu.c
>> +++ b/drivers/perf/arm_spe_pmu.c
>> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
>>  	struct hlist_node			hotplug_node;
>>  
>>  	int					irq; /* PPI */
>> -
>> +	int					pmuver;
> 
> Since the version number is only 4 bits width, 'u16' would be enough
> to record SPE version number.

Sounds reasonable, I can change it to 'u16' if you insist.

>>  	u16					min_period;
>>  	u16					counter_sz;
>>  
>> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
>>  /* Keep track of our dynamic hotplug state */
>>  static enum cpuhp_state arm_spe_pmu_online;
>>  
>> +static u64 sys_pmsevfr_el1_mask[] = {
>> +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>> +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
>> +		BIT_ULL(1),
>> +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>> +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
>> +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
>> +};
> 
> Seems to me, the definitions for Aarch64 system registers should be
> placed into the file 'arch/arm64/include/asm/sysreg.h'.  Like below
> two macros:
> 
>   #define SYS_PMSEVFR_EL1_RES0_8_2		0x0000ffff00ff0f55UL
>   #define SYS_PMSEVFR_EL1_RES0_8_3		...

I really think using GENMASK_ULL() to generate the mask is better than a definition
with a magic number. It is easier to review and extend later.

> Let's wait for Will or Mark Rutland's comments for this, in case I
> mislead for this.
>> +
>>  enum arm_spe_pmu_buf_fault_action {
>>  	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
>>  	SPE_PMU_BUF_FAULT_ACT_FATAL,
>> @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>>  	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
>>  		return -ENOENT;
>>  
>> -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
>> +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
>>  		return -EOPNOTSUPP;
>>  
>>  	if (attr->exclude_idle)
>> @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
>>  			fld, smp_processor_id());
>>  		return;
>>  	}
>> +	spe_pmu->pmuver = fld;
>>  
>>  	/* Read PMBIDR first to determine whether or not we have access */
>>  	reg = read_sysreg_s(SYS_PMBIDR_EL1);
>> @@ -1027,8 +1037,8 @@ static void __arm_spe_pmu_dev_probe(void *info)
>>  	}
>>  
>>  	dev_info(dev,
>> -		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
>> -		 cpumask_pr_args(&spe_pmu->supported_cpus),
>> +		 "v%d probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
> 
> Let's output explict info, like:
> 
>   "probed for CPUs %*pbl [pmuver %d, max_record_sz %u, align %u, features 0x%llx]\n",
> 

Agreed, and I have a small question here:
Currently, the of_compatible of SPE PMU is "arm,statistical-profiling-extension-v1", and
the platform_device name is "arm,spe-v1". So this message looks weird when supporting
ARMv8.3-SPE because the pmuver is 2.

As the version of SPE can be probed by reading 'ID_AA64DFR0_EL1.PMSVer', can we remove
the version hint in of_compatible and platform_device name?

> 
>> +		 spe_pmu->pmuver, cpumask_pr_args(&spe_pmu->supported_cpus),
>>  		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
>>  
>>  	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
>> -- 
>> 2.17.1
>>

Thanks,
Wei

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] perf: arm-spe: Add support for ARMv8.3-SPE
  2020-07-24  9:16 ` [PATCH 2/4] perf: arm-spe: " Wei Li
@ 2020-07-29  6:29   ` Leo Yan
  2020-07-29  7:21     ` liwei (GF)
  0 siblings, 1 reply; 19+ messages in thread
From: Leo Yan @ 2020-07-29  6:29 UTC (permalink / raw)
  To: Wei Li
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

On Fri, Jul 24, 2020 at 05:16:05PM +0800, Wei Li wrote:
> Armv8.3 extends the SPE by adding:
> - Alignment field in the Events packet, and filtering on this event
>   using PMSEVFR_EL1.
> - Support for the Scalable Vector Extension (SVE).
> 
> The main additions for SVE are:
> - Recording the vector length for SVE operations in the Operation Type
>   packet. It is not possible to filter on vector length.
> - Incomplete predicate and empty predicate fields in the Events packet,
>   and filtering on these events using PMSEVFR_EL1.

This description is not relevant to the changes in this
patch, so it could be removed.

> Add the corresponding decode process of Events packet and Operation Type
> packet in perf tool.

This patch adds raw dumping for the Events packet and the Operation Type
packet.

> Signed-off-by: Wei Li <liwei391@huawei.com>
> ---
>  .../arm-spe-decoder/arm-spe-pkt-decoder.c     | 69 ++++++++++++++++++-
>  1 file changed, 67 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
> index b94001b756c7..10a3692839de 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
> @@ -347,6 +347,24 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>  				blen -= ret;
>  			}
>  		}
> +		if (idx > 2) {
> +			if (payload & 0x800) {
> +				ret = snprintf(buf, buf_len, " ALIGNMENT");
> +				buf += ret;
> +				blen -= ret;
> +			}
> +			if (payload & 0x20000) {
> +				ret = snprintf(buf, buf_len, " SVE-PRED-PARTIAL");
> +				buf += ret;
> +				blen -= ret;
> +			}
> +			if (payload & 0x40000) {
> +				ret = snprintf(buf, buf_len, " SVE-PRED-EMPTY");
> +				buf += ret;
> +				blen -= ret;
> +			}
> +		}
> +

Correct.

>  		if (ret < 0)
>  			return ret;
>  		blen -= ret;
> @@ -354,8 +372,38 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>  	}
>  	case ARM_SPE_OP_TYPE:
>  		switch (idx) {
> -		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
> -					"COND-SELECT" : "INSN-OTHER");
> +		case 0:	{
> +			if (payload & 0x8) {

Some nitpicks for packet format checking ...

For an SVE operation, the payload pattern is: 0b0xxx1xx0.

So it's good to check the pattern like:

  /* SVE operation subclass is: 0b0xxx1xx0 */
  if ((payload & 0x8081) == 0x80) {
     ....
  }

If the packet format is extended later, this will not introduce any
conflict.

> +				size_t blen = buf_len;
> +
> +				ret = snprintf(buf, buf_len, "SVE-OTHER");
> +				buf += ret;
> +				blen -= ret;
> +				if (payload & 0x2) {

Here should express as binary results: " FP" or " INT".

> +					ret = snprintf(buf, buf_len, " FP");
> +					buf += ret;
> +					blen -= ret;
> +				}
> +				if (payload & 0x4) {
> +					ret = snprintf(buf, buf_len, " PRED");

Here should express as binary results: " PRED" or " NOT-PRED".

> +					buf += ret;
> +					blen -= ret;
> +				}
> +				if (payload & 0x70) {

This is incorrect.  If bits[6:4] is zero, it represents a vector length of 32 bits.

> +					ret = snprintf(buf, buf_len, " EVL %d",
> +						32 << ((payload & 0x70) >> 4));
> +					buf += ret;
> +					blen -= ret;
> +				}
> +				if (ret < 0)
> +					return ret;
> +				blen -= ret;
> +				return buf_len - blen;
> +			} else {

Here we can check with more accurate format as defined in ARMv8 ARM:

  /* Other operation subclass is: 0b0000000x */
  if ((payload & 0xfe) == 0x0) {
     ....
  }

> +				return snprintf(buf, buf_len, "%s", payload & 0x1 ?
> +						"COND-SELECT" : "INSN-OTHER");
> +			}
> +		}
>  		case 1:	{
>  			size_t blen = buf_len;
>  
> @@ -385,6 +433,23 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>  				ret = snprintf(buf, buf_len, " SIMD-FP");
>  				buf += ret;
>  				blen -= ret;
> +			} else if (payload & 0x8) {
> +				if (payload & 0x4) {
> +					ret = snprintf(buf, buf_len, " PRED");

Here should express as binary results: " PRED" or " NOT-PRED".

> +					buf += ret;
> +					blen -= ret;
> +				}
> +				if (payload & 0x70) {

This is incorrect.  If bits[6:4] is zero, it represents a vector length of 32 bits.

> +					ret = snprintf(buf, buf_len, " EVL %d",
> +						32 << ((payload & 0x70) >> 4));
> +					buf += ret;
> +					blen -= ret;
> +				}
> +				if (payload & 0x80) {
> +					ret = snprintf(buf, buf_len, " SG");

Here should express as binary results: " SG" or " NOT-SG".

Thanks,
Leo

> +					buf += ret;
> +					blen -= ret;
> +				}



>  			}
>  			if (ret < 0)
>  				return ret;
> -- 
> 2.17.1
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] perf auxtrace: Add new itrace options for ARMv8.3-SPE
  2020-07-24  9:16 ` [PATCH 3/4] perf auxtrace: Add new itrace options " Wei Li
@ 2020-07-29  6:51   ` Leo Yan
  0 siblings, 0 replies; 19+ messages in thread
From: Leo Yan @ 2020-07-29  6:51 UTC (permalink / raw)
  To: Wei Li
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

On Fri, Jul 24, 2020 at 05:16:06PM +0800, Wei Li wrote:
> This patch is to add two options to synthesize events which are
> described as below:
> 
>  'u': synthesize unaligned address access events
>  'v': synthesize partial/empty predicated SVE events
> 
> These two options will be used by ARM SPE as their first consumer.
> 
> Signed-off-by: Wei Li <liwei391@huawei.com>
> ---
>  tools/perf/Documentation/itrace.txt | 2 ++
>  tools/perf/util/auxtrace.c          | 8 ++++++++
>  tools/perf/util/auxtrace.h          | 4 ++++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
> index e817179c5027..25bcf3622709 100644
> --- a/tools/perf/Documentation/itrace.txt
> +++ b/tools/perf/Documentation/itrace.txt
> @@ -13,6 +13,8 @@
>  		m	synthesize last level cache events
>  		t	synthesize TLB events
>  		a	synthesize remote access events
> +		u	synthesize unaligned address access events
> +		v	synthesize partial/empty predicated SVE events
>  		g	synthesize a call chain (use with i or x)
>  		G	synthesize a call chain on existing event records
>  		l	synthesize last branch entries (use with i or x)
> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> index 25c639ac4ad4..2033eb3708ec 100644
> --- a/tools/perf/util/auxtrace.c
> +++ b/tools/perf/util/auxtrace.c
> @@ -1334,6 +1334,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
>  	synth_opts->llc = true;
>  	synth_opts->tlb = true;
>  	synth_opts->remote_access = true;
> +	synth_opts->alignment = true;
> +	synth_opts->sve = true;
>  
>  	if (no_sample) {
>  		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
> @@ -1507,6 +1509,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
>  		case 'a':
>  			synth_opts->remote_access = true;
>  			break;
> +		case 'u':
> +			synth_opts->alignment = true;
> +			break;
> +		case 'v':
> +			synth_opts->sve = true;
> +			break;
>  		case ' ':
>  		case ',':
>  			break;
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index 142ccf7d34df..972df7b06b0d 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -116,6 +116,8 @@ struct itrace_synth_opts {
>  	bool			llc;
>  	bool			tlb;
>  	bool			remote_access;
> +	bool			alignment;

Patches 03 and 04 are straightforward; they look good to me.

The name 'unalignment' would be clearer and avoid confusion.

> +	bool			sve;
>  	unsigned int		callchain_sz;
>  	unsigned int		last_branch_sz;
>  	unsigned long long	period;
> @@ -617,6 +619,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
>  "				m:	    		synthesize last level cache events\n" \
>  "				t:	    		synthesize TLB events\n" \
>  "				a:	    		synthesize remote access events\n" \
> +"				u:	    		synthesize unaligned address access events\n" \

The 'unalignment' events are for 'unaligned address access and size of data'.

Otherwise, looks good to me:
Reviewed-by: Leo Yan <leo.yan@linaro.org>

Thanks,
Leo

> +"				v:	    		synthesize partial/empty predicated SVE events\n" \
>  "				g[len]:     		synthesize a call chain (use with i or x)\n" \
>  "				l[len]:     		synthesize last branch entries (use with i or x)\n" \
>  "				sNUMBER:    		skip initial number of events\n"		\
> -- 
> 2.17.1
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-28 13:24     ` liwei (GF)
@ 2020-07-29  7:08       ` Leo Yan
  0 siblings, 0 replies; 19+ messages in thread
From: Leo Yan @ 2020-07-29  7:08 UTC (permalink / raw)
  To: liwei (GF), Al Grant
  Cc: Mark Rutland, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Jiri Olsa, Adrian Hunter,
	Arnaldo Carvalho de Melo, linux-kernel, zhangshaokun,
	Peter Zijlstra, Ingo Molnar, James Clark, guohanjun,
	Namhyung Kim, Will Deacon, linux-arm-kernel

On Tue, Jul 28, 2020 at 09:24:42PM +0800, liwei (GF) wrote:

[...]

> >> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> >> index e51ddb6d63ed..5ec7ee0c8fa1 100644
> >> --- a/drivers/perf/arm_spe_pmu.c
> >> +++ b/drivers/perf/arm_spe_pmu.c
> >> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
> >>  	struct hlist_node			hotplug_node;
> >>  
> >>  	int					irq; /* PPI */
> >> -
> >> +	int					pmuver;
> > 
> > Since the version number is only 4 bits width, 'u16' would be enough
> > to record SPE version number.
> 
> Sounds reasonable, i can change it to 'u16' if you insist.
> 
> >>  	u16					min_period;
> >>  	u16					counter_sz;
> >>  
> >> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
> >>  /* Keep track of our dynamic hotplug state */
> >>  static enum cpuhp_state arm_spe_pmu_online;
> >>  
> >> +static u64 sys_pmsevfr_el1_mask[] = {
> >> +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> >> +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> >> +		BIT_ULL(1),
> >> +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> >> +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
> >> +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
> >> +};
> > 
> > Seems to me, the definitions for Aarch64 system registers should be
> > placed into the file 'arch/arm64/include/asm/sysreg.h'.  Like below
> > two macros:
> > 
> >   #define SYS_PMSEVFR_EL1_RES0_8_2		0x0000ffff00ff0f55UL
> >   #define SYS_PMSEVFR_EL1_RES0_8_3		...
> 
> I really think using GENMASK_ULL() to generate the mask is better than a definition
> with magic number. It is beneficial to be reviewed and extended later.

Understood.  I just want to point out that ARMv8 system register
definitions are usually placed in the global header sysreg.h rather
than defined in separate source files.

You could define the bit masks with GENMASK_ULL() for the two macros
in sysreg.h.

> > Let's wait for Will or Mark Rutland's comments for this, in case I
> > mislead for this.
> >> +
> >>  enum arm_spe_pmu_buf_fault_action {
> >>  	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
> >>  	SPE_PMU_BUF_FAULT_ACT_FATAL,
> >> @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
> >>  	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
> >>  		return -ENOENT;
> >>  
> >> -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
> >> +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
> >>  		return -EOPNOTSUPP;
> >>  
> >>  	if (attr->exclude_idle)
> >> @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
> >>  			fld, smp_processor_id());
> >>  		return;
> >>  	}
> >> +	spe_pmu->pmuver = fld;
> >>  
> >>  	/* Read PMBIDR first to determine whether or not we have access */
> >>  	reg = read_sysreg_s(SYS_PMBIDR_EL1);
> >> @@ -1027,8 +1037,8 @@ static void __arm_spe_pmu_dev_probe(void *info)
> >>  	}
> >>  
> >>  	dev_info(dev,
> >> -		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
> >> -		 cpumask_pr_args(&spe_pmu->supported_cpus),
> >> +		 "v%d probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
> > 
> > Let's output explicit info, like:
> > 
> >   "probed for CPUs %*pbl [pmuver %d, max_record_sz %u, align %u, features 0x%llx]\n",
> > 
> 
> Agree, and i have a little question here:
> Currently, the of_compatible of SPE PMU is "arm,statistical-profiling-extension-v1", and
> the platform_device name is "arm,spe-v1". So this message looks weird when supporting
> ARMv8.3-SPE because the pmuver is 2.

I think here we need to distinguish two things: SPE (as an IP) and
ARMv8.2/ARMv8.3 (as CPU architectures).  From my understanding, now we
are working on SPE-v1, but it needs to support ARMv8 variants, e.g.
ARMv8.2 and ARMv8.3 with SVE extension.

I am not the best person to clarify the version number for SPE; if Arm
colleagues disagree with this, they are very welcome to correct me.

Also looping in Al for this.

> As the version of SPE can be probed by reading 'ID_AA64DFR0_EL1.PMSVer', can we remove
> the version hint in of_compatible and platform_device name?

No, for device tree, we usually need to keep backward compatibility for
the DT binding, so we cannot remove the compatible string.

Thanks,
Leo

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] perf: arm-spe: Add support for ARMv8.3-SPE
  2020-07-29  6:29   ` Leo Yan
@ 2020-07-29  7:21     ` liwei (GF)
  2020-07-29  7:28       ` Leo Yan
  0 siblings, 1 reply; 19+ messages in thread
From: liwei (GF) @ 2020-07-29  7:21 UTC (permalink / raw)
  To: Leo Yan
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

Hi Leo,

On 2020/7/29 14:29, Leo Yan wrote:
> On Fri, Jul 24, 2020 at 05:16:05PM +0800, Wei Li wrote:
>> Armv8.3 extends the SPE by adding:
>> - Alignment field in the Events packet, and filtering on this event
>>   using PMSEVFR_EL1.
>> - Support for the Scalable Vector Extension (SVE).
>>
>> The main additions for SVE are:
>> - Recording the vector length for SVE operations in the Operation Type
>>   packet. It is not possible to filter on vector length.
>> - Incomplete predicate and empty predicate fields in the Events packet,
>>   and filtering on these events using PMSEVFR_EL1.
> 
> This comment description is not relevant with the changes in this
> patch, so could remove them.
> 
>> Add the corresponding decode process of Events packet and Operation Type
>> packet in perf tool.
> 
> This patch is to add the raw dumping for Events packet and Operation Type
> packet.
> 
>> Signed-off-by: Wei Li <liwei391@huawei.com>
>> ---
>>  .../arm-spe-decoder/arm-spe-pkt-decoder.c     | 69 ++++++++++++++++++-
>>  1 file changed, 67 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
>> index b94001b756c7..10a3692839de 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
>> @@ -347,6 +347,24 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>>  				blen -= ret;
>>  			}
>>  		}
>> +		if (idx > 2) {
>> +			if (payload & 0x800) {
>> +				ret = snprintf(buf, buf_len, " ALIGNMENT");
>> +				buf += ret;
>> +				blen -= ret;
>> +			}
>> +			if (payload & 0x20000) {
>> +				ret = snprintf(buf, buf_len, " SVE-PRED-PARTIAL");
>> +				buf += ret;
>> +				blen -= ret;
>> +			}
>> +			if (payload & 0x40000) {
>> +				ret = snprintf(buf, buf_len, " SVE-PRED-EMPTY");
>> +				buf += ret;
>> +				blen -= ret;
>> +			}
>> +		}
>> +
> 
> Correct.
> 
>>  		if (ret < 0)
>>  			return ret;
>>  		blen -= ret;
>> @@ -354,8 +372,38 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>>  	}
>>  	case ARM_SPE_OP_TYPE:
>>  		switch (idx) {
>> -		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
>> -					"COND-SELECT" : "INSN-OTHER");
>> +		case 0:	{
>> +			if (payload & 0x8) {
> 
> Some nitpicks for packet format checking ...
> 
> For SVE operation, the payload partten is: 0b0xxx1xx0.
> 
> So it's good to check the partten like:
> 
>   /* SVE operation subclass is: 0b0xxx1xx0 */
>   if ((payload & 0x8081) == 0x80) {
>      ....
>   }
> 
> If later the packet format is extended, this will not introduce any
> confliction.

Got it, but I think what you really mean is:
if ((payload & 0x89) == 0x80) {
	...
}

> 
>> +				size_t blen = buf_len;
>> +
>> +				ret = snprintf(buf, buf_len, "SVE-OTHER");
>> +				buf += ret;
>> +				blen -= ret;
>> +				if (payload & 0x2) {
> 
> Here should express as binary results: " FP" or " INT".

I think this is a style choice; I added these just like the current code that
processes "AT", "EXCL", "AR", "COND" and so on. So should we modify all the corresponding code together?

> 
>> +					ret = snprintf(buf, buf_len, " FP");
>> +					buf += ret;
>> +					blen -= ret;
>> +				}
>> +				if (payload & 0x4) {
>> +					ret = snprintf(buf, buf_len, " PRED");
> 
> Here should express as binary results: " PRED" or " NOT-PRED".

Ditto.

> 
>> +					buf += ret;
>> +					blen -= ret;
>> +				}
>> +				if (payload & 0x70) {
> 
> This is incorrect.  If bits[6:4] is zero, it presents vector length is 32 bits.
>

I am a little confused here.
According to ARM DDI 0487F.b (ID040120), page D10-2830, if bits[6:4] is zero,
it does indeed represent a vector length of 32 bits.

>> +					ret = snprintf(buf, buf_len, " EVL %d",
>> +						32 << ((payload & 0x70) >> 4));
>> +					buf += ret;
>> +					blen -= ret;
>> +				}
>> +				if (ret < 0)
>> +					return ret;
>> +				blen -= ret;
>> +				return buf_len - blen;
>> +			} else {
> 
> Here we can check with more accurate format as defined in ARMv8 ARM:
> 
>   /* Other operation subclass is: 0b0000000x */
>   if ((payload & 0xfe) == 0x0) {
>      ....
>   }
> 
>> +				return snprintf(buf, buf_len, "%s", payload & 0x1 ?
>> +						"COND-SELECT" : "INSN-OTHER");
>> +			}
>> +		}
>>  		case 1:	{
>>  			size_t blen = buf_len;
>>  
>> @@ -385,6 +433,23 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>>  				ret = snprintf(buf, buf_len, " SIMD-FP");
>>  				buf += ret;
>>  				blen -= ret;
>> +			} else if (payload & 0x8) {
>> +				if (payload & 0x4) {
>> +					ret = snprintf(buf, buf_len, " PRED");
> 
> Here should express as binary results: " PRED" or " NOT-PRED".

Ditto.

>> +					buf += ret;
>> +					blen -= ret;
>> +				}
>> +				if (payload & 0x70) {
> 
> This is incorrect.  If bits[6:4] is zero, it presents vector length is 32 bits.

According to ARM DDI 0487F.b (ID040120), page D10-2832, if bits[6:4] is zero,
it does indeed represent a vector length of 32 bits.

>> +					ret = snprintf(buf, buf_len, " EVL %d",
>> +						32 << ((payload & 0x70) >> 4));
>> +					buf += ret;
>> +					blen -= ret;
>> +				}
>> +				if (payload & 0x80) {
>> +					ret = snprintf(buf, buf_len, " SG");
> 
> Here should express as binary results: " SG" or " NOT-SG".


Thanks,
Wei

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] perf: arm-spe: Add support for ARMv8.3-SPE
  2020-07-29  7:21     ` liwei (GF)
@ 2020-07-29  7:28       ` Leo Yan
  2020-07-29  7:42         ` liwei (GF)
  0 siblings, 1 reply; 19+ messages in thread
From: Leo Yan @ 2020-07-29  7:28 UTC (permalink / raw)
  To: liwei (GF)
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

On Wed, Jul 29, 2020 at 03:21:20PM +0800, liwei (GF) wrote:

[...]

> >> @@ -354,8 +372,38 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
> >>  	}
> >>  	case ARM_SPE_OP_TYPE:
> >>  		switch (idx) {
> >> -		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
> >> -					"COND-SELECT" : "INSN-OTHER");
> >> +		case 0:	{
> >> +			if (payload & 0x8) {
> > 
> > Some nitpicks for packet format checking ...
> > 
> > For SVE operation, the payload partten is: 0b0xxx1xx0.
> > 
> > So it's good to check the partten like:
> > 
> >   /* SVE operation subclass is: 0b0xxx1xx0 */
> >   if ((payload & 0x8081) == 0x80) {
> >      ....
> >   }
> > 
> > If later the packet format is extended, this will not introduce any
> > confliction.
> 
> Get it, but i think what you are really meaning is:
> if ((payload & 0x89) == 0x80) {
> 	...
> }

Yes.

> > 
> >> +				size_t blen = buf_len;
> >> +
> >> +				ret = snprintf(buf, buf_len, "SVE-OTHER");
> >> +				buf += ret;
> >> +				blen -= ret;
> >> +				if (payload & 0x2) {
> > 
> > Here should express as binary results: " FP" or " INT".
> 
> I think this is a style choice, i add these just like the current code where
> processing "AT", "EXCL", "AR", "COND" and so on. So should we modify all the corresponding code together?

Okay, understood.  Let's just follow the existing style; later we can
enhance the output log with a more readable format.

[...]

> > 
> >> +					ret = snprintf(buf, buf_len, " FP");
> >> +					buf += ret;
> >> +					blen -= ret;
> >> +				}
> >> +				if (payload & 0x4) {
> >> +					ret = snprintf(buf, buf_len, " PRED");
> > 
> > Here should express as binary results: " PRED" or " NOT-PRED".
> 
> Ditto.
> 
> > 
> >> +					buf += ret;
> >> +					blen -= ret;
> >> +				}
> >> +				if (payload & 0x70) {
> > 
> > This is incorrect.  If bits[6:4] is zero, it presents vector length is 32 bits.
> >
> 
> I am a little confused here.
> Refer to the ARM DDI 0487F.b (ID040120), page D10-2830, if bits[6:4] is zero,
> it presents vector length is 32 bits indeed.

Yes, if bits[6:4] is zero, your current code will not output any info.

Thanks,
Leo

> >> +					ret = snprintf(buf, buf_len, " EVL %d",
> >> +						32 << ((payload & 0x70) >> 4));
> >> +					buf += ret;
> >> +					blen -= ret;
> >> +				}
> >> +				if (ret < 0)
> >> +					return ret;
> >> +				blen -= ret;
> >> +				return buf_len - blen;
> >> +			} else {
> > 
> > Here we can check with more accurate format as defined in ARMv8 ARM:
> > 
> >   /* Other operation subclass is: 0b0000000x */
> >   if ((payload & 0xfe) == 0x0) {
> >      ....
> >   }
> > 
> >> +				return snprintf(buf, buf_len, "%s", payload & 0x1 ?
> >> +						"COND-SELECT" : "INSN-OTHER");
> >> +			}
> >> +		}
> >>  		case 1:	{
> >>  			size_t blen = buf_len;
> >>  
> >> @@ -385,6 +433,23 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
> >>  				ret = snprintf(buf, buf_len, " SIMD-FP");
> >>  				buf += ret;
> >>  				blen -= ret;
> >> +			} else if (payload & 0x8) {
> >> +				if (payload & 0x4) {
> >> +					ret = snprintf(buf, buf_len, " PRED");
> > 
> > Here should express as binary results: " PRED" or " NOT-PRED".
> 
> Ditto.
> 
> >> +					buf += ret;
> >> +					blen -= ret;
> >> +				}
> >> +				if (payload & 0x70) {
> > 
> > This is incorrect.  If bits[6:4] is zero, it presents vector length is 32 bits.
> 
> Refer to the ARM DDI 0487F.b (ID040120), page D10-2832, if bits[6:4] is zero,
> it presents vector length is 32 bits indeed.
> 
> >> +					ret = snprintf(buf, buf_len, " EVL %d",
> >> +						32 << ((payload & 0x70) >> 4));
> >> +					buf += ret;
> >> +					blen -= ret;
> >> +				}
> >> +				if (payload & 0x80) {
> >> +					ret = snprintf(buf, buf_len, " SG");
> > 
> > Here should express as binary results: " SG" or " NOT-SG".
> 
> 
> Thanks,
> Wei

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/4] perf: arm-spe: Add support for ARMv8.3-SPE
  2020-07-29  7:28       ` Leo Yan
@ 2020-07-29  7:42         ` liwei (GF)
  0 siblings, 0 replies; 19+ messages in thread
From: liwei (GF) @ 2020-07-29  7:42 UTC (permalink / raw)
  To: Leo Yan
  Cc: Mark Rutland, Will Deacon, Suzuki K Poulose, Alexander Shishkin,
	Catalin Marinas, Adrian Hunter, Arnaldo Carvalho de Melo,
	linux-kernel, zhangshaokun, Peter Zijlstra, Ingo Molnar,
	James Clark, guohanjun, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

Hi Leo,

On 2020/7/29 15:28, Leo Yan wrote:
> On Wed, Jul 29, 2020 at 03:21:20PM +0800, liwei (GF) wrote:
> 
> [...]
> 
>>>> @@ -354,8 +372,38 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
>>>>  	}
>>>>  	case ARM_SPE_OP_TYPE:
>>>>  		switch (idx) {
>>>> -		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
>>>> -					"COND-SELECT" : "INSN-OTHER");
>>>> +		case 0:	{
>>>> +			if (payload & 0x8) {
>>>
>>> Some nitpicks for packet format checking ...
>>>
>>> For SVE operation, the payload partten is: 0b0xxx1xx0.
>>>
>>> So it's good to check the partten like:
>>>
>>>   /* SVE operation subclass is: 0b0xxx1xx0 */
>>>   if ((payload & 0x8081) == 0x80) {
>>>      ....
>>>   }
>>>
>>> If later the packet format is extended, this will not introduce any
>>> confliction.
>>
>> Get it, but i think what you are really meaning is:
>> if ((payload & 0x89) == 0x80) {
>> 	...
>> }
> 
> Yes.
> 
>>>
>>>> +				size_t blen = buf_len;
>>>> +
>>>> +				ret = snprintf(buf, buf_len, "SVE-OTHER");
>>>> +				buf += ret;
>>>> +				blen -= ret;
>>>> +				if (payload & 0x2) {
>>>
>>> Here should express as binary results: " FP" or " INT".
>>
>> I think this is a style choice, i add these just like the current code where
>> processing "AT", "EXCL", "AR", "COND" and so on. So should we modify all the corresponding code together?
> 
> Okay, understood.  Let's just follow the existed style and later can
> enhance the output log with more readable format.
> 
> [...]
> 
>>>
>>>> +					ret = snprintf(buf, buf_len, " FP");
>>>> +					buf += ret;
>>>> +					blen -= ret;
>>>> +				}
>>>> +				if (payload & 0x4) {
>>>> +					ret = snprintf(buf, buf_len, " PRED");
>>>
>>> Here should express as binary results: " PRED" or " NOT-PRED".
>>
>> Ditto.
>>
>>>
>>>> +					buf += ret;
>>>> +					blen -= ret;
>>>> +				}
>>>> +				if (payload & 0x70) {
>>>
>>> This is incorrect.  If bits[6:4] is zero, it presents vector length is 32 bits.
>>>
>>
>> I am a little confused here.
>> Refer to the ARM DDI 0487F.b (ID040120), page D10-2830, if bits[6:4] is zero,
>> it presents vector length is 32 bits indeed.
> 
> Yes, if bits[6:4] is zero, your current code will not output any info.
> 

Yes, thanks for spotting this.
And thanks for your review.


Thanks,
Wei

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-24  9:16 ` [PATCH 1/4] drivers/perf: " Wei Li
  2020-07-28 12:27   ` Leo Yan
@ 2020-07-29  9:12   ` Suzuki K Poulose
  2020-07-30  8:14     ` Leo Yan
  1 sibling, 1 reply; 19+ messages in thread
From: Suzuki K Poulose @ 2020-07-29  9:12 UTC (permalink / raw)
  To: liwei391, adrian.hunter, alexander.shishkin, acme,
	catalin.marinas, James.Clark, jolsa, leo.yan, mark.rutland,
	namhyung, will, zhangshaokun
  Cc: peterz, mingo, linux-kernel, linux-arm-kernel, guohanjun

On 07/24/2020 10:16 AM, Wei Li wrote:
> Armv8.3 extends the SPE by adding:
> - Alignment field in the Events packet, and filtering on this event
>    using PMSEVFR_EL1.
> - Support for the Scalable Vector Extension (SVE).
> 
> The main additions for SVE are:
> - Recording the vector length for SVE operations in the Operation Type
>    packet. It is not possible to filter on vector length.
> - Incomplete predicate and empty predicate fields in the Events packet,
>    and filtering on these events using PMSEVFR_EL1.
> 
> Update the check of pmsevfr for empty/partial predicated SVE and
> alignment event in kernel driver.
> 
> Signed-off-by: Wei Li <liwei391@huawei.com>
> ---
>   arch/arm64/include/asm/sysreg.h |  4 +++-
>   drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
>   2 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 463175f80341..be4c44ccdb56 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -281,7 +281,6 @@
>   #define SYS_PMSFCR_EL1_ST_SHIFT		18
>   
>   #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
> -#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
>   
>   #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
>   #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
> @@ -769,6 +768,9 @@
>   #define ID_AA64DFR0_PMUVER_8_5		0x6
>   #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
>   
> +#define ID_AA64DFR0_PMSVER_8_2		0x1
> +#define ID_AA64DFR0_PMSVER_8_3		0x2
> +
>   #define ID_DFR0_PERFMON_SHIFT		24
>   
>   #define ID_DFR0_PERFMON_8_1		0x4
> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> index e51ddb6d63ed..5ec7ee0c8fa1 100644
> --- a/drivers/perf/arm_spe_pmu.c
> +++ b/drivers/perf/arm_spe_pmu.c
> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
>   	struct hlist_node			hotplug_node;
>   
>   	int					irq; /* PPI */
> -
> +	int					pmuver;
>   	u16					min_period;
>   	u16					counter_sz;
>   
> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
>   /* Keep track of our dynamic hotplug state */
>   static enum cpuhp_state arm_spe_pmu_online;
>   
> +static u64 sys_pmsevfr_el1_mask[] = {
> +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> +		BIT_ULL(1),
> +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
> +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
> +};
> +
>   enum arm_spe_pmu_buf_fault_action {
>   	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
>   	SPE_PMU_BUF_FAULT_ACT_FATAL,
> @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>   	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
>   		return -ENOENT;
>   
> -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
> +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
>   		return -EOPNOTSUPP;
>   
>   	if (attr->exclude_idle)
> @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
>   			fld, smp_processor_id());
>   		return;
>   	}
> +	spe_pmu->pmuver = fld;

How do we deal with cases where we have a big.LITTLE system with differing
SPE versions?

Cheers
Suzuki

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-29  9:12   ` Suzuki K Poulose
@ 2020-07-30  8:14     ` Leo Yan
  2020-07-31 12:18       ` liwei (GF)
  0 siblings, 1 reply; 19+ messages in thread
From: Leo Yan @ 2020-07-30  8:14 UTC (permalink / raw)
  To: Suzuki K Poulose
  Cc: mark.rutland, will, alexander.shishkin, catalin.marinas, jolsa,
	adrian.hunter, acme, linux-kernel, zhangshaokun, peterz, mingo,
	James.Clark, guohanjun, namhyung, liwei391, linux-arm-kernel

Hi Suzuki,

On Wed, Jul 29, 2020 at 10:12:50AM +0100, Suzuki Kuruppassery Poulose wrote:
> On 07/24/2020 10:16 AM, Wei Li wrote:
> > Armv8.3 extends the SPE by adding:
> > - Alignment field in the Events packet, and filtering on this event
> >    using PMSEVFR_EL1.
> > - Support for the Scalable Vector Extension (SVE).
> > 
> > The main additions for SVE are:
> > - Recording the vector length for SVE operations in the Operation Type
> >    packet. It is not possible to filter on vector length.
> > - Incomplete predicate and empty predicate fields in the Events packet,
> >    and filtering on these events using PMSEVFR_EL1.
> > 
> > Update the check of pmsevfr for empty/partial predicated SVE and
> > alignment event in kernel driver.
> > 
> > Signed-off-by: Wei Li <liwei391@huawei.com>
> > ---
> >   arch/arm64/include/asm/sysreg.h |  4 +++-
> >   drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
> >   2 files changed, 17 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 463175f80341..be4c44ccdb56 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -281,7 +281,6 @@
> >   #define SYS_PMSFCR_EL1_ST_SHIFT		18
> >   #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
> > -#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
> >   #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
> >   #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
> > @@ -769,6 +768,9 @@
> >   #define ID_AA64DFR0_PMUVER_8_5		0x6
> >   #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
> > +#define ID_AA64DFR0_PMSVER_8_2		0x1
> > +#define ID_AA64DFR0_PMSVER_8_3		0x2
> > +
> >   #define ID_DFR0_PERFMON_SHIFT		24
> >   #define ID_DFR0_PERFMON_8_1		0x4
> > diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> > index e51ddb6d63ed..5ec7ee0c8fa1 100644
> > --- a/drivers/perf/arm_spe_pmu.c
> > +++ b/drivers/perf/arm_spe_pmu.c
> > @@ -54,7 +54,7 @@ struct arm_spe_pmu {
> >   	struct hlist_node			hotplug_node;
> >   	int					irq; /* PPI */
> > -
> > +	int					pmuver;
> >   	u16					min_period;
> >   	u16					counter_sz;
> > @@ -80,6 +80,15 @@ struct arm_spe_pmu {
> >   /* Keep track of our dynamic hotplug state */
> >   static enum cpuhp_state arm_spe_pmu_online;
> > +static u64 sys_pmsevfr_el1_mask[] = {
> > +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> > +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> > +		BIT_ULL(1),
> > +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> > +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
> > +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
> > +};
> > +
> >   enum arm_spe_pmu_buf_fault_action {
> >   	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
> >   	SPE_PMU_BUF_FAULT_ACT_FATAL,
> > @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
> >   	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
> >   		return -ENOENT;
> > -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
> > +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
> >   		return -EOPNOTSUPP;
> >   	if (attr->exclude_idle)
> > @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
> >   			fld, smp_processor_id());
> >   		return;
> >   	}
> > +	spe_pmu->pmuver = fld;
> 
> How do we deal with cases where we have big.LITTLE system with differing
> SPE versions ?

Good point.

The first question we need to answer is: how do we define the SPE version?
From my understanding, if the SPE uses the same sample specification and
the same packet format, then we should consider the SPE to be the same
version across CPUs.  So even if some CPUs are ARMv8.2 and other CPUs are
ARMv8.3 variants, we should still treat the SPE as the same version.

Having read the SPE driver in the file drivers/perf/arm_spe_pmu.c, I
concluded that so far the SPE perf driver only supports SPE-v1 with a
single instance; it cannot support a complex use case like the one
below:

  CPU0-3: ARMv8.2 architecture with SPE
  CPU4-7: ARMv8.3 architecture with SPE

For this case, if we treat the SPE as two different versions, let's say
SPE-8.2 and SPE-8.3, then would the SPE driver need to create multiple
perf PMU events?  For example, we would create one perf PMU event
'arm_spe_8.2' and another PMU event 'arm_spe_8.3'.

Another option is to always treat this as SPE-v1 and create only a single
PMU event, keeping what we are doing now with the perf event
'arm_spe_0'; the driver would then need to dynamically detect the SPE PMU
version number in the function arm_spe_pmu_event_init() and, based on
that version number, select the corresponding mask for PMSEVFR.

Thanks,
Leo

[1] https://lore.kernel.org/linux-arm-kernel/20200724071111.35593-1-liwei391@huawei.com/

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-30  8:14     ` Leo Yan
@ 2020-07-31 12:18       ` liwei (GF)
  2020-07-31 14:01         ` Suzuki K Poulose
  0 siblings, 1 reply; 19+ messages in thread
From: liwei (GF) @ 2020-07-31 12:18 UTC (permalink / raw)
  To: Leo Yan, Suzuki K Poulose
  Cc: mark.rutland, will, alexander.shishkin, catalin.marinas,
	adrian.hunter, acme, linux-kernel, zhangshaokun, peterz, mingo,
	James.Clark, guohanjun, namhyung, jolsa, linux-arm-kernel



On 2020/7/30 16:14, Leo Yan wrote:
> Hi Suzuki,
> 
> On Wed, Jul 29, 2020 at 10:12:50AM +0100, Suzuki Kuruppassery Poulose wrote:
>> On 07/24/2020 10:16 AM, Wei Li wrote:
>>> Armv8.3 extends the SPE by adding:
>>> - Alignment field in the Events packet, and filtering on this event
>>>    using PMSEVFR_EL1.
>>> - Support for the Scalable Vector Extension (SVE).
>>>
>>> The main additions for SVE are:
>>> - Recording the vector length for SVE operations in the Operation Type
>>>    packet. It is not possible to filter on vector length.
>>> - Incomplete predicate and empty predicate fields in the Events packet,
>>>    and filtering on these events using PMSEVFR_EL1.
>>>
>>> Update the check of pmsevfr for empty/partial predicated SVE and
>>> alignment event in kernel driver.
>>>
>>> Signed-off-by: Wei Li <liwei391@huawei.com>
>>> ---
>>>   arch/arm64/include/asm/sysreg.h |  4 +++-
>>>   drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
>>>   2 files changed, 17 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>> index 463175f80341..be4c44ccdb56 100644
>>> --- a/arch/arm64/include/asm/sysreg.h
>>> +++ b/arch/arm64/include/asm/sysreg.h
>>> @@ -281,7 +281,6 @@
>>>   #define SYS_PMSFCR_EL1_ST_SHIFT		18
>>>   #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
>>> -#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
>>>   #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
>>>   #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
>>> @@ -769,6 +768,9 @@
>>>   #define ID_AA64DFR0_PMUVER_8_5		0x6
>>>   #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
>>> +#define ID_AA64DFR0_PMSVER_8_2		0x1
>>> +#define ID_AA64DFR0_PMSVER_8_3		0x2
>>> +
>>>   #define ID_DFR0_PERFMON_SHIFT		24
>>>   #define ID_DFR0_PERFMON_8_1		0x4
>>> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
>>> index e51ddb6d63ed..5ec7ee0c8fa1 100644
>>> --- a/drivers/perf/arm_spe_pmu.c
>>> +++ b/drivers/perf/arm_spe_pmu.c
>>> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
>>>   	struct hlist_node			hotplug_node;
>>>   	int					irq; /* PPI */
>>> -
>>> +	int					pmuver;
>>>   	u16					min_period;
>>>   	u16					counter_sz;
>>> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
>>>   /* Keep track of our dynamic hotplug state */
>>>   static enum cpuhp_state arm_spe_pmu_online;
>>> +static u64 sys_pmsevfr_el1_mask[] = {
>>> +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>>> +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
>>> +		BIT_ULL(1),
>>> +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>>> +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
>>> +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
>>> +};
>>> +
>>>   enum arm_spe_pmu_buf_fault_action {
>>>   	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
>>>   	SPE_PMU_BUF_FAULT_ACT_FATAL,
>>> @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>>>   	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
>>>   		return -ENOENT;
>>> -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
>>> +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
>>>   		return -EOPNOTSUPP;
>>>   	if (attr->exclude_idle)
>>> @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
>>>   			fld, smp_processor_id());
>>>   		return;
>>>   	}
>>> +	spe_pmu->pmuver = fld;
>>
>> How do we deal with cases where we have big.LITTLE system with differing
>> SPE versions ?
> 
> Good point.
> 
> The first question we need to answer is: how to define SPE version?
> From my understanding, if SPE uses the same sample specification and
> the same packet format, then we should consider the SPE is the same
> version cross CPUs.  So even some CPUs are ARMv8.2 and other CPUs are
> ARMv8.3 variants, we still should take the SPE as the same version.
> 
> And when read the SPE driver in the file drivers/perf/arm_spe_pmu.c and
> I concluded that so far the SPE perf driver is to only support SPE-v1
> with single instance, it cannot support a complex usage case like
> below:
> 
>   CPU0-3: ARMv8.2 architecture with SPE
>   CPU4-7: ARMv8.3 architecture with SPE
> 
> For this case, if we take SPE as two different versions, let's say
> SPE-8.2 and SPE-8.3, then should the SPE driver need to create multi
> perf PMU events?  For example, we should create a perf PMU event
> 'arm_spe_8.2' and another PMU event 'arm_spe_8.3'.

Now that we support SPE v2 (ARMv8.3-SPE), if we add the new
of_device_id: "arm,statistical-profiling-extension-v2" and the new
platform_device_id: "arm,spe-v2", we could really support two instances:
even two different versions of SPE PMUs which work on different ranges of
cores with the same PPI. Their functional scopes are the same as the PPI partitions.

> Another option is we always take this as SPE-v1 and only create single
> PMU event, just keep what's we are doing with the perf event
> 'arm_spe_0', but the driver needs to dynamically detect SPE PMU version
> number in the function arm_spe_pmu_event_init(), and then based on
> version number to select corresponding mask for PMSEVFR.

Thus, the driver will service two devices and will also register two PMUs, 'arm_spe_0' and
'arm_spe_1', so I think there is no conflict here.

Back to Suzuki's question: in the ACPI parsing code for the SPE device, the function
arm_spe_acpi_register_device() contains a check of hetero_id for all cores.
It seems that we only support homogeneous ACPI/SPE machines, but I can't find a similar
check in the OF/SPE parsing code.

> Thanks,
> Leo
> 
> [1] https://lore.kernel.org/linux-arm-kernel/20200724071111.35593-1-liwei391@huawei.com/
> 


Thanks,
Wei

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] drivers/perf: Add support for ARMv8.3-SPE
  2020-07-31 12:18       ` liwei (GF)
@ 2020-07-31 14:01         ` Suzuki K Poulose
  0 siblings, 0 replies; 19+ messages in thread
From: Suzuki K Poulose @ 2020-07-31 14:01 UTC (permalink / raw)
  To: liwei391, leo.yan, will
  Cc: mark.rutland, alexander.shishkin, catalin.marinas, adrian.hunter,
	acme, linux-kernel, zhangshaokun, peterz, mingo, James.Clark,
	guohanjun, namhyung, jolsa, linux-arm-kernel

On 07/31/2020 01:18 PM, liwei (GF) wrote:
> 
> 
> On 2020/7/30 16:14, Leo Yan wrote:
>> Hi Suzuki,
>>
>> On Wed, Jul 29, 2020 at 10:12:50AM +0100, Suzuki Kuruppassery Poulose wrote:
>>> On 07/24/2020 10:16 AM, Wei Li wrote:
>>>> Armv8.3 extends the SPE by adding:
>>>> - Alignment field in the Events packet, and filtering on this event
>>>>     using PMSEVFR_EL1.
>>>> - Support for the Scalable Vector Extension (SVE).
>>>>
>>>> The main additions for SVE are:
>>>> - Recording the vector length for SVE operations in the Operation Type
>>>>     packet. It is not possible to filter on vector length.
>>>> - Incomplete predicate and empty predicate fields in the Events packet,
>>>>     and filtering on these events using PMSEVFR_EL1.
>>>>
>>>> Update the check of pmsevfr for empty/partial predicated SVE and
>>>> alignment event in kernel driver.
>>>>
>>>> Signed-off-by: Wei Li <liwei391@huawei.com>
>>>> ---
>>>>    arch/arm64/include/asm/sysreg.h |  4 +++-
>>>>    drivers/perf/arm_spe_pmu.c      | 18 ++++++++++++++----
>>>>    2 files changed, 17 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>>> index 463175f80341..be4c44ccdb56 100644
>>>> --- a/arch/arm64/include/asm/sysreg.h
>>>> +++ b/arch/arm64/include/asm/sysreg.h
>>>> @@ -281,7 +281,6 @@
>>>>    #define SYS_PMSFCR_EL1_ST_SHIFT		18
>>>>    #define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
>>>> -#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
>>>>    #define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
>>>>    #define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
>>>> @@ -769,6 +768,9 @@
>>>>    #define ID_AA64DFR0_PMUVER_8_5		0x6
>>>>    #define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
>>>> +#define ID_AA64DFR0_PMSVER_8_2		0x1
>>>> +#define ID_AA64DFR0_PMSVER_8_3		0x2
>>>> +
>>>>    #define ID_DFR0_PERFMON_SHIFT		24
>>>>    #define ID_DFR0_PERFMON_8_1		0x4
>>>> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
>>>> index e51ddb6d63ed..5ec7ee0c8fa1 100644
>>>> --- a/drivers/perf/arm_spe_pmu.c
>>>> +++ b/drivers/perf/arm_spe_pmu.c
>>>> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
>>>>    	struct hlist_node			hotplug_node;
>>>>    	int					irq; /* PPI */
>>>> -
>>>> +	int					pmuver;
>>>>    	u16					min_period;
>>>>    	u16					counter_sz;
>>>> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
>>>>    /* Keep track of our dynamic hotplug state */
>>>>    static enum cpuhp_state arm_spe_pmu_online;
>>>> +static u64 sys_pmsevfr_el1_mask[] = {
>>>> +	[ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>>>> +		GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
>>>> +		BIT_ULL(1),
>>>> +	[ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>>>> +		GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
>>>> +		BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
>>>> +};
>>>> +
>>>>    enum arm_spe_pmu_buf_fault_action {
>>>>    	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
>>>>    	SPE_PMU_BUF_FAULT_ACT_FATAL,
>>>> @@ -670,7 +679,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>>>>    	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
>>>>    		return -ENOENT;
>>>> -	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
>>>> +	if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmuver])
>>>>    		return -EOPNOTSUPP;
>>>>    	if (attr->exclude_idle)
>>>> @@ -937,6 +946,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
>>>>    			fld, smp_processor_id());
>>>>    		return;
>>>>    	}
>>>> +	spe_pmu->pmuver = fld;
>>>
>>> How do we deal with cases where we have big.LITTLE system with differing
>>> SPE versions ?
>>
>> Good point.
>>
>> The first question we need to answer is: how to define SPE version?
>>  From my understanding, if SPE uses the same sample specification and
>> the same packet format, then we should consider the SPE is the same
>> version cross CPUs.  So even some CPUs are ARMv8.2 and other CPUs are
>> ARMv8.3 variants, we still should take the SPE as the same version.
>>
>> And when read the SPE driver in the file drivers/perf/arm_spe_pmu.c and
>> I concluded that so far the SPE perf driver is to only support SPE-v1
>> with single instance, it cannot support a complex usage case like
>> below:
>>
>>    CPU0-3: ARMv8.2 architecture with SPE
>>    CPU4-7: ARMv8.3 architecture with SPE
>>
>> For this case, if we take SPE as two different versions, let's say
>> SPE-8.2 and SPE-8.3, then should the SPE driver need to create multi
>> perf PMU events?  For example, we should create a perf PMU event
>> 'arm_spe_8.2' and another PMU event 'arm_spe_8.3'.
> 
> As we have supported SPE v2 (ARMv8.3-SPE) now, if we add the new
> of_device_id: "arm,statistical-profiling-extension-v2" and the new
> platform_device_id: "arm,spe-v2", we may really support two instance now.
> Even two different versions of SPE pmus which work on different range of
> cores with the same PPI. Their functional scopes are the same as the PPI partitions.
> 
>> Another option is we always take this as SPE-v1 and only create single
>> PMU event, just keep what's we are doing with the perf event
>> 'arm_spe_0', but the driver needs to dynamically detect SPE PMU version
>> number in the function arm_spe_pmu_event_init(), and then based on
>> version number to select corresponding mask for PMSEVFR.
> 
> Thus, the driver will service two devices, and will also register two PMUs 'arm_spe_0' and
> 'arm_spe_1', so i think there is no conflict here.

Or the other option is to strictly support only one version - the lowest
version supported by the system, which is compatible with all the
others. This could be achieved by using the sanitised feature value of
the SPE version.

Will,

What do you prefer?

> 
> Back to Suzuki's question, refer to the ACPI parsing code for SPE device, function
> arm_spe_acpi_register_device(), there is a check of hetero_id for all cores.
> It seems that we only support homogeneous ACPI/SPE machines, but i can't find the similar
> check in OF/SPE parsing code.

I think the driver assumes that the system supports a uniform version
of the SPE (which was valid when the driver was written).

Cheers
Suzuki

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, back to index

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-24  9:16 [PATCH 0/4] Add support for ARMv8.3-SPE Wei Li
2020-07-24  9:16 ` [PATCH 1/4] drivers/perf: " Wei Li
2020-07-28 12:27   ` Leo Yan
2020-07-28 13:24     ` liwei (GF)
2020-07-29  7:08       ` Leo Yan
2020-07-29  9:12   ` Suzuki K Poulose
2020-07-30  8:14     ` Leo Yan
2020-07-31 12:18       ` liwei (GF)
2020-07-31 14:01         ` Suzuki K Poulose
2020-07-24  9:16 ` [PATCH 2/4] perf: arm-spe: " Wei Li
2020-07-29  6:29   ` Leo Yan
2020-07-29  7:21     ` liwei (GF)
2020-07-29  7:28       ` Leo Yan
2020-07-29  7:42         ` liwei (GF)
2020-07-24  9:16 ` [PATCH 3/4] perf auxtrace: Add new itrace options " Wei Li
2020-07-29  6:51   ` Leo Yan
2020-07-24  9:16 ` [PATCH 4/4] perf: arm-spe: Synthesize new events " Wei Li
2020-07-28 12:06 ` [PATCH 0/4] Add support " Arnaldo Carvalho de Melo
2020-07-28 12:41   ` Leo Yan

Linux-ARM-Kernel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-arm-kernel/0 linux-arm-kernel/git/0.git
	git clone --mirror https://lore.kernel.org/linux-arm-kernel/1 linux-arm-kernel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-arm-kernel linux-arm-kernel/ https://lore.kernel.org/linux-arm-kernel \
		linux-arm-kernel@lists.infradead.org
	public-inbox-index linux-arm-kernel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-arm-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git