All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marc Zyngier <maz@kernel.org>
To: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Cc: linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org, Mark Rutland <mark.rutland@arm.com>,
	Will Deacon <will@kernel.org>, Hector Martin <marcan@marcan.st>,
	Sven Peter <sven@svenpeter.dev>, Rob Herring <robh+dt@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH 8/8] drivers/perf: Add Apple icestorm/firestorm CPU PMU driver
Date: Sun, 14 Nov 2021 18:35:47 +0000	[thread overview]
Message-ID: <87h7ceegak.wl-maz@kernel.org> (raw)
In-Reply-To: <YZES+gF7WdCiCwAe@sunset>

On Sun, 14 Nov 2021 13:45:30 +0000,
Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
> 
> > +/* Counters */
> > +#define SYS_IMP_APL_PMC0_EL1	sys_reg(3, 2, 15, 0, 0)
> > +#define SYS_IMP_APL_PMC1_EL1	sys_reg(3, 2, 15, 1, 0)
> > +#define SYS_IMP_APL_PMC2_EL1	sys_reg(3, 2, 15, 2, 0)
> > +#define SYS_IMP_APL_PMC3_EL1	sys_reg(3, 2, 15, 3, 0)
> > +#define SYS_IMP_APL_PMC4_EL1	sys_reg(3, 2, 15, 4, 0)
> > +#define SYS_IMP_APL_PMC5_EL1	sys_reg(3, 2, 15, 5, 0)
> > +#define SYS_IMP_APL_PMC6_EL1	sys_reg(3, 2, 15, 6, 0)
> > +#define SYS_IMP_APL_PMC7_EL1	sys_reg(3, 2, 15, 7, 0)
> --gap--
> > +#define SYS_IMP_APL_PMC8_EL1	sys_reg(3, 2, 15, 9, 0)
> > +#define SYS_IMP_APL_PMC9_EL1	sys_reg(3, 2, 15, 10, 0)
> 
> Do we know what the gap is?

If it exists, it is an IMPDEF register.

> 
> > +/*
> > + * Description of the events we actually know about, as well as those with
> > + * a specific counter affinity. Yes, this is a grand total of two known
> > + * counters, and the rest is anybody's guess.
> > + *
> > + * Not all counters can count all events. Counters #0 and #1 are wired to
> > + * count cycles and instructions respectively, and some events have
> > + * bizarre mappings (every other counter, or even *one* counter). These
> > + * restrictins equally apply to both P and E cores.
> 
> restrictions
> 
> > +/* Low level accessors. No synchronisation. */
> > +#define PMU_READ_COUNTER(_idx)						\
> > +	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
> > +
> > +#define PMU_WRITE_COUNTER(_val, _idx)					\
> > +	case _idx:							\
> > +		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
> > +		return
> > +
> > +static u64 m1_pmu_read_hw_counter(unsigned int index)
> > +{
> > +	switch (index) {
> > +		PMU_READ_COUNTER(0);
> > +		PMU_READ_COUNTER(1);
> > +		PMU_READ_COUNTER(2);
> > +		PMU_READ_COUNTER(3);
> > +		PMU_READ_COUNTER(4);
> > +		PMU_READ_COUNTER(5);
> > +		PMU_READ_COUNTER(6);
> > +		PMU_READ_COUNTER(7);
> > +		PMU_READ_COUNTER(8);
> > +		PMU_READ_COUNTER(9);
> > +	}
> > +
> > +	BUG();
> > +}
> > +
> > +static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
> > +{
> > +	switch (index) {
> > +		PMU_WRITE_COUNTER(val, 0);
> > +		PMU_WRITE_COUNTER(val, 1);
> > +		PMU_WRITE_COUNTER(val, 2);
> > +		PMU_WRITE_COUNTER(val, 3);
> > +		PMU_WRITE_COUNTER(val, 4);
> > +		PMU_WRITE_COUNTER(val, 5);
> > +		PMU_WRITE_COUNTER(val, 6);
> > +		PMU_WRITE_COUNTER(val, 7);
> > +		PMU_WRITE_COUNTER(val, 8);
> > +		PMU_WRITE_COUNTER(val, 9);
> > +	}
> > +
> > +	BUG();
> > +}
> 
> Probably cleaner to use a single switch and no macros, registers become
> greppable and the code is shorter too. Caveat: didn't check if it
> compiles.

I can tell you haven't!

> 
> 	static inline u64 m1_pmu_hw_counter(unsigned int index)
> 	{
> 		switch (index) {
> 		case 0: return SYS_IMP_APL_PMC0_EL1;
> 		case 1: return SYS_IMP_APL_PMC1_EL1;
> 		case 2: return SYS_IMP_APL_PMC2_EL1;
> 		case 3: return SYS_IMP_APL_PMC3_EL1;
> 		case 4: return SYS_IMP_APL_PMC4_EL1;
> 		case 5: return SYS_IMP_APL_PMC5_EL1;
> 		case 6: return SYS_IMP_APL_PMC6_EL1;
> 		case 7: return SYS_IMP_APL_PMC7_EL1;
> 		case 8: return SYS_IMP_APL_PMC8_EL1;
> 		case 9: return SYS_IMP_APL_PMC9_EL1;
> 		}
> 
> 		BUG();
> 	}
> 
> 	static u64 m1_pmu_read_hw_counter(unsigned int index) {
> 		return read_sysreg_s(m1_pmu_hw_counter(index));

read/write_sysreg_s() cannot take a variable as a parameter. They rely
on direct macro expansion to generate the inline asm. See the various
examples all over the code base where we have similar constructs for
this exact reason.

> 	}
> 
> 
> 	static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
> 	{
> 		write_sysreg_s(val, m1_pmu_hw_counter(index));
> 	}
> 
> > +static void __m1_pmu_enable_counter(unsigned int index, bool en)
> > +{
> > +	u64 val, bit;
> > +
> > +	switch (index) {
> > +	case 0 ... 7:
> > +		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
> > +		break;
> > +	case 8 ... 9:
> > +		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
> > +		break;
> > +	default:
> > +		BUG();
> > +	}
> > +
> > +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> > +
> > +	if (en)
> > +		val |= bit;
> > +	else
> > +		val &= ~bit;
> > +
> > +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> > +}
> ...
> > +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
> > +{
> > +	u64 val, bit;
> > +
> > +	switch (index) {
> > +	case 0 ... 7:
> > +		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
> > +		break;
> > +	case 8 ... 9:
> > +		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
> > +		break;
> > +	default:
> > +		BUG();
> > +	}
> > +
> > +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> > +
> > +	if (en)
> > +		val |= bit;
> > +	else
> > +		val &= ~bit;
> > +
> > +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> > +}
> 
> These two helper functions have basically the same logic -- maybe
> worth combining?

I've tried, and I find the result pretty unconvincing, see below.

diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index bc991fc892eb..40f9e14b12da 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -223,29 +223,38 @@ static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
 
 #define get_bit_offset(index, mask)	(__ffs(mask) + (index))
 
-static void __m1_pmu_enable_counter(unsigned int index, bool en)
+static void __m1_pmu_write_pmcr0(unsigned int index, bool en, u64 mask)
 {
 	u64 val, bit;
 
+	bit = BIT(get_bit_offset(index, mask));
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void __m1_pmu_enable_counter(unsigned int index, bool en)
+{
+	u64 mask;
+
 	switch (index) {
 	case 0 ... 7:
-		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
+		mask = PMCR0_CNT_ENABLE_0_7;
 		break;
 	case 8 ... 9:
-		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
+		mask = PMCR0_CNT_ENABLE_8_9;
+		index -= 8;
 		break;
 	default:
 		BUG();
 	}
 
-	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
-
-	if (en)
-		val |= bit;
-	else
-		val &= ~bit;
-
-	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	__m1_pmu_write_pmcr0(index, en, mask);
 }
 
 static void m1_pmu_enable_counter(unsigned int index)
@@ -260,27 +269,21 @@ static void m1_pmu_disable_counter(unsigned int index)
 
 static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
 {
-	u64 val, bit;
+	u64 mask;
 
 	switch (index) {
 	case 0 ... 7:
-		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
+		mask =  PMCR0_PMI_ENABLE_0_7;
 		break;
 	case 8 ... 9:
-		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
+		mask = PMCR0_PMI_ENABLE_8_9;
+		index -= 8;
 		break;
 	default:
 		BUG();
 	}
 
-	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
-
-	if (en)
-		val |= bit;
-	else
-		val &= ~bit;
-
-	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	__m1_pmu_write_pmcr0(index, en, mask);
 }
 
 static void m1_pmu_enable_counter_interrupt(unsigned int index)

>
> > +static void m1_pmu_configure_counter(unsigned int index, u8 event,
> > +				     bool user, bool kernel)
> > +{
> ....
> > +	switch (index) {
> > +	case 0 ... 1:
> > +		/* 0 and 1 have fixed events */
> > +		break;
> > +	case 2 ... 5:
> > +		shift = (index - 2) * 8;
> > +		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
> > +		val &= ~((u64)0xff << shift);
> > +		val |= (u64)event << shift;
> > +		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
> > +		break;
> > +	case 6 ... 9:
> > +		shift = (index - 6) * 8;
> > +		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
> > +		val &= ~((u64)0xff << shift);
> > +		val |= (u64)event << shift;
> > +		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
> > +		break;
> > +	}
> > +}
> 
> I'd love an explanation what's happening here.

PMESR{0,1} contain the event numbers for counters 2-5, resp 6-9, and
we happily shove an event number there. Is that the sort of
information you are after?

> 
> > +	/*
> > +	 * Place the event on the first free counter that can count
> > +	 * this event.
> > +	 *
> > +	 * We could do a better job if we had a view of all the events
> > +	 * counting on the PMU at any given time, and by placing the
> > +	 * most constraint events first.
> > +	 */
> 
> constraining
> 
> > +static int m1_pmu_device_probe(struct platform_device *pdev)
> > +{
> > +	int ret;
> > +
> > +	ret = arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
> > +	if (!ret) {
> > +		/*
> > +		 * If probe succeeds, taint the kernel as this is all
> > +		 * undocumented, implementation defined black magic.
> > +		 */
> > +		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
> > +	}
> > +
> > +	return ret;
> > +}
> 
> What are the implications of this taint? You could say that about every
> driver we've written for the M1, but...

This taint status appears on the kernel crash, telling people the
kernel is using CPU features that are not documented and about which
we can only hope that they work as we think they work. The same thing
takes place with KVM and the use of unadvertised GICv3 CPU registers.

Ideally, this would happen at early boot because of the fixed VHE
crap, but this is more effort than it deserves.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

WARNING: multiple messages have this Message-ID (diff)
From: Marc Zyngier <maz@kernel.org>
To: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Cc: linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org, Mark Rutland <mark.rutland@arm.com>,
	Will Deacon <will@kernel.org>, Hector Martin <marcan@marcan.st>,
	Sven Peter <sven@svenpeter.dev>, Rob Herring <robh+dt@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH 8/8] drivers/perf: Add Apple icestorm/firestorm CPU PMU driver
Date: Sun, 14 Nov 2021 18:35:47 +0000	[thread overview]
Message-ID: <87h7ceegak.wl-maz@kernel.org> (raw)
In-Reply-To: <YZES+gF7WdCiCwAe@sunset>

On Sun, 14 Nov 2021 13:45:30 +0000,
Alyssa Rosenzweig <alyssa@rosenzweig.io> wrote:
> 
> > +/* Counters */
> > +#define SYS_IMP_APL_PMC0_EL1	sys_reg(3, 2, 15, 0, 0)
> > +#define SYS_IMP_APL_PMC1_EL1	sys_reg(3, 2, 15, 1, 0)
> > +#define SYS_IMP_APL_PMC2_EL1	sys_reg(3, 2, 15, 2, 0)
> > +#define SYS_IMP_APL_PMC3_EL1	sys_reg(3, 2, 15, 3, 0)
> > +#define SYS_IMP_APL_PMC4_EL1	sys_reg(3, 2, 15, 4, 0)
> > +#define SYS_IMP_APL_PMC5_EL1	sys_reg(3, 2, 15, 5, 0)
> > +#define SYS_IMP_APL_PMC6_EL1	sys_reg(3, 2, 15, 6, 0)
> > +#define SYS_IMP_APL_PMC7_EL1	sys_reg(3, 2, 15, 7, 0)
> --gap--
> > +#define SYS_IMP_APL_PMC8_EL1	sys_reg(3, 2, 15, 9, 0)
> > +#define SYS_IMP_APL_PMC9_EL1	sys_reg(3, 2, 15, 10, 0)
> 
> Do we know what the gap is?

If it exists, it is an IMPDEF register.

> 
> > +/*
> > + * Description of the events we actually know about, as well as those with
> > + * a specific counter affinity. Yes, this is a grand total of two known
> > + * counters, and the rest is anybody's guess.
> > + *
> > + * Not all counters can count all events. Counters #0 and #1 are wired to
> > + * count cycles and instructions respectively, and some events have
> > + * bizarre mappings (every other counter, or even *one* counter). These
> > + * restrictins equally apply to both P and E cores.
> 
> restrictions
> 
> > +/* Low level accessors. No synchronisation. */
> > +#define PMU_READ_COUNTER(_idx)						\
> > +	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
> > +
> > +#define PMU_WRITE_COUNTER(_val, _idx)					\
> > +	case _idx:							\
> > +		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
> > +		return
> > +
> > +static u64 m1_pmu_read_hw_counter(unsigned int index)
> > +{
> > +	switch (index) {
> > +		PMU_READ_COUNTER(0);
> > +		PMU_READ_COUNTER(1);
> > +		PMU_READ_COUNTER(2);
> > +		PMU_READ_COUNTER(3);
> > +		PMU_READ_COUNTER(4);
> > +		PMU_READ_COUNTER(5);
> > +		PMU_READ_COUNTER(6);
> > +		PMU_READ_COUNTER(7);
> > +		PMU_READ_COUNTER(8);
> > +		PMU_READ_COUNTER(9);
> > +	}
> > +
> > +	BUG();
> > +}
> > +
> > +static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
> > +{
> > +	switch (index) {
> > +		PMU_WRITE_COUNTER(val, 0);
> > +		PMU_WRITE_COUNTER(val, 1);
> > +		PMU_WRITE_COUNTER(val, 2);
> > +		PMU_WRITE_COUNTER(val, 3);
> > +		PMU_WRITE_COUNTER(val, 4);
> > +		PMU_WRITE_COUNTER(val, 5);
> > +		PMU_WRITE_COUNTER(val, 6);
> > +		PMU_WRITE_COUNTER(val, 7);
> > +		PMU_WRITE_COUNTER(val, 8);
> > +		PMU_WRITE_COUNTER(val, 9);
> > +	}
> > +
> > +	BUG();
> > +}
> 
> Probably cleaner to use a single switch and no macros, registers become
> greppable and the code is shorter too. Caveat: didn't check if it
> compiles.

I can tell you haven't!

> 
> 	static inline u64 m1_pmu_hw_counter(unsigned int index)
> 	{
> 		switch (index) {
> 		case 0: return SYS_IMP_APL_PMC0_EL1;
> 		case 1: return SYS_IMP_APL_PMC1_EL1;
> 		case 2: return SYS_IMP_APL_PMC2_EL1;
> 		case 3: return SYS_IMP_APL_PMC3_EL1;
> 		case 4: return SYS_IMP_APL_PMC4_EL1;
> 		case 5: return SYS_IMP_APL_PMC5_EL1;
> 		case 6: return SYS_IMP_APL_PMC6_EL1;
> 		case 7: return SYS_IMP_APL_PMC7_EL1;
> 		case 8: return SYS_IMP_APL_PMC8_EL1;
> 		case 9: return SYS_IMP_APL_PMC9_EL1;
> 		}
> 
> 		BUG();
> 	}
> 
> 	static u64 m1_pmu_read_hw_counter(unsigned int index) {
> 		return read_sysreg_s(m1_pmu_hw_counter(index));

read/write_sysreg_s() cannot take a variable as a parameter. They rely
on direct macro expansion to generate the inline asm. See the various
examples all over the code base where we have similar constructs for
this exact reason.

> 	}
> 
> 
> 	static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
> 	{
> 		write_sysreg_s(val, m1_pmu_hw_counter(index));
> 	}
> 
> > +static void __m1_pmu_enable_counter(unsigned int index, bool en)
> > +{
> > +	u64 val, bit;
> > +
> > +	switch (index) {
> > +	case 0 ... 7:
> > +		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
> > +		break;
> > +	case 8 ... 9:
> > +		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
> > +		break;
> > +	default:
> > +		BUG();
> > +	}
> > +
> > +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> > +
> > +	if (en)
> > +		val |= bit;
> > +	else
> > +		val &= ~bit;
> > +
> > +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> > +}
> ...
> > +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
> > +{
> > +	u64 val, bit;
> > +
> > +	switch (index) {
> > +	case 0 ... 7:
> > +		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
> > +		break;
> > +	case 8 ... 9:
> > +		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
> > +		break;
> > +	default:
> > +		BUG();
> > +	}
> > +
> > +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> > +
> > +	if (en)
> > +		val |= bit;
> > +	else
> > +		val &= ~bit;
> > +
> > +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> > +}
> 
> These two helper functions have basically the same logic -- maybe
> worth combining?

I've tried, and I find the result pretty unconvincing, see below.

diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index bc991fc892eb..40f9e14b12da 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -223,29 +223,38 @@ static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
 
 #define get_bit_offset(index, mask)	(__ffs(mask) + (index))
 
-static void __m1_pmu_enable_counter(unsigned int index, bool en)
+static void __m1_pmu_write_pmcr0(unsigned int index, bool en, u64 mask)
 {
 	u64 val, bit;
 
+	bit = BIT(get_bit_offset(index, mask));
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void __m1_pmu_enable_counter(unsigned int index, bool en)
+{
+	u64 mask;
+
 	switch (index) {
 	case 0 ... 7:
-		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
+		mask = PMCR0_CNT_ENABLE_0_7;
 		break;
 	case 8 ... 9:
-		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
+		mask = PMCR0_CNT_ENABLE_8_9;
+		index -= 8;
 		break;
 	default:
 		BUG();
 	}
 
-	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
-
-	if (en)
-		val |= bit;
-	else
-		val &= ~bit;
-
-	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	__m1_pmu_write_pmcr0(index, en, mask);
 }
 
 static void m1_pmu_enable_counter(unsigned int index)
@@ -260,27 +269,21 @@ static void m1_pmu_disable_counter(unsigned int index)
 
 static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
 {
-	u64 val, bit;
+	u64 mask;
 
 	switch (index) {
 	case 0 ... 7:
-		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
+		mask =  PMCR0_PMI_ENABLE_0_7;
 		break;
 	case 8 ... 9:
-		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
+		mask = PMCR0_PMI_ENABLE_8_9;
+		index -= 8;
 		break;
 	default:
 		BUG();
 	}
 
-	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
-
-	if (en)
-		val |= bit;
-	else
-		val &= ~bit;
-
-	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	__m1_pmu_write_pmcr0(index, en, mask);
 }
 
 static void m1_pmu_enable_counter_interrupt(unsigned int index)

>
> > +static void m1_pmu_configure_counter(unsigned int index, u8 event,
> > +				     bool user, bool kernel)
> > +{
> ....
> > +	switch (index) {
> > +	case 0 ... 1:
> > +		/* 0 and 1 have fixed events */
> > +		break;
> > +	case 2 ... 5:
> > +		shift = (index - 2) * 8;
> > +		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
> > +		val &= ~((u64)0xff << shift);
> > +		val |= (u64)event << shift;
> > +		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
> > +		break;
> > +	case 6 ... 9:
> > +		shift = (index - 6) * 8;
> > +		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
> > +		val &= ~((u64)0xff << shift);
> > +		val |= (u64)event << shift;
> > +		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
> > +		break;
> > +	}
> > +}
> 
> I'd love an explanation what's happening here.

PMESR{0,1} contain the event numbers for counters 2-5, resp 6-9, and
we happily shove an event number there. Is that the sort of
information you are after?

> 
> > +	/*
> > +	 * Place the event on the first free counter that can count
> > +	 * this event.
> > +	 *
> > +	 * We could do a better job if we had a view of all the events
> > +	 * counting on the PMU at any given time, and by placing the
> > +	 * most constraint events first.
> > +	 */
> 
> constraining
> 
> > +static int m1_pmu_device_probe(struct platform_device *pdev)
> > +{
> > +	int ret;
> > +
> > +	ret = arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
> > +	if (!ret) {
> > +		/*
> > +		 * If probe succeeds, taint the kernel as this is all
> > +		 * undocumented, implementation defined black magic.
> > +		 */
> > +		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
> > +	}
> > +
> > +	return ret;
> > +}
> 
> What are the implications of this taint? You could say that about every
> driver we've written for the M1, but...

This taint status appears on the kernel crash, telling people the
kernel is using CPU features that are not documented and about which
we can only hope that they work as we think they work. The same thing
takes place with KVM and the use of unadvertised GICv3 CPU registers.

Ideally, this would happen at early boot because of the fixed VHE
crap, but this is more effort than it deserves.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  reply	other threads:[~2021-11-14 18:36 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-13 11:54 [PATCH 0/8] drivers/perf: CPU PMU driver for Apple M1 Marc Zyngier
2021-11-13 11:54 ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 1/8] dt-bindings: arm-pmu: Document Apple PMU compatible strings Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-29 21:24   ` Rob Herring
2021-11-29 21:24     ` Rob Herring
2021-11-13 11:54 ` [PATCH 2/8] dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts Marc Zyngier
2021-11-13 11:54   ` [PATCH 2/8] dt-bindings: apple, aic: " Marc Zyngier
2021-11-29 21:25   ` [PATCH 2/8] dt-bindings: apple,aic: " Rob Herring
2021-11-29 21:25     ` Rob Herring
2021-11-13 11:54 ` [PATCH 3/8] irqchip/apple-aic: Add cpumasks for E and P cores Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 4/8] irqchip/apple-aic: Wire PMU interrupts Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 5/8] irqchip/apple-aic: Move PMU-specific registers to their own include file Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 6/8] arm64: apple: t8301: Add PMU nodes Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 7/8] drivers/perf: arm_pmu: Handle 47 bit counters Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 8/8] drivers/perf: Add Apple icestorm/firestorm CPU PMU driver Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 13:04   ` Alyssa Rosenzweig
2021-11-13 13:04     ` Alyssa Rosenzweig
2021-11-14  2:43     ` Dougall
2021-11-14  2:43       ` Dougall
2021-11-15 10:51       ` Marc Zyngier
2021-11-15 10:51         ` Marc Zyngier
2021-11-14 13:45   ` Alyssa Rosenzweig
2021-11-14 13:45     ` Alyssa Rosenzweig
2021-11-14 18:35     ` Marc Zyngier [this message]
2021-11-14 18:35       ` Marc Zyngier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87h7ceegak.wl-maz@kernel.org \
    --to=maz@kernel.org \
    --cc=alyssa@rosenzweig.io \
    --cc=devicetree@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcan@marcan.st \
    --cc=mark.rutland@arm.com \
    --cc=robh+dt@kernel.org \
    --cc=sven@svenpeter.dev \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.