All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
To: Marc Zyngier <maz@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org, Mark Rutland <mark.rutland@arm.com>,
	Will Deacon <will@kernel.org>, Hector Martin <marcan@marcan.st>,
	Sven Peter <sven@svenpeter.dev>, Rob Herring <robh+dt@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH 8/8] drivers/perf: Add Apple icestorm/firestorm CPU PMU driver
Date: Sun, 14 Nov 2021 08:45:30 -0500	[thread overview]
Message-ID: <YZES+gF7WdCiCwAe@sunset> (raw)
In-Reply-To: <20211113115429.4027571-9-maz@kernel.org>

> +/* Counters */
> +#define SYS_IMP_APL_PMC0_EL1	sys_reg(3, 2, 15, 0, 0)
> +#define SYS_IMP_APL_PMC1_EL1	sys_reg(3, 2, 15, 1, 0)
> +#define SYS_IMP_APL_PMC2_EL1	sys_reg(3, 2, 15, 2, 0)
> +#define SYS_IMP_APL_PMC3_EL1	sys_reg(3, 2, 15, 3, 0)
> +#define SYS_IMP_APL_PMC4_EL1	sys_reg(3, 2, 15, 4, 0)
> +#define SYS_IMP_APL_PMC5_EL1	sys_reg(3, 2, 15, 5, 0)
> +#define SYS_IMP_APL_PMC6_EL1	sys_reg(3, 2, 15, 6, 0)
> +#define SYS_IMP_APL_PMC7_EL1	sys_reg(3, 2, 15, 7, 0)
--gap--
> +#define SYS_IMP_APL_PMC8_EL1	sys_reg(3, 2, 15, 9, 0)
> +#define SYS_IMP_APL_PMC9_EL1	sys_reg(3, 2, 15, 10, 0)

Do we know what the gap is?

> +/*
> + * Description of the events we actually know about, as well as those with
> + * a specific counter affinity. Yes, this is a grand total of two known
> + * counters, and the rest is anybody's guess.
> + *
> + * Not all counters can count all events. Counters #0 and #1 are wired to
> + * count cycles and instructions respectively, and some events have
> + * bizarre mappings (every other counter, or even *one* counter). These
> + * restrictins equally apply to both P and E cores.

restrictions

> +/* Low level accessors. No synchronisation. */
> +#define PMU_READ_COUNTER(_idx)						\
> +	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
> +
> +#define PMU_WRITE_COUNTER(_val, _idx)					\
> +	case _idx:							\
> +		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
> +		return
> +
> +static u64 m1_pmu_read_hw_counter(unsigned int index)
> +{
> +	switch (index) {
> +		PMU_READ_COUNTER(0);
> +		PMU_READ_COUNTER(1);
> +		PMU_READ_COUNTER(2);
> +		PMU_READ_COUNTER(3);
> +		PMU_READ_COUNTER(4);
> +		PMU_READ_COUNTER(5);
> +		PMU_READ_COUNTER(6);
> +		PMU_READ_COUNTER(7);
> +		PMU_READ_COUNTER(8);
> +		PMU_READ_COUNTER(9);
> +	}
> +
> +	BUG();
> +}
> +
> +static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
> +{
> +	switch (index) {
> +		PMU_WRITE_COUNTER(val, 0);
> +		PMU_WRITE_COUNTER(val, 1);
> +		PMU_WRITE_COUNTER(val, 2);
> +		PMU_WRITE_COUNTER(val, 3);
> +		PMU_WRITE_COUNTER(val, 4);
> +		PMU_WRITE_COUNTER(val, 5);
> +		PMU_WRITE_COUNTER(val, 6);
> +		PMU_WRITE_COUNTER(val, 7);
> +		PMU_WRITE_COUNTER(val, 8);
> +		PMU_WRITE_COUNTER(val, 9);
> +	}
> +
> +	BUG();
> +}

Probably cleaner to use a single switch and no macros; registers become
greppable and the code is shorter too. Caveat: didn't check if it
compiles.

	static inline u64 m1_pmu_hw_counter(unsigned int index)
	{
		switch (index) {
		case 0: return SYS_IMP_APL_PMC0_EL1;
		case 1: return SYS_IMP_APL_PMC1_EL1;
		case 2: return SYS_IMP_APL_PMC2_EL1;
		case 3: return SYS_IMP_APL_PMC3_EL1;
		case 4: return SYS_IMP_APL_PMC4_EL1;
		case 5: return SYS_IMP_APL_PMC5_EL1;
		case 6: return SYS_IMP_APL_PMC6_EL1;
		case 7: return SYS_IMP_APL_PMC7_EL1;
		case 8: return SYS_IMP_APL_PMC8_EL1;
		case 9: return SYS_IMP_APL_PMC9_EL1;
		}

		BUG();
	}

	static u64 m1_pmu_read_hw_counter(unsigned int index) {
		return read_sysreg_s(m1_pmu_hw_counter(index));
	}


	static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
	{
		write_sysreg_s(val, m1_pmu_hw_counter(index));
	}

> +static void __m1_pmu_enable_counter(unsigned int index, bool en)
> +{
> +	u64 val, bit;
> +
> +	switch (index) {
> +	case 0 ... 7:
> +		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
> +		break;
> +	case 8 ... 9:
> +		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> +
> +	if (en)
> +		val |= bit;
> +	else
> +		val &= ~bit;
> +
> +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> +}
...
> +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
> +{
> +	u64 val, bit;
> +
> +	switch (index) {
> +	case 0 ... 7:
> +		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
> +		break;
> +	case 8 ... 9:
> +		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> +
> +	if (en)
> +		val |= bit;
> +	else
> +		val &= ~bit;
> +
> +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> +}

These two helper functions have basically the same logic -- maybe worth combining?

> +static void m1_pmu_configure_counter(unsigned int index, u8 event,
> +				     bool user, bool kernel)
> +{
....
> +	switch (index) {
> +	case 0 ... 1:
> +		/* 0 and 1 have fixed events */
> +		break;
> +	case 2 ... 5:
> +		shift = (index - 2) * 8;
> +		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
> +		val &= ~((u64)0xff << shift);
> +		val |= (u64)event << shift;
> +		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
> +		break;
> +	case 6 ... 9:
> +		shift = (index - 6) * 8;
> +		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
> +		val &= ~((u64)0xff << shift);
> +		val |= (u64)event << shift;
> +		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
> +		break;
> +	}
> +}

I'd love an explanation of what's happening here.

> +	/*
> +	 * Place the event on the first free counter that can count
> +	 * this event.
> +	 *
> +	 * We could do a better job if we had a view of all the events
> +	 * counting on the PMU at any given time, and by placing the
> +	 * most constraint events first.
> +	 */

constraining

> +static int m1_pmu_device_probe(struct platform_device *pdev)
> +{
> +	int ret;
> +
> +	ret = arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
> +	if (!ret) {
> +		/*
> +		 * If probe succeeds, taint the kernel as this is all
> +		 * undocumented, implementation defined black magic.
> +		 */
> +		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
> +	}
> +
> +	return ret;
> +}

What are the implications of this taint? You could say that about every
driver we've written for the M1, but...

WARNING: multiple messages have this Message-ID (diff)
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
To: Marc Zyngier <maz@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org, Mark Rutland <mark.rutland@arm.com>,
	Will Deacon <will@kernel.org>, Hector Martin <marcan@marcan.st>,
	Sven Peter <sven@svenpeter.dev>, Rob Herring <robh+dt@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH 8/8] drivers/perf: Add Apple icestorm/firestorm CPU PMU driver
Date: Sun, 14 Nov 2021 08:45:30 -0500	[thread overview]
Message-ID: <YZES+gF7WdCiCwAe@sunset> (raw)
In-Reply-To: <20211113115429.4027571-9-maz@kernel.org>

> +/* Counters */
> +#define SYS_IMP_APL_PMC0_EL1	sys_reg(3, 2, 15, 0, 0)
> +#define SYS_IMP_APL_PMC1_EL1	sys_reg(3, 2, 15, 1, 0)
> +#define SYS_IMP_APL_PMC2_EL1	sys_reg(3, 2, 15, 2, 0)
> +#define SYS_IMP_APL_PMC3_EL1	sys_reg(3, 2, 15, 3, 0)
> +#define SYS_IMP_APL_PMC4_EL1	sys_reg(3, 2, 15, 4, 0)
> +#define SYS_IMP_APL_PMC5_EL1	sys_reg(3, 2, 15, 5, 0)
> +#define SYS_IMP_APL_PMC6_EL1	sys_reg(3, 2, 15, 6, 0)
> +#define SYS_IMP_APL_PMC7_EL1	sys_reg(3, 2, 15, 7, 0)
--gap--
> +#define SYS_IMP_APL_PMC8_EL1	sys_reg(3, 2, 15, 9, 0)
> +#define SYS_IMP_APL_PMC9_EL1	sys_reg(3, 2, 15, 10, 0)

Do we know what the gap is?

> +/*
> + * Description of the events we actually know about, as well as those with
> + * a specific counter affinity. Yes, this is a grand total of two known
> + * counters, and the rest is anybody's guess.
> + *
> + * Not all counters can count all events. Counters #0 and #1 are wired to
> + * count cycles and instructions respectively, and some events have
> + * bizarre mappings (every other counter, or even *one* counter). These
> + * restrictins equally apply to both P and E cores.

restrictions

> +/* Low level accessors. No synchronisation. */
> +#define PMU_READ_COUNTER(_idx)						\
> +	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
> +
> +#define PMU_WRITE_COUNTER(_val, _idx)					\
> +	case _idx:							\
> +		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
> +		return
> +
> +static u64 m1_pmu_read_hw_counter(unsigned int index)
> +{
> +	switch (index) {
> +		PMU_READ_COUNTER(0);
> +		PMU_READ_COUNTER(1);
> +		PMU_READ_COUNTER(2);
> +		PMU_READ_COUNTER(3);
> +		PMU_READ_COUNTER(4);
> +		PMU_READ_COUNTER(5);
> +		PMU_READ_COUNTER(6);
> +		PMU_READ_COUNTER(7);
> +		PMU_READ_COUNTER(8);
> +		PMU_READ_COUNTER(9);
> +	}
> +
> +	BUG();
> +}
> +
> +static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
> +{
> +	switch (index) {
> +		PMU_WRITE_COUNTER(val, 0);
> +		PMU_WRITE_COUNTER(val, 1);
> +		PMU_WRITE_COUNTER(val, 2);
> +		PMU_WRITE_COUNTER(val, 3);
> +		PMU_WRITE_COUNTER(val, 4);
> +		PMU_WRITE_COUNTER(val, 5);
> +		PMU_WRITE_COUNTER(val, 6);
> +		PMU_WRITE_COUNTER(val, 7);
> +		PMU_WRITE_COUNTER(val, 8);
> +		PMU_WRITE_COUNTER(val, 9);
> +	}
> +
> +	BUG();
> +}

Probably cleaner to use a single switch and no macros; registers become
greppable and the code is shorter too. Caveat: didn't check if it
compiles.

	static inline u64 m1_pmu_hw_counter(unsigned int index)
	{
		switch (index) {
		case 0: return SYS_IMP_APL_PMC0_EL1;
		case 1: return SYS_IMP_APL_PMC1_EL1;
		case 2: return SYS_IMP_APL_PMC2_EL1;
		case 3: return SYS_IMP_APL_PMC3_EL1;
		case 4: return SYS_IMP_APL_PMC4_EL1;
		case 5: return SYS_IMP_APL_PMC5_EL1;
		case 6: return SYS_IMP_APL_PMC6_EL1;
		case 7: return SYS_IMP_APL_PMC7_EL1;
		case 8: return SYS_IMP_APL_PMC8_EL1;
		case 9: return SYS_IMP_APL_PMC9_EL1;
		}

		BUG();
	}

	static u64 m1_pmu_read_hw_counter(unsigned int index) {
		return read_sysreg_s(m1_pmu_hw_counter(index));
	}


	static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
	{
		write_sysreg_s(val, m1_pmu_hw_counter(index));
	}

> +static void __m1_pmu_enable_counter(unsigned int index, bool en)
> +{
> +	u64 val, bit;
> +
> +	switch (index) {
> +	case 0 ... 7:
> +		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
> +		break;
> +	case 8 ... 9:
> +		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> +
> +	if (en)
> +		val |= bit;
> +	else
> +		val &= ~bit;
> +
> +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> +}
...
> +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
> +{
> +	u64 val, bit;
> +
> +	switch (index) {
> +	case 0 ... 7:
> +		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
> +		break;
> +	case 8 ... 9:
> +		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
> +
> +	if (en)
> +		val |= bit;
> +	else
> +		val &= ~bit;
> +
> +	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
> +}

These two helper functions have basically the same logic -- maybe worth combining?

> +static void m1_pmu_configure_counter(unsigned int index, u8 event,
> +				     bool user, bool kernel)
> +{
....
> +	switch (index) {
> +	case 0 ... 1:
> +		/* 0 and 1 have fixed events */
> +		break;
> +	case 2 ... 5:
> +		shift = (index - 2) * 8;
> +		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
> +		val &= ~((u64)0xff << shift);
> +		val |= (u64)event << shift;
> +		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
> +		break;
> +	case 6 ... 9:
> +		shift = (index - 6) * 8;
> +		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
> +		val &= ~((u64)0xff << shift);
> +		val |= (u64)event << shift;
> +		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
> +		break;
> +	}
> +}

I'd love an explanation of what's happening here.

> +	/*
> +	 * Place the event on the first free counter that can count
> +	 * this event.
> +	 *
> +	 * We could do a better job if we had a view of all the events
> +	 * counting on the PMU at any given time, and by placing the
> +	 * most constraint events first.
> +	 */

constraining

> +static int m1_pmu_device_probe(struct platform_device *pdev)
> +{
> +	int ret;
> +
> +	ret = arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
> +	if (!ret) {
> +		/*
> +		 * If probe succeeds, taint the kernel as this is all
> +		 * undocumented, implementation defined black magic.
> +		 */
> +		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
> +	}
> +
> +	return ret;
> +}

What are the implications of this taint? You could say that about every
driver we've written for the M1, but...

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2021-11-14 13:45 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-13 11:54 [PATCH 0/8] drivers/perf: CPU PMU driver for Apple M1 Marc Zyngier
2021-11-13 11:54 ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 1/8] dt-bindings: arm-pmu: Document Apple PMU compatible strings Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-29 21:24   ` Rob Herring
2021-11-29 21:24     ` Rob Herring
2021-11-13 11:54 ` [PATCH 2/8] dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts Marc Zyngier
2021-11-13 11:54   ` [PATCH 2/8] dt-bindings: apple, aic: " Marc Zyngier
2021-11-29 21:25   ` [PATCH 2/8] dt-bindings: apple,aic: " Rob Herring
2021-11-29 21:25     ` Rob Herring
2021-11-13 11:54 ` [PATCH 3/8] irqchip/apple-aic: Add cpumasks for E and P cores Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 4/8] irqchip/apple-aic: Wire PMU interrupts Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 5/8] irqchip/apple-aic: Move PMU-specific registers to their own include file Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 6/8] arm64: apple: t8301: Add PMU nodes Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 7/8] drivers/perf: arm_pmu: Handle 47 bit counters Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 11:54 ` [PATCH 8/8] drivers/perf: Add Apple icestorm/firestorm CPU PMU driver Marc Zyngier
2021-11-13 11:54   ` Marc Zyngier
2021-11-13 13:04   ` Alyssa Rosenzweig
2021-11-13 13:04     ` Alyssa Rosenzweig
2021-11-14  2:43     ` Dougall
2021-11-14  2:43       ` Dougall
2021-11-15 10:51       ` Marc Zyngier
2021-11-15 10:51         ` Marc Zyngier
2021-11-14 13:45   ` Alyssa Rosenzweig [this message]
2021-11-14 13:45     ` Alyssa Rosenzweig
2021-11-14 18:35     ` Marc Zyngier
2021-11-14 18:35       ` Marc Zyngier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YZES+gF7WdCiCwAe@sunset \
    --to=alyssa@rosenzweig.io \
    --cc=devicetree@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcan@marcan.st \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=robh+dt@kernel.org \
    --cc=sven@svenpeter.dev \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.