linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR
@ 2022-02-26 12:33 Linu Cherian
  2022-02-26 13:50 ` Marc Zyngier
  0 siblings, 1 reply; 4+ messages in thread
From: Linu Cherian @ 2022-02-26 12:33 UTC (permalink / raw)
  To: maz, tglx, catalin.marinas, will
  Cc: linux-kernel, linux-arm-kernel, linuc.decode, Linu Cherian

When a IAR register read races with a GIC interrupt RELEASE event,
GIC-CPU interface could wrongly return a valid INTID to the CPU
for an interrupt that is already released(non activated) instead of 0x3ff.

As a side effect, an interrupt handler could run twice, once with
interrupt priority and then with idle priority.

As a workaround, gic_read_iar is updated so that it will return a
valid interrupt ID only if there is a change in the active priority list
after the IAR read on all the affected Silicons.

Along with this, Thunderx erratum 23154 is reworked to use GIC IIDR
based quirk management for the sake of consistency and also
because there is workaround overlap on some silicon variants.

Signed-off-by: Linu Cherian <lcherian@marvell.com>
---
 Documentation/arm64/silicon-errata.rst |  4 +-
 arch/arm64/Kconfig                     | 10 -----
 arch/arm64/include/asm/arch_gicv3.h    | 24 +++++++++--
 arch/arm64/kernel/cpu_errata.c         |  8 ----
 arch/arm64/tools/cpucaps               |  1 -
 drivers/irqchip/irq-gic-v3.c           | 56 +++++++++++++++++++++++++-
 6 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ea281dd75517..f602faf4bf82 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -136,10 +136,12 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 +----------------+-----------------+-----------------+-----------------------------+
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
+| Cavium         | ThunderX GICv3  | #23154          | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX GICv3  | #38545          | N/A                         |
++----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 09b885cc4db5..889cb56bf5ec 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -890,16 +890,6 @@ config CAVIUM_ERRATUM_23144
 
 	  If unsure, say Y.
 
-config CAVIUM_ERRATUM_23154
-	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
-	default y
-	help
-	  The gicv3 of ThunderX requires a modified version for
-	  reading the IAR status to ensure data synchronization
-	  (access to icc_iar1_el1 is not sync'ed before and after).
-
-	  If unsure, say Y.
-
 config CAVIUM_ERRATUM_27456
 	bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption"
 	default y
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 4ad22c3135db..bc98a60a4bcb 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -47,21 +47,37 @@ static inline u64 gic_read_iar_common(void)
 	return irqstat;
 }
 
-/*
+/* Marvell Erratum 38545
+ *
+ * When a IAR register read races with a GIC interrupt RELEASE event,
+ * GIC-CPU interface could wrongly return a valid INTID to the CPU
+ * for an interrupt that is already released(non activated) instead of 0x3ff.
+ *
+ * To workaround this, return a valid interrupt ID only if there is a change
+ * in the active priority list after the IAR read.
+ *
  * Cavium ThunderX erratum 23154
  *
  * The gicv3 of ThunderX requires a modified version for reading the
  * IAR status to ensure data synchronization (access to icc_iar1_el1
  * is not sync'ed before and after).
+ *
+ * Have merged both the workarounds into a common function since,
+ * 1. On Thunderx 88xx 1.x both erratas are applicable.
+ * 2. Having extra nops doesn't add any side effects for Silicons where
+ *    erratum 23154 is not applicable.
  */
-static inline u64 gic_read_iar_cavium_thunderx(void)
+static inline u64 gic_read_iar_marvell_38545_23154(void)
 {
-	u64 irqstat;
+	u64 irqstat, apr;
 
+	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
 	nops(8);
 	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	nops(4);
-	mb();
+	dsb(sy);
+	if (unlikely(apr == read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+		return 0x3ff;
 
 	return irqstat;
 }
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b217941713a8..79beb800ee79 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -423,14 +423,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		ERRATA_MIDR_RANGE_LIST(erratum_845719_list),
 	},
 #endif
-#ifdef CONFIG_CAVIUM_ERRATUM_23154
-	{
-	/* Cavium ThunderX, pass 1.x */
-		.desc = "Cavium erratum 23154",
-		.capability = ARM64_WORKAROUND_CAVIUM_23154,
-		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
-	},
-#endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
 	{
 		.desc = "Cavium erratum 27456",
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 9c65b1e25a96..3f751fe4fec4 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -62,7 +62,6 @@ WORKAROUND_2077057
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
 WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
-WORKAROUND_CAVIUM_23154
 WORKAROUND_CAVIUM_27456
 WORKAROUND_CAVIUM_30115
 WORKAROUND_CAVIUM_TX2_219_PRFM
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5e935d97207d..a3b58bf4fce4 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -35,6 +35,8 @@
 
 #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996	(1ULL << 0)
 #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539	(1ULL << 1)
+#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154	(1ULL << 2)
+#define FLAGS_WORKAROUND_MARVELL_ERRATUM_38545	(1ULL << 3)
 
 #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
 
@@ -60,6 +62,7 @@ struct gic_chip_data {
 
 static struct gic_chip_data gic_data __read_mostly;
 static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
+static DEFINE_STATIC_KEY_FALSE(gic_iar_quirk);
 
 #define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
 #define GIC_LINE_NR	min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
@@ -235,10 +238,19 @@ static void gic_redist_wait_for_rwp(void)
 
 #ifdef CONFIG_ARM64
 
+static u64 __maybe_unused gic_read_iar_fixup(void)
+{
+	if (gic_data.flags & FLAGS_WORKAROUND_MARVELL_ERRATUM_38545 ||
+		gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154)
+		return gic_read_iar_marvell_38545_23154();
+	else /* Not possible */
+		return ICC_IAR1_EL1_SPURIOUS;
+}
+
 static u64 __maybe_unused gic_read_iar(void)
 {
-	if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
-		return gic_read_iar_cavium_thunderx();
+	if (static_branch_unlikely(&gic_iar_quirk))
+		return gic_read_iar_fixup();
 	else
 		return gic_read_iar_common();
 }
@@ -1614,6 +1626,16 @@ static bool gic_enable_quirk_msm8996(void *data)
 	return true;
 }
 
+static bool gic_enable_quirk_cavium_23154(void *data)
+{
+	struct gic_chip_data *d = data;
+
+	d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154;
+	static_branch_enable(&gic_iar_quirk);
+
+	return true;
+}
+
 static bool gic_enable_quirk_cavium_38539(void *data)
 {
 	struct gic_chip_data *d = data;
@@ -1623,6 +1645,16 @@ static bool gic_enable_quirk_cavium_38539(void *data)
 	return true;
 }
 
+static bool gic_enable_quirk_marvell_38545(void *data)
+{
+	struct gic_chip_data *d = data;
+
+	d->flags |= FLAGS_WORKAROUND_MARVELL_ERRATUM_38545;
+	static_branch_enable(&gic_iar_quirk);
+
+	return true;
+}
+
 static bool gic_enable_quirk_hip06_07(void *data)
 {
 	struct gic_chip_data *d = data;
@@ -1660,6 +1692,13 @@ static const struct gic_quirk gic_quirks[] = {
 		.iidr	= 0x00000000,
 		.mask	= 0xffffffff,
 		.init	= gic_enable_quirk_hip06_07,
+	},
+		/* ThunderX: CN88xx 1.x */
+	{
+		.desc	= "GICv3: Cavium erratum 23154",
+		.iidr	= 0xa101034c,
+		.mask	= 0xffff0fff,
+		.init	= gic_enable_quirk_cavium_23154,
 	},
 	{
 		/*
@@ -1674,6 +1713,19 @@ static const struct gic_quirk gic_quirks[] = {
 		.mask	= 0xe8f00fff,
 		.init	= gic_enable_quirk_cavium_38539,
 	},
+	{
+		/*
+		 * IAR register reads could be unreliable, under certain
+		 * race conditions. This erratum applies to:
+		 * - ThunderX: CN88xx
+		 * - OCTEON TX: CN83xx, CN81xx
+		 * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx*
+		 */
+		.desc	= "GICv3: Marvell erratum 38545",
+		.iidr	= 0xa000034c,
+		.mask	= 0xe0f00fff,
+		.init	= gic_enable_quirk_marvell_38545,
+	},
 	{
 	}
 };
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR
  2022-02-26 12:33 [PATCH] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR Linu Cherian
@ 2022-02-26 13:50 ` Marc Zyngier
  2022-03-01  7:13   ` [EXT] " Linu Cherian
  0 siblings, 1 reply; 4+ messages in thread
From: Marc Zyngier @ 2022-02-26 13:50 UTC (permalink / raw)
  To: Linu Cherian
  Cc: tglx, catalin.marinas, will, linux-kernel, linux-arm-kernel,
	linuc.decode

On Sat, 26 Feb 2022 12:33:32 +0000,
Linu Cherian <lcherian@marvell.com> wrote:
> 
> When a IAR register read races with a GIC interrupt RELEASE event,
> GIC-CPU interface could wrongly return a valid INTID to the CPU
> for an interrupt that is already released(non activated) instead of 0x3ff.
> 
> As a side effect, an interrupt handler could run twice, once with
> interrupt priority and then with idle priority.
> 
> As a workaround, gic_read_iar is updated so that it will return a
> valid interrupt ID only if there is a change in the active priority list
> after the IAR read on all the affected Silicons.
> 
> Along with this, Thunderx erratum 23154 is reworked to use GIC IIDR
> based quirk management for the sake of consistency and also
> because there is workaround overlap on some silicon variants.
> 
> Signed-off-by: Linu Cherian <lcherian@marvell.com>
> ---
>  Documentation/arm64/silicon-errata.rst |  4 +-
>  arch/arm64/Kconfig                     | 10 -----
>  arch/arm64/include/asm/arch_gicv3.h    | 24 +++++++++--
>  arch/arm64/kernel/cpu_errata.c         |  8 ----
>  arch/arm64/tools/cpucaps               |  1 -
>  drivers/irqchip/irq-gic-v3.c           | 56 +++++++++++++++++++++++++-
>  6 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index ea281dd75517..f602faf4bf82 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -136,10 +136,12 @@ stable kernels.
>  +----------------+-----------------+-----------------+-----------------------------+
>  | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
>  +----------------+-----------------+-----------------+-----------------------------+
> -| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
> +| Cavium         | ThunderX GICv3  | #23154          | N/A                         |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
>  +----------------+-----------------+-----------------+-----------------------------+
> +| Cavium         | ThunderX GICv3  | #38545          | N/A                         |

Cavium? Or Marvell? I can understand the identity crisis, but you
should pick your poison. It also seem to affect the new TX2 rather
than (or in addition to) the ancient TX.

> ++----------------+-----------------+-----------------+-----------------------------+
>  | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 09b885cc4db5..889cb56bf5ec 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -890,16 +890,6 @@ config CAVIUM_ERRATUM_23144
>  
>  	  If unsure, say Y.
>  
> -config CAVIUM_ERRATUM_23154
> -	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
> -	default y
> -	help
> -	  The gicv3 of ThunderX requires a modified version for
> -	  reading the IAR status to ensure data synchronization
> -	  (access to icc_iar1_el1 is not sync'ed before and after).
> -
> -	  If unsure, say Y.
> -

This is starting really badly. Why make this config option disappear?

This is both useful for documentation (in the absence of a public
errata document from the silicon vendor, I *really* want to know what
I am affected by) and for people who do not want their kernels to be
encumbered by support for broken HW.

I actually want to see a description of the *new* errata in Kconfig,
warts and all.

>  config CAVIUM_ERRATUM_27456
>  	bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption"
>  	default y
> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
> index 4ad22c3135db..bc98a60a4bcb 100644
> --- a/arch/arm64/include/asm/arch_gicv3.h
> +++ b/arch/arm64/include/asm/arch_gicv3.h
> @@ -47,21 +47,37 @@ static inline u64 gic_read_iar_common(void)
>  	return irqstat;
>  }
>  
> -/*
> +/* Marvell Erratum 38545
> + *
> + * When a IAR register read races with a GIC interrupt RELEASE event,
> + * GIC-CPU interface could wrongly return a valid INTID to the CPU
> + * for an interrupt that is already released(non activated) instead of 0x3ff.
> + *
> + * To workaround this, return a valid interrupt ID only if there is a change
> + * in the active priority list after the IAR read.
> + *
>   * Cavium ThunderX erratum 23154
>   *
>   * The gicv3 of ThunderX requires a modified version for reading the
>   * IAR status to ensure data synchronization (access to icc_iar1_el1
>   * is not sync'ed before and after).
> + *
> + * Have merged both the workarounds into a common function since,
> + * 1. On Thunderx 88xx 1.x both erratas are applicable.

This is new. Are you saying that TX is also affected by this new bug?
Experience doesn't seem to support this statement, but I am prepared
to believe that this thing is even more broken than I thought.

Also, what is the story for virtualisation? Does the ICV interface
suffer from the same bug? I can't see why not (23154 definitely
affects virtualisation).

> + * 2. Having extra nops doesn't add any side effects for Silicons where
> + *    erratum 23154 is not applicable.
>   */
> -static inline u64 gic_read_iar_cavium_thunderx(void)
> +static inline u64 gic_read_iar_marvell_38545_23154(void)
>  {
> -	u64 irqstat;
> +	u64 irqstat, apr;
>  
> +	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
>  	nops(8);
>  	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
>  	nops(4);
> -	mb();
> +	dsb(sy);

mb() *is* a dsb(sy). Why the change?

> +	if (unlikely(apr == read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> +		return 0x3ff;

So you are adding extra overhead to all TX platforms that did not
require it. Why is that an acceptable outcome? If all platforms
affected by 23154 are also affected by 38545, why are they treated
independently with separate flags?

>  
>  	return irqstat;
>  }
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index b217941713a8..79beb800ee79 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -423,14 +423,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>  		ERRATA_MIDR_RANGE_LIST(erratum_845719_list),
>  	},
>  #endif
> -#ifdef CONFIG_CAVIUM_ERRATUM_23154
> -	{
> -	/* Cavium ThunderX, pass 1.x */
> -		.desc = "Cavium erratum 23154",
> -		.capability = ARM64_WORKAROUND_CAVIUM_23154,
> -		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
> -	},
> -#endif

NAK. See below.

>  #ifdef CONFIG_CAVIUM_ERRATUM_27456
>  	{
>  		.desc = "Cavium erratum 27456",
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 9c65b1e25a96..3f751fe4fec4 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -62,7 +62,6 @@ WORKAROUND_2077057
>  WORKAROUND_TRBE_OVERWRITE_FILL_MODE
>  WORKAROUND_TSB_FLUSH_FAILURE
>  WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
> -WORKAROUND_CAVIUM_23154
>  WORKAROUND_CAVIUM_27456
>  WORKAROUND_CAVIUM_30115
>  WORKAROUND_CAVIUM_TX2_219_PRFM
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index 5e935d97207d..a3b58bf4fce4 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -35,6 +35,8 @@
>  
>  #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996	(1ULL << 0)
>  #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539	(1ULL << 1)
> +#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154	(1ULL << 2)
> +#define FLAGS_WORKAROUND_MARVELL_ERRATUM_38545	(1ULL << 3)
>  
>  #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
>  
> @@ -60,6 +62,7 @@ struct gic_chip_data {
>  
>  static struct gic_chip_data gic_data __read_mostly;
>  static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
> +static DEFINE_STATIC_KEY_FALSE(gic_iar_quirk);
>  
>  #define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
>  #define GIC_LINE_NR	min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
> @@ -235,10 +238,19 @@ static void gic_redist_wait_for_rwp(void)
>  
>  #ifdef CONFIG_ARM64
>  
> +static u64 __maybe_unused gic_read_iar_fixup(void)
> +{
> +	if (gic_data.flags & FLAGS_WORKAROUND_MARVELL_ERRATUM_38545 ||
> +		gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154)
> +		return gic_read_iar_marvell_38545_23154();
> +	else /* Not possible */
> +		return ICC_IAR1_EL1_SPURIOUS;

Not possible? What does that even mean? And what is the point of
gating things with a static key if you have to check again with an
extra load?

> +}
> +
>  static u64 __maybe_unused gic_read_iar(void)
>  {
> -	if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
> -		return gic_read_iar_cavium_thunderx();
> +	if (static_branch_unlikely(&gic_iar_quirk))
> +		return gic_read_iar_fixup();

You are trading a static key for another one describing... the same
thing. What is the point?

>  	else
>  		return gic_read_iar_common();
>  }
> @@ -1614,6 +1626,16 @@ static bool gic_enable_quirk_msm8996(void *data)
>  	return true;
>  }
>  
> +static bool gic_enable_quirk_cavium_23154(void *data)
> +{
> +	struct gic_chip_data *d = data;
> +
> +	d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154;
> +	static_branch_enable(&gic_iar_quirk);
> +
> +	return true;
> +}
> +
>  static bool gic_enable_quirk_cavium_38539(void *data)
>  {
>  	struct gic_chip_data *d = data;
> @@ -1623,6 +1645,16 @@ static bool gic_enable_quirk_cavium_38539(void *data)
>  	return true;
>  }
>  
> +static bool gic_enable_quirk_marvell_38545(void *data)
> +{
> +	struct gic_chip_data *d = data;
> +
> +	d->flags |= FLAGS_WORKAROUND_MARVELL_ERRATUM_38545;
> +	static_branch_enable(&gic_iar_quirk);
> +
> +	return true;
> +}
> +
>  static bool gic_enable_quirk_hip06_07(void *data)
>  {
>  	struct gic_chip_data *d = data;
> @@ -1660,6 +1692,13 @@ static const struct gic_quirk gic_quirks[] = {
>  		.iidr	= 0x00000000,
>  		.mask	= 0xffffffff,
>  		.init	= gic_enable_quirk_hip06_07,
> +	},
> +		/* ThunderX: CN88xx 1.x */
> +	{
> +		.desc	= "GICv3: Cavium erratum 23154",
> +		.iidr	= 0xa101034c,
> +		.mask	= 0xffff0fff,
> +		.init	= gic_enable_quirk_cavium_23154,
>  	},
>  	{
>  		/*
> @@ -1674,6 +1713,19 @@ static const struct gic_quirk gic_quirks[] = {
>  		.mask	= 0xe8f00fff,
>  		.init	= gic_enable_quirk_cavium_38539,
>  	},
> +	{
> +		/*
> +		 * IAR register reads could be unreliable, under certain
> +		 * race conditions. This erratum applies to:
> +		 * - ThunderX: CN88xx
> +		 * - OCTEON TX: CN83xx, CN81xx
> +		 * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx*
> +		 */
> +		.desc	= "GICv3: Marvell erratum 38545",
> +		.iidr	= 0xa000034c,
> +		.mask	= 0xe0f00fff,
> +		.init	= gic_enable_quirk_marvell_38545,
> +	},

I love the fact that you are checking for a a *CPU interface* bug
based on the *distributor* IIDR. What could possibly go wrong? Oh,
don't worry, this only breaks... *all virtual machines*.

Puzzled,

	M.

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [EXT] Re: [PATCH] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR
  2022-02-26 13:50 ` Marc Zyngier
@ 2022-03-01  7:13   ` Linu Cherian
  2022-03-01 15:10     ` Marc Zyngier
  0 siblings, 1 reply; 4+ messages in thread
From: Linu Cherian @ 2022-03-01  7:13 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: tglx, catalin.marinas, will, linux-kernel, linux-arm-kernel,
	linuc.decode

Hi Marc,

> -----Original Message-----
> From: Marc Zyngier <maz@kernel.org>
> Sent: Saturday, February 26, 2022 7:20 PM
> To: Linu Cherian <lcherian@marvell.com>
> Cc: tglx@linutronix.de; catalin.marinas@arm.com; will@kernel.org; linux-
> kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> linuc.decode@gmail.com
> Subject: [EXT] Re: [PATCH] irqchip/gic-v3: Workaround Marvell erratum
> 38545 when reading IAR
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Sat, 26 Feb 2022 12:33:32 +0000,
> Linu Cherian <lcherian@marvell.com> wrote:
> >
> > When a IAR register read races with a GIC interrupt RELEASE event,
> > GIC-CPU interface could wrongly return a valid INTID to the CPU for an
> > interrupt that is already released(non activated) instead of 0x3ff.
> >
> > As a side effect, an interrupt handler could run twice, once with
> > interrupt priority and then with idle priority.
> >
> > As a workaround, gic_read_iar is updated so that it will return a
> > valid interrupt ID only if there is a change in the active priority
> > list after the IAR read on all the affected Silicons.
> >
> > Along with this, Thunderx erratum 23154 is reworked to use GIC IIDR
> > based quirk management for the sake of consistency and also because
> > there is workaround overlap on some silicon variants.
> >
> > Signed-off-by: Linu Cherian <lcherian@marvell.com>
> > ---
> >  Documentation/arm64/silicon-errata.rst |  4 +-
> >  arch/arm64/Kconfig                     | 10 -----
> >  arch/arm64/include/asm/arch_gicv3.h    | 24 +++++++++--
> >  arch/arm64/kernel/cpu_errata.c         |  8 ----
> >  arch/arm64/tools/cpucaps               |  1 -
> >  drivers/irqchip/irq-gic-v3.c           | 56 +++++++++++++++++++++++++-
> >  6 files changed, 77 insertions(+), 26 deletions(-)
> >
> > diff --git a/Documentation/arm64/silicon-errata.rst
> > b/Documentation/arm64/silicon-errata.rst
> > index ea281dd75517..f602faf4bf82 100644
> > --- a/Documentation/arm64/silicon-errata.rst
> > +++ b/Documentation/arm64/silicon-errata.rst
> > @@ -136,10 +136,12 @@ stable kernels.
> >  +----------------+-----------------+-----------------+-----------------------------+
> >  | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144
> |
> >
> > +----------------+-----------------+-----------------+----------------
> > -------------+
> > -| Cavium         | ThunderX GICv3  | #23154          |
> CAVIUM_ERRATUM_23154        |
> > +| Cavium         | ThunderX GICv3  | #23154          | N/A                         |
> >  +----------------+-----------------+-----------------+-----------------------------+
> >  | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
> >
> > +----------------+-----------------+-----------------+----------------
> > -------------+
> > +| Cavium         | ThunderX GICv3  | #38545          | N/A                         |
> 
> Cavium? Or Marvell? I can understand the identity crisis, but you should pick
> your poison. It also seem to affect the new TX2 rather than (or in addition to)
> the ancient TX.

It doesn't applies to Tx2(ThunderX2) and it affects OcteonTx2, OcteonTx
and ThunderX.

In the V2 will rename this as OcteonTx2 to avoid confusion with ThunderX2.


> 
> > ++----------------+-----------------+-----------------+-----------------------------+
> >  | Cavium         | ThunderX Core   | #27456          |
> CAVIUM_ERRATUM_27456        |
> >  +----------------+-----------------+-----------------+-----------------------------+
> >  | Cavium         | ThunderX Core   | #30115          |
> CAVIUM_ERRATUM_30115        |
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index
> > 09b885cc4db5..889cb56bf5ec 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -890,16 +890,6 @@ config CAVIUM_ERRATUM_23144
> >
> >  	  If unsure, say Y.
> >
> > -config CAVIUM_ERRATUM_23154
> > -	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
> > -	default y
> > -	help
> > -	  The gicv3 of ThunderX requires a modified version for
> > -	  reading the IAR status to ensure data synchronization
> > -	  (access to icc_iar1_el1 is not sync'ed before and after).
> > -
> > -	  If unsure, say Y.
> > -
> 
> This is starting really badly. Why make this config option disappear?
> 
> This is both useful for documentation (in the absence of a public errata
> document from the silicon vendor, I *really* want to know what I am affected
> by) and for people who do not want their kernels to be encumbered by
> support for broken HW.
> 
> I actually want to see a description of the *new* errata in Kconfig, warts and
> all.


Ack. Will revert this.

Let me clarify the reason why it was removed. 
IIUC, there are two ways in which errata is managed in the GIC driver.
1. GICD_IIDR based quirk management
2. CPU MIDR based quirk management

Somehow I overlooked the virtualization scenario and missed the point that, 
GICD_IIDR doesn't reflect the actual Silicon implementer in a guest unlike CPU MIDR and 
hence wrongly assumed that the errata will take effect in both host and guest.
 
Opted for the IIDR method(option #1), since it was contained within the GIC driver.
Will change this to option #2 .

> 
> >  config CAVIUM_ERRATUM_27456
> >  	bool "Cavium erratum 27456: Broadcast TLBI instructions may cause
> icache corruption"
> >  	default y
> > diff --git a/arch/arm64/include/asm/arch_gicv3.h
> > b/arch/arm64/include/asm/arch_gicv3.h
> > index 4ad22c3135db..bc98a60a4bcb 100644
> > --- a/arch/arm64/include/asm/arch_gicv3.h
> > +++ b/arch/arm64/include/asm/arch_gicv3.h
> > @@ -47,21 +47,37 @@ static inline u64 gic_read_iar_common(void)
> >  	return irqstat;
> >  }
> >
> > -/*
> > +/* Marvell Erratum 38545
> > + *
> > + * When a IAR register read races with a GIC interrupt RELEASE event,
> > + * GIC-CPU interface could wrongly return a valid INTID to the CPU
> > + * for an interrupt that is already released(non activated) instead of 0x3ff.
> > + *
> > + * To workaround this, return a valid interrupt ID only if there is a
> > +change
> > + * in the active priority list after the IAR read.
> > + *
> >   * Cavium ThunderX erratum 23154
> >   *
> >   * The gicv3 of ThunderX requires a modified version for reading the
> >   * IAR status to ensure data synchronization (access to icc_iar1_el1
> >   * is not sync'ed before and after).
> > + *
> > + * Have merged both the workarounds into a common function since,
> > + * 1. On Thunderx 88xx 1.x both erratas are applicable.
> 
> This is new. Are you saying that TX is also affected by this new bug?
> Experience doesn't seem to support this statement, but I am prepared to
> believe that this thing is even more broken than I thought.
> 

Yes. Thunderx is affected by this new bug as well but got identified only while testing
 on a OcteonTx2 hardware. HW team has confirmed that it applies to the older
ThudnderX and OcteonTx Silicons as well.


> Also, what is the story for virtualisation? Does the ICV interface suffer from
> the same bug? I can't see why not (23154 definitely affects virtualisation).
> 

Yes. It affects virtualization as well and the errata is applicable to ICV interface.


> > + * 2. Having extra nops doesn't add any side effects for Silicons where
> > + *    erratum 23154 is not applicable.
> >   */
> > -static inline u64 gic_read_iar_cavium_thunderx(void)
> > +static inline u64 gic_read_iar_marvell_38545_23154(void)
> >  {
> > -	u64 irqstat;
> > +	u64 irqstat, apr;
> >
> > +	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
> >  	nops(8);
> >  	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
> >  	nops(4);
> > -	mb();
> > +	dsb(sy);
> 
> mb() *is* a dsb(sy). Why the change?

Agree the change is redundant.

> 
> > +	if (unlikely(apr == read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> > +		return 0x3ff;
> 
> So you are adding extra overhead to all TX platforms that did not require it.
> Why is that an acceptable outcome? If all platforms affected by 23154 are also
> affected by 38545, why are they treated independently with separate flags?
> 

Basically we need two IAR workaround functions,

#1 That takes care of both  both 38545 and 23154 (For example ThunderX 88xx 1.x)

#2. That cares of only 38545 

Since the only difference between the two is  12 NOPs, tried to use a
a common function for both the workaround to avoid code duplication.

Yeah. Agree that separate flags were really not required.

Will keep #1 and #2 as separate functions if that is preferred.


> >
> >  	return irqstat;
> >  }
> > diff --git a/arch/arm64/kernel/cpu_errata.c
> > b/arch/arm64/kernel/cpu_errata.c index b217941713a8..79beb800ee79
> > 100644
> > --- a/arch/arm64/kernel/cpu_errata.c
> > +++ b/arch/arm64/kernel/cpu_errata.c
> > @@ -423,14 +423,6 @@ const struct arm64_cpu_capabilities
> arm64_errata[] = {
> >  		ERRATA_MIDR_RANGE_LIST(erratum_845719_list),
> >  	},
> >  #endif
> > -#ifdef CONFIG_CAVIUM_ERRATUM_23154
> > -	{
> > -	/* Cavium ThunderX, pass 1.x */
> > -		.desc = "Cavium erratum 23154",
> > -		.capability = ARM64_WORKAROUND_CAVIUM_23154,
> > -		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
> > -	},
> > -#endif
> 
> NAK. See below.
> 
> >  #ifdef CONFIG_CAVIUM_ERRATUM_27456
> >  	{
> >  		.desc = "Cavium erratum 27456",
> > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index
> > 9c65b1e25a96..3f751fe4fec4 100644
> > --- a/arch/arm64/tools/cpucaps
> > +++ b/arch/arm64/tools/cpucaps
> > @@ -62,7 +62,6 @@ WORKAROUND_2077057
> >  WORKAROUND_TRBE_OVERWRITE_FILL_MODE
> >  WORKAROUND_TSB_FLUSH_FAILURE
> >  WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
> > -WORKAROUND_CAVIUM_23154
> >  WORKAROUND_CAVIUM_27456
> >  WORKAROUND_CAVIUM_30115
> >  WORKAROUND_CAVIUM_TX2_219_PRFM
> > diff --git a/drivers/irqchip/irq-gic-v3.c
> > b/drivers/irqchip/irq-gic-v3.c index 5e935d97207d..a3b58bf4fce4 100644
> > --- a/drivers/irqchip/irq-gic-v3.c
> > +++ b/drivers/irqchip/irq-gic-v3.c
> > @@ -35,6 +35,8 @@
> >
> >  #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996	(1ULL << 0)
> >  #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539	(1ULL << 1)
> > +#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154	(1ULL << 2)
> > +#define FLAGS_WORKAROUND_MARVELL_ERRATUM_38545	(1ULL
> << 3)
> >
> >  #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
> >
> > @@ -60,6 +62,7 @@ struct gic_chip_data {
> >
> >  static struct gic_chip_data gic_data __read_mostly;  static
> > DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
> > +static DEFINE_STATIC_KEY_FALSE(gic_iar_quirk);
> >
> >  #define GIC_ID_NR	(1U <<
> GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
> >  #define GIC_LINE_NR
> 	min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
> > @@ -235,10 +238,19 @@ static void gic_redist_wait_for_rwp(void)
> >
> >  #ifdef CONFIG_ARM64
> >
> > +static u64 __maybe_unused gic_read_iar_fixup(void) {
> > +	if (gic_data.flags &
> FLAGS_WORKAROUND_MARVELL_ERRATUM_38545 ||
> > +		gic_data.flags &
> FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154)
> > +		return gic_read_iar_marvell_38545_23154();
> > +	else /* Not possible */
> > +		return ICC_IAR1_EL1_SPURIOUS;
> 
> Not possible? What does that even mean? And what is the point of gating
> things with a static key if you have to check again with an extra load?
> 

Please see below.

> > +}
> > +
> >  static u64 __maybe_unused gic_read_iar(void)  {
> > -	if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
> > -		return gic_read_iar_cavium_thunderx();
> > +	if (static_branch_unlikely(&gic_iar_quirk))
> > +		return gic_read_iar_fixup();
> 
> You are trading a static key for another one describing... the same thing. What
> is the point?

The real intention was to avoid an additional if check getting introduced like below for Silicon
not affected by both of these errata.

If (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM23154))
	return gic_read_iar_38545_23154(); /* Both workaround applicable */
else if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM38545))
              return gic_read_iar_38545();  /* Only 38545 applicable */

So tried to use a static key, gic_iar_quirk instead.


> 
> >  	else
> >  		return gic_read_iar_common();
> >  }
> > @@ -1614,6 +1626,16 @@ static bool gic_enable_quirk_msm8996(void
> *data)
> >  	return true;
> >  }
> >
> > +static bool gic_enable_quirk_cavium_23154(void *data) {
> > +	struct gic_chip_data *d = data;
> > +
> > +	d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154;
> > +	static_branch_enable(&gic_iar_quirk);
> > +
> > +	return true;
> > +}
> > +
> >  static bool gic_enable_quirk_cavium_38539(void *data)  {
> >  	struct gic_chip_data *d = data;
> > @@ -1623,6 +1645,16 @@ static bool
> gic_enable_quirk_cavium_38539(void *data)
> >  	return true;
> >  }
> >
> > +static bool gic_enable_quirk_marvell_38545(void *data) {
> > +	struct gic_chip_data *d = data;
> > +
> > +	d->flags |= FLAGS_WORKAROUND_MARVELL_ERRATUM_38545;
> > +	static_branch_enable(&gic_iar_quirk);
> > +
> > +	return true;
> > +}
> > +
> >  static bool gic_enable_quirk_hip06_07(void *data)  {
> >  	struct gic_chip_data *d = data;
> > @@ -1660,6 +1692,13 @@ static const struct gic_quirk gic_quirks[] = {
> >  		.iidr	= 0x00000000,
> >  		.mask	= 0xffffffff,
> >  		.init	= gic_enable_quirk_hip06_07,
> > +	},
> > +		/* ThunderX: CN88xx 1.x */
> > +	{
> > +		.desc	= "GICv3: Cavium erratum 23154",
> > +		.iidr	= 0xa101034c,
> > +		.mask	= 0xffff0fff,
> > +		.init	= gic_enable_quirk_cavium_23154,
> >  	},
> >  	{
> >  		/*
> > @@ -1674,6 +1713,19 @@ static const struct gic_quirk gic_quirks[] = {
> >  		.mask	= 0xe8f00fff,
> >  		.init	= gic_enable_quirk_cavium_38539,
> >  	},
> > +	{
> > +		/*
> > +		 * IAR register reads could be unreliable, under certain
> > +		 * race conditions. This erratum applies to:
> > +		 * - ThunderX: CN88xx
> > +		 * - OCTEON TX: CN83xx, CN81xx
> > +		 * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx*z
> > +		 */
> > +		.desc	= "GICv3: Marvell erratum 38545",
> > +		.iidr	= 0xa000034c,
> > +		.mask	= 0xe0f00fff,
> > +		.init	= gic_enable_quirk_marvell_38545,
> > +	},
> 
> I love the fact that you are checking for a a *CPU interface* bug based on the
> *distributor* IIDR. What could possibly go wrong? Oh, don't worry, this only
> breaks... *all virtual machines*.
> 

Thanks for pointing out. Will change this to CPU MIDR based checks.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [EXT] Re: [PATCH] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR
  2022-03-01  7:13   ` [EXT] " Linu Cherian
@ 2022-03-01 15:10     ` Marc Zyngier
  0 siblings, 0 replies; 4+ messages in thread
From: Marc Zyngier @ 2022-03-01 15:10 UTC (permalink / raw)
  To: Linu Cherian
  Cc: tglx, catalin.marinas, will, linux-kernel, linux-arm-kernel,
	linuc.decode

Linu,

On Tue, 01 Mar 2022 07:13:21 +0000,
Linu Cherian <lcherian@marvell.com> wrote:
> 
> Hi Marc,
> 
> > -----Original Message-----
> > From: Marc Zyngier <maz@kernel.org>
> > Sent: Saturday, February 26, 2022 7:20 PM
> > To: Linu Cherian <lcherian@marvell.com>
> > Cc: tglx@linutronix.de; catalin.marinas@arm.com; will@kernel.org; linux-
> > kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> > linuc.decode@gmail.com
> > Subject: [EXT] Re: [PATCH] irqchip/gic-v3: Workaround Marvell erratum
> > 38545 when reading IAR
> > 
> > External Email
> > 
> > ----------------------------------------------------------------------
> > On Sat, 26 Feb 2022 12:33:32 +0000,
> > Linu Cherian <lcherian@marvell.com> wrote:
> > >
> > > When a IAR register read races with a GIC interrupt RELEASE event,
> > > GIC-CPU interface could wrongly return a valid INTID to the CPU for an
> > > interrupt that is already released(non activated) instead of 0x3ff.
> > >
> > > As a side effect, an interrupt handler could run twice, once with
> > > interrupt priority and then with idle priority.
> > >
> > > As a workaround, gic_read_iar is updated so that it will return a
> > > valid interrupt ID only if there is a change in the active priority
> > > list after the IAR read on all the affected Silicons.
> > >
> > > Along with this, Thunderx erratum 23154 is reworked to use GIC IIDR
> > > based quirk management for the sake of consistency and also because
> > > there is workaround overlap on some silicon variants.
> > >
> > > Signed-off-by: Linu Cherian <lcherian@marvell.com>
> > > ---
> > >  Documentation/arm64/silicon-errata.rst |  4 +-
> > >  arch/arm64/Kconfig                     | 10 -----
> > >  arch/arm64/include/asm/arch_gicv3.h    | 24 +++++++++--
> > >  arch/arm64/kernel/cpu_errata.c         |  8 ----
> > >  arch/arm64/tools/cpucaps               |  1 -
> > >  drivers/irqchip/irq-gic-v3.c           | 56 +++++++++++++++++++++++++-
> > >  6 files changed, 77 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/Documentation/arm64/silicon-errata.rst
> > > b/Documentation/arm64/silicon-errata.rst
> > > index ea281dd75517..f602faf4bf82 100644
> > > --- a/Documentation/arm64/silicon-errata.rst
> > > +++ b/Documentation/arm64/silicon-errata.rst
> > > @@ -136,10 +136,12 @@ stable kernels.
> > >  +----------------+-----------------+-----------------+-----------------------------+
> > >  | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144
> > |
> > >
> > > +----------------+-----------------+-----------------+----------------
> > > -------------+
> > > -| Cavium         | ThunderX GICv3  | #23154          |
> > CAVIUM_ERRATUM_23154        |
> > > +| Cavium         | ThunderX GICv3  | #23154          | N/A                         |
> > >  +----------------+-----------------+-----------------+-----------------------------+
> > >  | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
> > >
> > > +----------------+-----------------+-----------------+----------------
> > > -------------+
> > > +| Cavium         | ThunderX GICv3  | #38545          | N/A                         |
> > 
> > Cavium? Or Marvell? I can understand the identity crisis, but you should pick
> > your poison. It also seem to affect the new TX2 rather than (or in addition to)
> > the ancient TX.
> 
> It doesn't applies to Tx2(ThunderX2) and it affects OcteonTx2, OcteonTx
> and ThunderX.
> 
> In the V2 will rename this as OcteonTx2 to avoid confusion with ThunderX2.
> 
> 
> > 
> > > ++----------------+-----------------+-----------------+-----------------------------+
> > >  | Cavium         | ThunderX Core   | #27456          |
> > CAVIUM_ERRATUM_27456        |
> > >  +----------------+-----------------+-----------------+-----------------------------+
> > >  | Cavium         | ThunderX Core   | #30115          |
> > CAVIUM_ERRATUM_30115        |
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index
> > > 09b885cc4db5..889cb56bf5ec 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -890,16 +890,6 @@ config CAVIUM_ERRATUM_23144
> > >
> > >  	  If unsure, say Y.
> > >
> > > -config CAVIUM_ERRATUM_23154
> > > -	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
> > > -	default y
> > > -	help
> > > -	  The gicv3 of ThunderX requires a modified version for
> > > -	  reading the IAR status to ensure data synchronization
> > > -	  (access to icc_iar1_el1 is not sync'ed before and after).
> > > -
> > > -	  If unsure, say Y.
> > > -
> > 
> > This is starting really badly. Why make this config option disappear?
> > 
> > This is both useful for documentation (in the absence of a public errata
> > document from the silicon vendor, I *really* want to know what I am affected
> > by) and for people who do not want their kernels to be encumbered by
> > support for broken HW.
> > 
> > I actually want to see a description of the *new* errata in Kconfig, warts and
> > all.
> 
> 
> Ack. Will revert this.
> 
> Let me clarify the reason why it was removed. 
> IIUC, there are two ways in which errata is managed in the GIC driver.
> 1. GICD_IIDR based quirk management
> 2. CPU MIDR based quirk management
> 
> Somehow I overlooked the virtualization scenario and missed the point that, 
> GICD_IIDR doesn't reflect the actual Silicon implementer in a guest unlike CPU MIDR and 
> hence wrongly assumed that the errata will take effect in both host and guest.
>  
> Opted for the IIDR method(option #1), since it was contained within the GIC driver.
> Will change this to option #2 .
> 
> > 
> > >  config CAVIUM_ERRATUM_27456
> > >  	bool "Cavium erratum 27456: Broadcast TLBI instructions may cause
> > icache corruption"
> > >  	default y
> > > diff --git a/arch/arm64/include/asm/arch_gicv3.h
> > > b/arch/arm64/include/asm/arch_gicv3.h
> > > index 4ad22c3135db..bc98a60a4bcb 100644
> > > --- a/arch/arm64/include/asm/arch_gicv3.h
> > > +++ b/arch/arm64/include/asm/arch_gicv3.h
> > > @@ -47,21 +47,37 @@ static inline u64 gic_read_iar_common(void)
> > >  	return irqstat;
> > >  }
> > >
> > > -/*
> > > +/* Marvell Erratum 38545
> > > + *
> > > + * When a IAR register read races with a GIC interrupt RELEASE event,
> > > + * GIC-CPU interface could wrongly return a valid INTID to the CPU
> > > + * for an interrupt that is already released(non activated) instead of 0x3ff.
> > > + *
> > > + * To workaround this, return a valid interrupt ID only if there is a
> > > +change
> > > + * in the active priority list after the IAR read.
> > > + *
> > >   * Cavium ThunderX erratum 23154
> > >   *
> > >   * The gicv3 of ThunderX requires a modified version for reading the
> > >   * IAR status to ensure data synchronization (access to icc_iar1_el1
> > >   * is not sync'ed before and after).
> > > + *
> > > + * Have merged both the workarounds into a common function since,
> > > + * 1. On Thunderx 88xx 1.x both erratas are applicable.
> > 
> > This is new. Are you saying that TX is also affected by this new bug?
> > Experience doesn't seem to support this statement, but I am prepared to
> > believe that this thing is even more broken than I thought.
> > 
> 
> Yes. Thunderx is affected by this new bug as well but got identified
> only while testing on a OcteonTx2 hardware. HW team has confirmed
> that it applies to the older ThudnderX and OcteonTx Silicons as
> well.

Oh great. Copy paste strikes back... :-/

> 
> 
> > Also, what is the story for virtualisation? Does the ICV interface
> > suffer from the same bug? I can't see why not (23154 definitely
> > affects virtualisation).
> > 
> 
> Yes. It affects virtualization as well and the errata is applicable
> to ICV interface.

/me picks a spare TX SoC from the bin and frames it.

> 
> 
> > > + * 2. Having extra nops doesn't add any side effects for Silicons where
> > > + *    erratum 23154 is not applicable.
> > >   */
> > > -static inline u64 gic_read_iar_cavium_thunderx(void)
> > > +static inline u64 gic_read_iar_marvell_38545_23154(void)
> > >  {
> > > -	u64 irqstat;
> > > +	u64 irqstat, apr;
> > >
> > > +	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
> > >  	nops(8);
> > >  	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
> > >  	nops(4);
> > > -	mb();
> > > +	dsb(sy);
> > 
> > mb() *is* a dsb(sy). Why the change?
> 
> Agree the change is redundant.
> 
> > 
> > > +	if (unlikely(apr == read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> > > +		return 0x3ff;
> > 
> > So you are adding extra overhead to all TX platforms that did not
> > require it.  Why is that an acceptable outcome? If all platforms
> > affected by 23154 are also affected by 38545, why are they treated
> > independently with separate flags?
> > 
> 
> Basically we need two IAR workaround functions,
> 
> #1 That takes care of both both 38545 and 23154 (For example
> #ThunderX 88xx 1.x)
> 
> #2. That cares of only 38545 
> 
> Since the only difference between the two is  12 NOPs, tried to use a
> a common function for both the workaround to avoid code duplication.
> 
> Yeah. Agree that separate flags were really not required.
> 
> Will keep #1 and #2 as separate functions if that is preferred.

Maybe not. Given that hardly anyone has access to OTX2, and that there
is a gazillion of TX in the wild, the single function looks like the
right thing to do, with a single static key for both of them.

You can simply extend the current 23154 workaround to also take care
of 38545 (documenting it whilst you're at it), and the patch should be
10 lines at most.

> 
> 
> > >
> > >  	return irqstat;
> > >  }
> > > diff --git a/arch/arm64/kernel/cpu_errata.c
> > > b/arch/arm64/kernel/cpu_errata.c index b217941713a8..79beb800ee79
> > > 100644
> > > --- a/arch/arm64/kernel/cpu_errata.c
> > > +++ b/arch/arm64/kernel/cpu_errata.c
> > > @@ -423,14 +423,6 @@ const struct arm64_cpu_capabilities
> > arm64_errata[] = {
> > >  		ERRATA_MIDR_RANGE_LIST(erratum_845719_list),
> > >  	},
> > >  #endif
> > > -#ifdef CONFIG_CAVIUM_ERRATUM_23154
> > > -	{
> > > -	/* Cavium ThunderX, pass 1.x */
> > > -		.desc = "Cavium erratum 23154",
> > > -		.capability = ARM64_WORKAROUND_CAVIUM_23154,
> > > -		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),

Expand this to an MIDR list instead of a single range in order to
handle OTX2 as well.

> > > -	},
> > > -#endif
> > 
> > NAK. See below.
> > 
> > >  #ifdef CONFIG_CAVIUM_ERRATUM_27456
> > >  	{
> > >  		.desc = "Cavium erratum 27456",
> > > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index
> > > 9c65b1e25a96..3f751fe4fec4 100644
> > > --- a/arch/arm64/tools/cpucaps
> > > +++ b/arch/arm64/tools/cpucaps
> > > @@ -62,7 +62,6 @@ WORKAROUND_2077057
> > >  WORKAROUND_TRBE_OVERWRITE_FILL_MODE
> > >  WORKAROUND_TSB_FLUSH_FAILURE
> > >  WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
> > > -WORKAROUND_CAVIUM_23154
> > >  WORKAROUND_CAVIUM_27456
> > >  WORKAROUND_CAVIUM_30115
> > >  WORKAROUND_CAVIUM_TX2_219_PRFM
> > > diff --git a/drivers/irqchip/irq-gic-v3.c
> > > b/drivers/irqchip/irq-gic-v3.c index 5e935d97207d..a3b58bf4fce4 100644
> > > --- a/drivers/irqchip/irq-gic-v3.c
> > > +++ b/drivers/irqchip/irq-gic-v3.c
> > > @@ -35,6 +35,8 @@
> > >
> > >  #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996	(1ULL << 0)
> > >  #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539	(1ULL << 1)
> > > +#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154	(1ULL << 2)
> > > +#define FLAGS_WORKAROUND_MARVELL_ERRATUM_38545	(1ULL
> > << 3)
> > >
> > >  #define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
> > >
> > > @@ -60,6 +62,7 @@ struct gic_chip_data {
> > >
> > >  static struct gic_chip_data gic_data __read_mostly;  static
> > > DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
> > > +static DEFINE_STATIC_KEY_FALSE(gic_iar_quirk);
> > >
> > >  #define GIC_ID_NR	(1U <<
> > GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
> > >  #define GIC_LINE_NR
> > 	min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
> > > @@ -235,10 +238,19 @@ static void gic_redist_wait_for_rwp(void)
> > >
> > >  #ifdef CONFIG_ARM64
> > >
> > > +static u64 __maybe_unused gic_read_iar_fixup(void) {
> > > +	if (gic_data.flags &
> > FLAGS_WORKAROUND_MARVELL_ERRATUM_38545 ||
> > > +		gic_data.flags &
> > FLAGS_WORKAROUND_CAVIUM_ERRATUM_23154)
> > > +		return gic_read_iar_marvell_38545_23154();
> > > +	else /* Not possible */
> > > +		return ICC_IAR1_EL1_SPURIOUS;
> > 
> > Not possible? What does that even mean? And what is the point of gating
> > things with a static key if you have to check again with an extra load?
> > 
> 
> Please see below.
> 
> > > +}
> > > +
> > >  static u64 __maybe_unused gic_read_iar(void)  {
> > > -	if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
> > > -		return gic_read_iar_cavium_thunderx();
> > > +	if (static_branch_unlikely(&gic_iar_quirk))
> > > +		return gic_read_iar_fixup();
> > 
> > You are trading a static key for another one describing... the same thing. What
> > is the point?
> 
> The real intention was to avoid an additional if check getting
> introduced like below for Silicon not affected by both of these
> errata.
> 
> If (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM23154))
> 	return gic_read_iar_38545_23154(); /* Both workaround applicable */
> else if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM38545))
>               return gic_read_iar_38545();  /* Only 38545 applicable */
> 
> So tried to use a static key, gic_iar_quirk instead.

Nah, just key it against ARM64_WORKAROUND_CAVIUM_23154. As you pointed
out, NOPs are cheap. That will be  an incentive for someone to debug
this HW.

To sum it up, stash the actual workaround in the existing function,
and the rest as below (obviously it doesn't compile).

Thanks,

	M.

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ea281dd75517..466cb9e89047 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -136,7 +136,7 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 +----------------+-----------------+-----------------+-----------------------------+
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
+| Cavium         | ThunderX GICv3  | #23154,38545    | CAVIUM_ERRATUM_23154        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cbcd42decb2a..049ca0cc0525 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -890,12 +890,17 @@ config CAVIUM_ERRATUM_23144
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_23154
-	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
+	bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
 	default y
 	help
-	  The gicv3 of ThunderX requires a modified version for
+	  The ThunderX GICv3 implementation requires a modified version for
 	  reading the IAR status to ensure data synchronization
-	  (access to icc_iar1_el1 is not sync'ed before and after).
+	  (access to icc_iar1_el1 is not sync'ed before and after, erratum
+	  23154).
+
+	  It also suffers from erratum 38545 (also present on Marvell's
+	  OcteonTX2), resulting in deactivated interrupts being spuriously
+	  presented to the CPU interface.
 
 	  If unsure, say Y.
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b217941713a8..553a72b517af 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -214,6 +214,16 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 };
 #endif
 
+#ifdef CONFIG_CAVIUM_ERRATUM_23154
+const struct midr_range cavium_erratum_23154_cpus[] = {
+	/* Cavium ThunderX, pass 1.x */
+	MIDR_RANGE(MIDR_THUNDERX, 0, 0, 0, 1),
+	/* OcteonTX2 */
+	MIDR_ALL_VERSIONS(MIDR_MRVL_OCTEONTX2),
+	{},
+};
+#endif
+
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
 const struct midr_range cavium_erratum_27456_cpus[] = {
 	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
@@ -425,10 +435,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
 	{
-	/* Cavium ThunderX, pass 1.x */
-		.desc = "Cavium erratum 23154",
+		.desc = "Cavium errata 23154 and 38545",
 		.capability = ARM64_WORKAROUND_CAVIUM_23154,
-		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
+		ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-03-01 15:10 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-26 12:33 [PATCH] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR Linu Cherian
2022-02-26 13:50 ` Marc Zyngier
2022-03-01  7:13   ` [EXT] " Linu Cherian
2022-03-01 15:10     ` Marc Zyngier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).