All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ARM: mm: Add workaround for erratum 763126
@ 2014-05-08  5:55 Arjun.K.V
  2014-05-08 16:57 ` Doug Anderson
  0 siblings, 1 reply; 7+ messages in thread
From: Arjun.K.V @ 2014-05-08  5:55 UTC (permalink / raw)
  To: linux-arm-kernel

From: "Arjun.K.V" <arjun.kv@samsung.com>

Three processor exclusive access livelock.
In a system with three or more coherent masters that all use
the ldrex/strex synchronization primitives to access a semaphore
in coherent cacheable memory, there is a possibility of a
livelock condition where two masters continuously attempt
and fail to get the lock while the third master
continuously reads the lock.

Workaround is to set the "Snoop-delayed exclusive handling"
bit in the Auxiliary Control Register, ACTLR[31] to 1.
This hardware is installed in each processor to detect
that the load/store exclusive livelock scenario may be occurring
and delay snoops for a period of time to allow the load
exclusive/store exclusive loop to complete and make
forward progress.

Change-Id: Idcf066e25ea6571a0f5da6f3a770318c1a9d6fff
Signed-off-by: Arjun.K.V <arjun.kv@samsung.com>
---
 arch/arm/Kconfig      |   13 +++++++++++++
 arch/arm/mm/proc-v7.S |   43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index db3c541..80b8562 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1464,6 +1464,19 @@ source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pcmcia/Kconfig"
 
+config ARM_ERRATA_763126
+	bool "ARM errata: Three processor exclusive access livelock"
+	depends on CPU_V7 && SMP
+	help
+	  This enables the workaround got ARM Erratum 763126.
+	  In a system with three or more coherent masters that all use the
+	  ldrex/strex synchronization primitives to access a semaphore in
+	  coherent cacheable memory, there is a possibility of a livelock
+	  condition where two masters continuously attempt and fail to get
+	  the lock while the third master continuously reads the lock.
+	  Workaround is to set the "Snoop-delayed exclusive handling" bit
+	  in the Auxiliary Control Register, ACTLR[31] to 1.
+
 endmenu
 
 menu "Kernel Features"
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 195731d..e6d22c7 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -136,6 +136,29 @@ ENTRY(cpu_v7_do_resume)
 	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1
 #endif
 	mcr	p15, 0, r11, c2, c0, 2	@ TTB control register
+
+#ifdef CONFIG_ARM_ERRATA_763126
+	mrc	p15, 0, r0, c0, c0, 0	@ Check if we are A15s
+	ubfx	r1, r0, #4, #12
+	ldr	r2, =0x00000c0f
+	teq	r1, r2
+	bne	6f
+	and	r1, r0, #0x00f00000
+	and	r2, r0, #0x0000000f
+	orr	r2, r2, r1, lsr #20-4
+	cmp	r2, #0x20		@ Is the revision < r2p0
+	blt	6f			@ If so, skip
+	mrc	p15, 1, r0, c9, c0, 2	@ read L2 Control register
+	and	r1, r0, #0x03000000
+	cmp	r1, #0x02000000		@ less than 3 A15 cores?
+	blt	6f			@ if yes, erratum doesnt apply
+	/* Read and set auxiliary register */
+	mrc	p15, 0, r0, c1, c0, 1	@ Read Auxiliary control register
+	orr	r0, r0, #(0x1 << 31)	@ Set ACTLR[31] bit
+	mcr	p15, 0, r0, c1, c0, 1	@ Write to Auxiliary control register
+
+6:
+#endif
 	ldr	r4, =PRRR		@ PRRR
 	ldr	r5, =NMRR		@ NMRR
 	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
@@ -350,6 +373,26 @@ __v7_setup:
 	mcrle	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 
+#ifdef CONFIG_ARM_ERRATA_763126
+	/*
+	 * Add workaround for 763126, by setting the ACTLR[31] = 1.
+	 * This bit enables Snoop-delayed exclusive handling feature,
+	 * which delay snoops for a period of time to allow the
+	 * load exclusive/store exclusive loop to complete
+	 * and make forward progress.
+	 * The resume path setting is taken care in cpu_v7_do_resume
+	 */
+	cmp	r6, #0x20			@ present from r2p0 onwards
+	blt	5f
+	mrc	p15, 1, r0, c9, c0, 2		@ read L2 Control register
+	and	r1, r0, #0x03000000
+	cmp	r1, #0x02000000			@ less than 3 A15 cores?
+	blt	5f				@ if yes, erratum doesnt apply
+	mrc	p15, 0, r5, c1, c0, 1		@ read Auxiliary Control register
+	orr	r5, r5, #(0x1 << 31)
+	mcr	p15, 0, r5, c1, c0, 1		@ set Auxiliary Control register
+5:
+#endif
 4:	mov	r10, #0
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #ifdef CONFIG_MMU
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH] ARM: mm: Add workaround for erratum 763126
  2014-05-08  5:55 [PATCH] ARM: mm: Add workaround for erratum 763126 Arjun.K.V
@ 2014-05-08 16:57 ` Doug Anderson
  2014-05-08 17:38   ` Will Deacon
  0 siblings, 1 reply; 7+ messages in thread
From: Doug Anderson @ 2014-05-08 16:57 UTC (permalink / raw)
  To: linux-arm-kernel

Arjun,

Thanks for sending this.

On Wed, May 7, 2014 at 10:55 PM, Arjun.K.V <arjun.kv@samsung.com> wrote:
> From: "Arjun.K.V" <arjun.kv@samsung.com>
>
> Three processor exclusive access livelock.
> In a system with three or more coherent masters that all use
> the ldrex/strex synchronization primitives to access a semaphore
> in coherent cacheable memory, there is a possibility of a
> livelock condition where two masters continuously attempt
> and fail to get the lock while the third master
> continuously reads the lock.
>
> Workaround is to set the "Snoop-delayed exclusive handling"
> bit in the Auxiliary Control Register, ACTLR[31] to 1.
> This hardware is installed in each processor to detect
> that the load/store exclusive livelock scenario may be occurring
> and delay snoops for a period of time to allow the load
> exclusive/store exclusive loop to complete and make
> forward progress.
>
> Change-Id: Idcf066e25ea6571a0f5da6f3a770318c1a9d6fff
> Signed-off-by: Arjun.K.V <arjun.kv@samsung.com>
> ---
>  arch/arm/Kconfig      |   13 +++++++++++++
>  arch/arm/mm/proc-v7.S |   43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index db3c541..80b8562 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -1464,6 +1464,19 @@ source "drivers/pci/pcie/Kconfig"
>
>  source "drivers/pcmcia/Kconfig"
>
> +config ARM_ERRATA_763126
> +       bool "ARM errata: Three processor exclusive access livelock"
> +       depends on CPU_V7 && SMP
> +       help
> +         This enables the workaround got ARM Erratum 763126.
> +         In a system with three or more coherent masters that all use the
> +         ldrex/strex synchronization primitives to access a semaphore in
> +         coherent cacheable memory, there is a possibility of a livelock
> +         condition where two masters continuously attempt and fail to get
> +         the lock while the third master continuously reads the lock.
> +         Workaround is to set the "Snoop-delayed exclusive handling" bit
> +         in the Auxiliary Control Register, ACTLR[31] to 1.
> +
>  endmenu
>
>  menu "Kernel Features"
> diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
> index 195731d..e6d22c7 100644
> --- a/arch/arm/mm/proc-v7.S
> +++ b/arch/arm/mm/proc-v7.S
> @@ -136,6 +136,29 @@ ENTRY(cpu_v7_do_resume)
>         mcr     p15, 0, r7, c2, c0, 1   @ TTB 1
>  #endif
>         mcr     p15, 0, r11, c2, c0, 2  @ TTB control register
> +
> +#ifdef CONFIG_ARM_ERRATA_763126
> +       mrc     p15, 0, r0, c0, c0, 0   @ Check if we are A15s
> +       ubfx    r1, r0, #4, #12
> +       ldr     r2, =0x00000c0f
> +       teq     r1, r2
> +       bne     6f
> +       and     r1, r0, #0x00f00000
> +       and     r2, r0, #0x0000000f
> +       orr     r2, r2, r1, lsr #20-4
> +       cmp     r2, #0x20               @ Is the revision < r2p0

As I mentioned locally, I think this affects all A15s equally.
Specifically, I see that:

* this affects r0p4
* this affects r2p0, r2p1, r2p2, r2p3, r2p4
* this affects r3p0, r3p1, r3p2, r3p3
* this affects r4p0

Just get rid of the revision test and assume that on all A15s (w/ 3 or
more processors) you need this.

-Doug

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] ARM: mm: Add workaround for erratum 763126
  2014-05-08 16:57 ` Doug Anderson
@ 2014-05-08 17:38   ` Will Deacon
  2014-05-08 17:57     ` Doug Anderson
  0 siblings, 1 reply; 7+ messages in thread
From: Will Deacon @ 2014-05-08 17:38 UTC (permalink / raw)
  To: linux-arm-kernel

Hi guys,

On Thu, May 08, 2014 at 05:57:03PM +0100, Doug Anderson wrote:
> On Wed, May 7, 2014 at 10:55 PM, Arjun.K.V <arjun.kv@samsung.com> wrote:
> > From: "Arjun.K.V" <arjun.kv@samsung.com>
> >
> > Three processor exclusive access livelock.
> > In a system with three or more coherent masters that all use
> > the ldrex/strex synchronization primitives to access a semaphore
> > in coherent cacheable memory, there is a possibility of a
> > livelock condition where two masters continuously attempt
> > and fail to get the lock while the third master
> > continuously reads the lock.

Tentative NAK. You're paraphrasing the bug to make it sound worse than it
is -- whilst two of the cores need to be in a ldrex/strex loop, the third
needs to be issuing snoops from a normal load (to get the line into a shared
state). Please can you point me at the code in Linux which is triggering
this issue?

Cheers,

Will

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] ARM: mm: Add workaround for erratum 763126
  2014-05-08 17:38   ` Will Deacon
@ 2014-05-08 17:57     ` Doug Anderson
  2014-05-08 18:05       ` Will Deacon
  0 siblings, 1 reply; 7+ messages in thread
From: Doug Anderson @ 2014-05-08 17:57 UTC (permalink / raw)
  To: linux-arm-kernel

Will,

On Thu, May 8, 2014 at 10:38 AM, Will Deacon <will.deacon@arm.com> wrote:
> Hi guys,
>
> On Thu, May 08, 2014 at 05:57:03PM +0100, Doug Anderson wrote:
>> On Wed, May 7, 2014 at 10:55 PM, Arjun.K.V <arjun.kv@samsung.com> wrote:
>> > From: "Arjun.K.V" <arjun.kv@samsung.com>
>> >
>> > Three processor exclusive access livelock.
>> > In a system with three or more coherent masters that all use
>> > the ldrex/strex synchronization primitives to access a semaphore
>> > in coherent cacheable memory, there is a possibility of a
>> > livelock condition where two masters continuously attempt
>> > and fail to get the lock while the third master
>> > continuously reads the lock.
>
> Tentative NAK. You're paraphrasing the bug to make it sound worse than it
> is -- whilst two of the cores need to be in a ldrex/strex loop, the third
> needs to be issuing snoops from a normal load (to get the line into a shared
> state). Please can you point me at the code in Linux which is triggering
> this issue?

Arjun can correct me if I'm wrong, but I don't think there is any
known way to trigger the issue.  We haven't landed this fix locally in
our tree because there has been no evidence showing that it increases
stability.  If you guys say that it's a correct fix then we'll still
land it, but if (as you say) it's impossible to trigger in Linux then
it's a good reason to skip it!  :)

-Doug

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] ARM: mm: Add workaround for erratum 763126
  2014-05-08 17:57     ` Doug Anderson
@ 2014-05-08 18:05       ` Will Deacon
       [not found]         ` <CAFY15YHPSLMh6iA1PQHjsneL_Kxarc0xL5uJ7_px71cbmJbrgQ@mail.gmail.com>
  0 siblings, 1 reply; 7+ messages in thread
From: Will Deacon @ 2014-05-08 18:05 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, May 08, 2014 at 06:57:10PM +0100, Doug Anderson wrote:
> On Thu, May 8, 2014 at 10:38 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Thu, May 08, 2014 at 05:57:03PM +0100, Doug Anderson wrote:
> >> On Wed, May 7, 2014 at 10:55 PM, Arjun.K.V <arjun.kv@samsung.com> wrote:
> >> > From: "Arjun.K.V" <arjun.kv@samsung.com>
> >> >
> >> > Three processor exclusive access livelock.
> >> > In a system with three or more coherent masters that all use
> >> > the ldrex/strex synchronization primitives to access a semaphore
> >> > in coherent cacheable memory, there is a possibility of a
> >> > livelock condition where two masters continuously attempt
> >> > and fail to get the lock while the third master
> >> > continuously reads the lock.
> >
> > Tentative NAK. You're paraphrasing the bug to make it sound worse than it
> > is -- whilst two of the cores need to be in a ldrex/strex loop, the third
> > needs to be issuing snoops from a normal load (to get the line into a shared
> > state). Please can you point me at the code in Linux which is triggering
> > this issue?
> 
> Arjun can correct me if I'm wrong, but I don't think there is any
> known way to trigger the issue.  We haven't landed this fix locally in
> our tree because there has been no evidence showing that it increases
> stability.  If you guys say that it's a correct fix then we'll still
> land it, but if (as you say) it's impossible to trigger in Linux then
> it's a good reason to skip it!  :)

We basically don't think that any sane code would hit it. Now, there could
be insane code hanging out in userspace, so I'm prepared to believe that
this could lead to performance issues there but interrupts would fix the
lockup so it's not the end of the world.

Will

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] ARM: mm: Add workaround for erratum 763126
       [not found]         ` <CAFY15YHPSLMh6iA1PQHjsneL_Kxarc0xL5uJ7_px71cbmJbrgQ@mail.gmail.com>
@ 2014-05-09 14:44           ` Catalin Marinas
  2014-05-12 16:36           ` Doug Anderson
  1 sibling, 0 replies; 7+ messages in thread
From: Catalin Marinas @ 2014-05-09 14:44 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, May 09, 2014 at 02:56:02PM +0100, arjun kv wrote:
> As Doug already mentioned, we have not observed an issue which could have been
> triggered by this erratum.
> Nor do we have a method to actually trigger the livelock.
> But if it improves the performance and has no real side effects, we could?at
> least?land this in our local tree?

It's up to you, but do some benchmarks first.

ACTLR bits that improve performance on specific SoCs should really be
set by the firmware before booting the kernel. There is no way for the
kernel to know what works best for your system, especially when we deal
with implementation-defined registers.

-- 
Catalin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] ARM: mm: Add workaround for erratum 763126
       [not found]         ` <CAFY15YHPSLMh6iA1PQHjsneL_Kxarc0xL5uJ7_px71cbmJbrgQ@mail.gmail.com>
  2014-05-09 14:44           ` Catalin Marinas
@ 2014-05-12 16:36           ` Doug Anderson
  1 sibling, 0 replies; 7+ messages in thread
From: Doug Anderson @ 2014-05-12 16:36 UTC (permalink / raw)
  To: linux-arm-kernel

Arjun,

On Fri, May 9, 2014 at 6:56 AM, arjun kv <kvarjun@gmail.com> wrote:
> Hi All,
>
>
> As Doug already mentioned, we have not observed an issue which could have
> been triggered by this erratum.
> Nor do we have a method to actually trigger the livelock.
> But if it improves the performance and has no real side effects, we could at
> least land this in our local tree?

Right, this is not a discussion for upstream anymore.  If Will and
Catalin believe that this errata will not actually hit and that the
worst case is a performance problem then not applying it makes sense.

We can continue the discussion in
<https://chromium-review.googlesource.com/#/c/169732/>, but given what
I see here I'd even vote that we don't need to apply this locally.


Thanks to all for the review!

-Doug

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-05-12 16:36 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-08  5:55 [PATCH] ARM: mm: Add workaround for erratum 763126 Arjun.K.V
2014-05-08 16:57 ` Doug Anderson
2014-05-08 17:38   ` Will Deacon
2014-05-08 17:57     ` Doug Anderson
2014-05-08 18:05       ` Will Deacon
     [not found]         ` <CAFY15YHPSLMh6iA1PQHjsneL_Kxarc0xL5uJ7_px71cbmJbrgQ@mail.gmail.com>
2014-05-09 14:44           ` Catalin Marinas
2014-05-12 16:36           ` Doug Anderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.