* [PATCH] powerpc: add compile-time support for lbarx, lwarx
@ 2020-11-07 3:23 Nicholas Piggin
2020-11-07 7:12 ` Gabriel Paubert
2020-11-07 8:15 ` Christophe Leroy
0 siblings, 2 replies; 7+ messages in thread
From: Nicholas Piggin @ 2020-11-07 3:23 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
Add a compile option that allows code to use it, and add support in
cmpxchg and xchg 8 and 16 bit values.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 3 +
arch/powerpc/include/asm/cmpxchg.h | 236 ++++++++++++++++++++++++-
arch/powerpc/platforms/Kconfig.cputype | 5 +
3 files changed, 243 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..d231af06f75a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
+config PPC_LBARX_LWARX
+ bool
+
config EARLY_PRINTK
bool
default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..17fd996dc0d4 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \
* the previous value stored there.
*/
+#ifndef CONFIG_PPC_LBARX_LWARX
XCHG_GEN(u8, _local, "memory");
XCHG_GEN(u8, _relaxed, "cc");
XCHG_GEN(u16, _local, "memory");
XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lbarx %0,0,%2 \n"
+" stbcx. %3,0,%2 \n\
+ bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lbarx %0,0,%2\n"
+" stbcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lharx %0,0,%2 \n"
+" sthcx. %3,0,%2 \n\
+ bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lharx %0,0,%2\n"
+" sthcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
(unsigned long)_x_, sizeof(*(ptr))); \
})
+
/*
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
*/
-
+#ifndef CONFIG_PPC_LBARX_LWARX
CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
CMPXCHG_GEN(u8, _local, , , "memory");
CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
CMPXCHG_GEN(u16, _local, , , "memory");
CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+"1: lbarx %0,0,%2 # __cmpxchg_u8\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+" stbcx. %4,0,%2\n\
+ bne- 1b"
+ PPC_ATOMIC_EXIT_BARRIER
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+" stbcx. %4,0,%2\n\
+ bne- 1b"
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+"1: lharx %0,0,%2 # __cmpxchg_u16\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+" sthcx. %4,0,%2\n\
+ bne- 1b"
+ PPC_ATOMIC_EXIT_BARRIER
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+" sthcx. %4,0,%2\n\
+ bne- 1b"
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..2f8c8d61dba4 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -118,6 +118,7 @@ config GENERIC_CPU
bool "Generic (POWER8 and above)"
depends on PPC64 && CPU_LITTLE_ENDIAN
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_LBARX_LWARX
config GENERIC_CPU
bool "Generic 32 bits powerpc"
@@ -139,16 +140,19 @@ config POWER7_CPU
bool "POWER7"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_LBARX_LWARX
config POWER8_CPU
bool "POWER8"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_LBARX_LWARX
config POWER9_CPU
bool "POWER9"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_LBARX_LWARX
config E5500_CPU
bool "Freescale e5500"
@@ -157,6 +161,7 @@ config E5500_CPU
config E6500_CPU
bool "Freescale e6500"
depends on E500
+ select PPC_LBARX_LWARX
config 860_CPU
bool "8xx family"
--
2.23.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
2020-11-07 3:23 [PATCH] powerpc: add compile-time support for lbarx, lwarx Nicholas Piggin
@ 2020-11-07 7:12 ` Gabriel Paubert
2020-11-07 11:42 ` Segher Boessenkool
[not found] ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
2020-11-07 8:15 ` Christophe Leroy
1 sibling, 2 replies; 7+ messages in thread
From: Gabriel Paubert @ 2020-11-07 7:12 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
Hmm, lwarx exists since original Power AFAIR, s/lwarx/lharx/ perhaps?
Same for the title of the patch and the CONFIG variable.
Gabriel
> Add a compile option that allows code to use it, and add support in
> cmpxchg and xchg 8 and 16 bit values.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/Kconfig | 3 +
> arch/powerpc/include/asm/cmpxchg.h | 236 ++++++++++++++++++++++++-
> arch/powerpc/platforms/Kconfig.cputype | 5 +
> 3 files changed, 243 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index e9f13fe08492..d231af06f75a 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
> default y
> depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>
> +config PPC_LBARX_LWARX
> + bool
> +
> config EARLY_PRINTK
> bool
> default y
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..17fd996dc0d4 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \
> * the previous value stored there.
> */
>
> +#ifndef CONFIG_PPC_LBARX_LWARX
> XCHG_GEN(u8, _local, "memory");
> XCHG_GEN(u8, _relaxed, "cc");
> XCHG_GEN(u16, _local, "memory");
> XCHG_GEN(u16, _relaxed, "cc");
> +#else
> +static __always_inline unsigned long
> +__xchg_u8_local(volatile void *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lbarx %0,0,%2 \n"
> +" stbcx. %3,0,%2 \n\
> + bne- 1b"
> + : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
> + : "r" (p), "r" (val)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u8_relaxed(u8 *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lbarx %0,0,%2\n"
> +" stbcx. %3,0,%2\n"
> +" bne- 1b"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (val)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_local(volatile void *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lharx %0,0,%2 \n"
> +" sthcx. %3,0,%2 \n\
> + bne- 1b"
> + : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
> + : "r" (p), "r" (val)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_relaxed(u16 *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lharx %0,0,%2\n"
> +" sthcx. %3,0,%2\n"
> +" bne- 1b"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (val)
> + : "cc");
> +
> + return prev;
> +}
> +#endif
>
> static __always_inline unsigned long
> __xchg_u32_local(volatile void *p, unsigned long val)
> @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
> (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
> (unsigned long)_x_, sizeof(*(ptr))); \
> })
> +
> /*
> * Compare and exchange - if *p == old, set it to new,
> * and return the old value of *p.
> */
> -
> +#ifndef CONFIG_PPC_LBARX_LWARX
> CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
> CMPXCHG_GEN(u8, _local, , , "memory");
> CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
> CMPXCHG_GEN(u16, _local, , , "memory");
> CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> CMPXCHG_GEN(u16, _relaxed, , , "cc");
> +#else
> +static __always_inline unsigned long
> +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> + PPC_ATOMIC_ENTRY_BARRIER
> +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" stbcx. %4,0,%2\n\
> + bne- 1b"
> + PPC_ATOMIC_EXIT_BARRIER
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
> + unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" stbcx. %4,0,%2\n\
> + bne- 1b"
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" stbcx. %4,0,%2\n"
> +" bne- 1b\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" stbcx. %4,0,%2\n"
> +" bne- 1b\n"
> + PPC_ACQUIRE_BARRIER
> + "\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> + PPC_ATOMIC_ENTRY_BARRIER
> +"1: lharx %0,0,%2 # __cmpxchg_u16\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" sthcx. %4,0,%2\n\
> + bne- 1b"
> + PPC_ATOMIC_EXIT_BARRIER
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
> + unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> +"1: lharx %0,0,%2 # __cmpxchg_u16\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" sthcx. %4,0,%2\n\
> + bne- 1b"
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" sthcx. %4,0,%2\n"
> +" bne- 1b\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" sthcx. %4,0,%2\n"
> +" bne- 1b\n"
> + PPC_ACQUIRE_BARRIER
> + "\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +#endif
>
> static __always_inline unsigned long
> __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index c194c4ae8bc7..2f8c8d61dba4 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -118,6 +118,7 @@ config GENERIC_CPU
> bool "Generic (POWER8 and above)"
> depends on PPC64 && CPU_LITTLE_ENDIAN
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config GENERIC_CPU
> bool "Generic 32 bits powerpc"
> @@ -139,16 +140,19 @@ config POWER7_CPU
> bool "POWER7"
> depends on PPC_BOOK3S_64
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config POWER8_CPU
> bool "POWER8"
> depends on PPC_BOOK3S_64
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config POWER9_CPU
> bool "POWER9"
> depends on PPC_BOOK3S_64
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config E5500_CPU
> bool "Freescale e5500"
> @@ -157,6 +161,7 @@ config E5500_CPU
> config E6500_CPU
> bool "Freescale e6500"
> depends on E500
> + select PPC_LBARX_LWARX
>
> config 860_CPU
> bool "8xx family"
> --
> 2.23.0
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
2020-11-07 3:23 [PATCH] powerpc: add compile-time support for lbarx, lwarx Nicholas Piggin
2020-11-07 7:12 ` Gabriel Paubert
@ 2020-11-07 8:15 ` Christophe Leroy
2020-11-10 8:18 ` Nicholas Piggin
1 sibling, 1 reply; 7+ messages in thread
From: Christophe Leroy @ 2020-11-07 8:15 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Le 07/11/2020 à 04:23, Nicholas Piggin a écrit :
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> Add a compile option that allows code to use it, and add support in
> cmpxchg and xchg 8 and 16 bit values.
Do you mean lharx ? Because lwarx exists on all powerpcs I think.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/Kconfig | 3 +
> arch/powerpc/include/asm/cmpxchg.h | 236 ++++++++++++++++++++++++-
> arch/powerpc/platforms/Kconfig.cputype | 5 +
> 3 files changed, 243 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index e9f13fe08492..d231af06f75a 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
> default y
> depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>
> +config PPC_LBARX_LWARX
> + bool
s/LWARX/LHARX/ ?
And maybe better with PPC_HAS_LBARX_LWARX ?
> +
> config EARLY_PRINTK
> bool
> default y
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..17fd996dc0d4 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \
> * the previous value stored there.
> */
>
> +#ifndef CONFIG_PPC_LBARX_LWARX
> XCHG_GEN(u8, _local, "memory");
> XCHG_GEN(u8, _relaxed, "cc");
> XCHG_GEN(u16, _local, "memory");
> XCHG_GEN(u16, _relaxed, "cc");
> +#else
> +static __always_inline unsigned long
> +__xchg_u8_local(volatile void *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lbarx %0,0,%2 \n"
> +" stbcx. %3,0,%2 \n\
> + bne- 1b"
> + : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
> + : "r" (p), "r" (val)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u8_relaxed(u8 *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lbarx %0,0,%2\n"
> +" stbcx. %3,0,%2\n"
> +" bne- 1b"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (val)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_local(volatile void *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lharx %0,0,%2 \n"
> +" sthcx. %3,0,%2 \n\
> + bne- 1b"
> + : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
> + : "r" (p), "r" (val)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_relaxed(u16 *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1: lharx %0,0,%2\n"
> +" sthcx. %3,0,%2\n"
> +" bne- 1b"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (val)
> + : "cc");
> +
> + return prev;
> +}
> +#endif
That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?
>
> static __always_inline unsigned long
> __xchg_u32_local(volatile void *p, unsigned long val)
> @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
> (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
> (unsigned long)_x_, sizeof(*(ptr))); \
> })
> +
> /*
> * Compare and exchange - if *p == old, set it to new,
> * and return the old value of *p.
> */
> -
> +#ifndef CONFIG_PPC_LBARX_LWARX
> CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
> CMPXCHG_GEN(u8, _local, , , "memory");
> CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
> CMPXCHG_GEN(u16, _local, , , "memory");
> CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> CMPXCHG_GEN(u16, _relaxed, , , "cc");
> +#else
> +static __always_inline unsigned long
> +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> + PPC_ATOMIC_ENTRY_BARRIER
> +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" stbcx. %4,0,%2\n\
> + bne- 1b"
> + PPC_ATOMIC_EXIT_BARRIER
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
> + unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" stbcx. %4,0,%2\n\
> + bne- 1b"
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" stbcx. %4,0,%2\n"
> +" bne- 1b\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" stbcx. %4,0,%2\n"
> +" bne- 1b\n"
> + PPC_ACQUIRE_BARRIER
> + "\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> + PPC_ATOMIC_ENTRY_BARRIER
> +"1: lharx %0,0,%2 # __cmpxchg_u16\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" sthcx. %4,0,%2\n\
> + bne- 1b"
> + PPC_ATOMIC_EXIT_BARRIER
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
> + unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> +"1: lharx %0,0,%2 # __cmpxchg_u16\n\
> + cmpw 0,%0,%3\n\
> + bne- 2f\n"
> +" sthcx. %4,0,%2\n\
> + bne- 1b"
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" sthcx. %4,0,%2\n"
> +" bne- 1b\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__ (
> +"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n"
> +" cmpw 0,%0,%3\n"
> +" bne- 2f\n"
> +" sthcx. %4,0,%2\n"
> +" bne- 1b\n"
> + PPC_ACQUIRE_BARRIER
> + "\n"
> +"2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +#endif
That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?
>
> static __always_inline unsigned long
> __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index c194c4ae8bc7..2f8c8d61dba4 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -118,6 +118,7 @@ config GENERIC_CPU
> bool "Generic (POWER8 and above)"
> depends on PPC64 && CPU_LITTLE_ENDIAN
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
s/LWARX/LHARX/ ?
>
> config GENERIC_CPU
> bool "Generic 32 bits powerpc"
> @@ -139,16 +140,19 @@ config POWER7_CPU
> bool "POWER7"
> depends on PPC_BOOK3S_64
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config POWER8_CPU
> bool "POWER8"
> depends on PPC_BOOK3S_64
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config POWER9_CPU
> bool "POWER9"
> depends on PPC_BOOK3S_64
> select ARCH_HAS_FAST_MULTIPLIER
> + select PPC_LBARX_LWARX
>
> config E5500_CPU
> bool "Freescale e5500"
> @@ -157,6 +161,7 @@ config E5500_CPU
> config E6500_CPU
> bool "Freescale e6500"
> depends on E500
> + select PPC_LBARX_LWARX
>
> config 860_CPU
> bool "8xx family"
>
Christophe
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
2020-11-07 7:12 ` Gabriel Paubert
@ 2020-11-07 11:42 ` Segher Boessenkool
[not found] ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
1 sibling, 0 replies; 7+ messages in thread
From: Segher Boessenkool @ 2020-11-07 11:42 UTC (permalink / raw)
To: Gabriel Paubert; +Cc: linuxppc-dev, Nicholas Piggin
On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
>
> Hmm, lwarx exists since original Power AFAIR,
Almost: it was new on PowerPC.
Segher
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
[not found] ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
@ 2020-11-08 20:01 ` Gabriel Paubert
2020-11-09 12:34 ` Segher Boessenkool
0 siblings, 1 reply; 7+ messages in thread
From: Gabriel Paubert @ 2020-11-08 20:01 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: linuxppc-dev, Nicholas Piggin
On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> >
> > Hmm, lwarx exists since original Power AFAIR,
>
> Almost: it was new on PowerPC.
I stand corrected. Does this mean that Power1 (and 2 I believe) had
no SMP support?
Gabriel
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
2020-11-08 20:01 ` Gabriel Paubert
@ 2020-11-09 12:34 ` Segher Boessenkool
0 siblings, 0 replies; 7+ messages in thread
From: Segher Boessenkool @ 2020-11-09 12:34 UTC (permalink / raw)
To: Gabriel Paubert; +Cc: linuxppc-dev, Nicholas Piggin
On Sun, Nov 08, 2020 at 09:01:52PM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> > On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > >
> > > Hmm, lwarx exists since original Power AFAIR,
> >
> > Almost: it was new on PowerPC.
>
> I stand corrected. Does this mean that Power1 (and 2 I believe) had
> no SMP support?
As I understand it, that's correct. Of course you can always do SMP "by
hand" -- you can do all synchronisation in software (perhaps using some
knowledge of the specific hardware you're running on); it's just slow
(and usually not portable). Compare with SMP on the 603, for example.
Segher
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
2020-11-07 8:15 ` Christophe Leroy
@ 2020-11-10 8:18 ` Nicholas Piggin
0 siblings, 0 replies; 7+ messages in thread
From: Nicholas Piggin @ 2020-11-10 8:18 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev
Excerpts from Christophe Leroy's message of November 7, 2020 6:15 pm:
>
>
> Le 07/11/2020 à 04:23, Nicholas Piggin a écrit :
>> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
>> Add a compile option that allows code to use it, and add support in
>> cmpxchg and xchg 8 and 16 bit values.
>
> Do you mean lharx ? Because lwarx exists on all powerpcs I think.
Thanks to everyone who pointed out the mistake :) Yes, I meant lharx.
>
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> arch/powerpc/Kconfig | 3 +
>> arch/powerpc/include/asm/cmpxchg.h | 236 ++++++++++++++++++++++++-
>> arch/powerpc/platforms/Kconfig.cputype | 5 +
>> 3 files changed, 243 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index e9f13fe08492..d231af06f75a 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>> default y
>> depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>>
>> +config PPC_LBARX_LWARX
>> + bool
>
> s/LWARX/LHARX/ ?
>
> And maybe better with PPC_HAS_LBARX_LWARX ?
Yes, you're right, a PPC_HAS_ prefix fits better.
[...]
>> +#endif
>
> That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in
> arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?
For now I won't get too fancy. It's a bit ugly, but I'm working through a
generic atomics conversion patch and also trying to work out a nice form
for larx/stcx operation generation macros, so I'll look at tidying this
up some time after that.
Thanks,
Nick
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2020-11-10 8:21 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-07 3:23 [PATCH] powerpc: add compile-time support for lbarx, lwarx Nicholas Piggin
2020-11-07 7:12 ` Gabriel Paubert
2020-11-07 11:42 ` Segher Boessenkool
[not found] ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
2020-11-08 20:01 ` Gabriel Paubert
2020-11-09 12:34 ` Segher Boessenkool
2020-11-07 8:15 ` Christophe Leroy
2020-11-10 8:18 ` Nicholas Piggin
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.