* [PATCH] powerpc: add compile-time support for lbarx, lwarx
@ 2020-11-07  3:23 Nicholas Piggin
  2020-11-07  7:12 ` Gabriel Paubert
  2020-11-07  8:15 ` Christophe Leroy
  0 siblings, 2 replies; 7+ messages in thread
From: Nicholas Piggin @ 2020-11-07  3:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
Add a compile option that allows code to use it, and add support in
cmpxchg and xchg 8 and 16 bit values.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Kconfig                   |   3 +
 arch/powerpc/include/asm/cmpxchg.h     | 236 ++++++++++++++++++++++++-
 arch/powerpc/platforms/Kconfig.cputype |   5 +
 3 files changed, 243 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..d231af06f75a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
 	default y
 	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
 
+config PPC_LBARX_LWARX
+	bool
+
 config EARLY_PRINTK
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..17fd996dc0d4 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)	\
  * the previous value stored there.
  */
 
+#ifndef CONFIG_PPC_LBARX_LWARX
 XCHG_GEN(u8, _local, "memory");
 XCHG_GEN(u8, _relaxed, "cc");
 XCHG_GEN(u16, _local, "memory");
 XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2 \n"
+"	stbcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2\n"
+"	stbcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2 \n"
+"	sthcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2\n"
+"	sthcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
 			(unsigned long)_x_, sizeof(*(ptr)));		\
 })
+
 /*
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-
+#ifndef CONFIG_PPC_LBARX_LWARX
 CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u8, _local, , , "memory");
 CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u16, _local, , , "memory");
 CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
 CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stbcx.	%4,0,%2\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stbcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+	"\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lharx	%0,0,%2		# __cmpxchg_u16\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	sthcx.	%4,0,%2\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	sthcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+	"\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..2f8c8d61dba4 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -118,6 +118,7 @@ config GENERIC_CPU
 	bool "Generic (POWER8 and above)"
 	depends on PPC64 && CPU_LITTLE_ENDIAN
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config GENERIC_CPU
 	bool "Generic 32 bits powerpc"
@@ -139,16 +140,19 @@ config POWER7_CPU
 	bool "POWER7"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config POWER8_CPU
 	bool "POWER8"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config POWER9_CPU
 	bool "POWER9"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config E5500_CPU
 	bool "Freescale e5500"
@@ -157,6 +161,7 @@ config E5500_CPU
 config E6500_CPU
 	bool "Freescale e6500"
 	depends on E500
+	select PPC_LBARX_LWARX
 
 config 860_CPU
 	bool "8xx family"
-- 
2.23.0



* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
  2020-11-07  3:23 [PATCH] powerpc: add compile-time support for lbarx, lwarx Nicholas Piggin
@ 2020-11-07  7:12 ` Gabriel Paubert
  2020-11-07 11:42   ` Segher Boessenkool
       [not found]   ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
  2020-11-07  8:15 ` Christophe Leroy
  1 sibling, 2 replies; 7+ messages in thread
From: Gabriel Paubert @ 2020-11-07  7:12 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev

On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.

Hmm, lwarx exists since original Power AFAIR, s/lwarx/lharx/ perhaps?

Same for the title of the patch and the CONFIG variable.	

	Gabriel


* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
  2020-11-07  3:23 [PATCH] powerpc: add compile-time support for lbarx, lwarx Nicholas Piggin
  2020-11-07  7:12 ` Gabriel Paubert
@ 2020-11-07  8:15 ` Christophe Leroy
  2020-11-10  8:18   ` Nicholas Piggin
  1 sibling, 1 reply; 7+ messages in thread
From: Christophe Leroy @ 2020-11-07  8:15 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev



On 07/11/2020 at 04:23, Nicholas Piggin wrote:
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> Add a compile option that allows code to use it, and add support in
> cmpxchg and xchg 8 and 16 bit values.

Do you mean lharx ? Because lwarx exists on all powerpcs I think.

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/Kconfig                   |   3 +
>   arch/powerpc/include/asm/cmpxchg.h     | 236 ++++++++++++++++++++++++-
>   arch/powerpc/platforms/Kconfig.cputype |   5 +
>   3 files changed, 243 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index e9f13fe08492..d231af06f75a 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>   	default y
>   	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>   
> +config PPC_LBARX_LWARX
> +	bool

s/LWARX/LHARX/ ?

And maybe better with PPC_HAS_LBARX_LWARX ?

> +
>   config EARLY_PRINTK
>   	bool
>   	default y
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..17fd996dc0d4 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)	\
>    * the previous value stored there.
>    */
>   
> +#ifndef CONFIG_PPC_LBARX_LWARX
>   XCHG_GEN(u8, _local, "memory");
>   XCHG_GEN(u8, _relaxed, "cc");
>   XCHG_GEN(u16, _local, "memory");
>   XCHG_GEN(u16, _relaxed, "cc");
> +#else
> +static __always_inline unsigned long
> +__xchg_u8_local(volatile void *p, unsigned long val)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__(
> +"1:	lbarx	%0,0,%2 \n"
> +"	stbcx.	%3,0,%2 \n\
> +	bne-	1b"
> +	: "=&r" (prev), "+m" (*(volatile unsigned char *)p)
> +	: "r" (p), "r" (val)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u8_relaxed(u8 *p, unsigned long val)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__(
> +"1:	lbarx	%0,0,%2\n"
> +"	stbcx.	%3,0,%2\n"
> +"	bne-	1b"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (val)
> +	: "cc");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_local(volatile void *p, unsigned long val)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__(
> +"1:	lharx	%0,0,%2 \n"
> +"	sthcx.	%3,0,%2 \n\
> +	bne-	1b"
> +	: "=&r" (prev), "+m" (*(volatile unsigned short *)p)
> +	: "r" (p), "r" (val)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_relaxed(u16 *p, unsigned long val)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__(
> +"1:	lharx	%0,0,%2\n"
> +"	sthcx.	%3,0,%2\n"
> +"	bne-	1b"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (val)
> +	: "cc");
> +
> +	return prev;
> +}
> +#endif

That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?
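
For example, something along these lines could generate all four variants 
from one template (a completely untested sketch, and XCHG_LARX_GEN is just a 
made-up name -- an illustration of the idea, not a concrete proposal):

#define XCHG_LARX_GEN(type, sfx, larx, stcx, clobbers...)		\
static __always_inline unsigned long					\
__xchg_##type##sfx(volatile void *p, unsigned long val)		\
{									\
	unsigned long prev;						\
									\
	/* load-reserve the old value, try to store the new one */	\
	__asm__ __volatile__(						\
"1:	" larx "	%0,0,%2\n"					\
"	" stcx "	%3,0,%2\n"					\
"	bne-	1b"							\
	: "=&r" (prev), "+m" (*(volatile type *)p)			\
	: "r" (p), "r" (val)						\
	: clobbers);							\
									\
	return prev;							\
}

XCHG_LARX_GEN(u8, _local, "lbarx", "stbcx.", "cc", "memory");
XCHG_LARX_GEN(u8, _relaxed, "lbarx", "stbcx.", "cc");
XCHG_LARX_GEN(u16, _local, "lharx", "sthcx.", "cc", "memory");
XCHG_LARX_GEN(u16, _relaxed, "lharx", "sthcx.", "cc");

The cmpxchg variants could presumably be folded the same way, with the 
entry/exit barrier strings passed as extra parameters like CMPXCHG_GEN() 
already does.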

>   
>   static __always_inline unsigned long
>   __xchg_u32_local(volatile void *p, unsigned long val)
> @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
>   	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
>   			(unsigned long)_x_, sizeof(*(ptr)));		\
>   })
> +
>   /*
>    * Compare and exchange - if *p == old, set it to new,
>    * and return the old value of *p.
>    */
> -
> +#ifndef CONFIG_PPC_LBARX_LWARX
>   CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
>   CMPXCHG_GEN(u8, _local, , , "memory");
>   CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
>   CMPXCHG_GEN(u16, _local, , , "memory");
>   CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
>   CMPXCHG_GEN(u16, _relaxed, , , "cc");
> +#else
> +static __always_inline unsigned long
> +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
> +{
> +	unsigned int prev;
> +
> +	__asm__ __volatile__ (
> +	PPC_ATOMIC_ENTRY_BARRIER
> +"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
> +	cmpw	0,%0,%3\n\
> +	bne-	2f\n"
> +"	stbcx.	%4,0,%2\n\
> +	bne-	1b"
> +	PPC_ATOMIC_EXIT_BARRIER
> +	"\n\
> +2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
> +			unsigned long new)
> +{
> +	unsigned int prev;
> +
> +	__asm__ __volatile__ (
> +"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
> +	cmpw	0,%0,%3\n\
> +	bne-	2f\n"
> +"	stbcx.	%4,0,%2\n\
> +	bne-	1b"
> +	"\n\
> +2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__ (
> +"1:	lbarx	%0,0,%2		# __cmpxchg_u8_relaxed\n"
> +"	cmpw	0,%0,%3\n"
> +"	bne-	2f\n"
> +"	stbcx.	%4,0,%2\n"
> +"	bne-	1b\n"
> +"2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__ (
> +"1:	lbarx	%0,0,%2		# __cmpxchg_u8_acquire\n"
> +"	cmpw	0,%0,%3\n"
> +"	bne-	2f\n"
> +"	stbcx.	%4,0,%2\n"
> +"	bne-	1b\n"
> +	PPC_ACQUIRE_BARRIER
> +	"\n"
> +"2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
> +{
> +	unsigned int prev;
> +
> +	__asm__ __volatile__ (
> +	PPC_ATOMIC_ENTRY_BARRIER
> +"1:	lharx	%0,0,%2		# __cmpxchg_u16\n\
> +	cmpw	0,%0,%3\n\
> +	bne-	2f\n"
> +"	sthcx.	%4,0,%2\n\
> +	bne-	1b"
> +	PPC_ATOMIC_EXIT_BARRIER
> +	"\n\
> +2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
> +			unsigned long new)
> +{
> +	unsigned int prev;
> +
> +	__asm__ __volatile__ (
> +"1:	lharx	%0,0,%2		# __cmpxchg_u16\n\
> +	cmpw	0,%0,%3\n\
> +	bne-	2f\n"
> +"	sthcx.	%4,0,%2\n\
> +	bne-	1b"
> +	"\n\
> +2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__ (
> +"1:	lharx	%0,0,%2		# __cmpxchg_u16_relaxed\n"
> +"	cmpw	0,%0,%3\n"
> +"	bne-	2f\n"
> +"	sthcx.	%4,0,%2\n"
> +"	bne-	1b\n"
> +"2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc");
> +
> +	return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__ (
> +"1:	lharx	%0,0,%2		# __cmpxchg_u16_acquire\n"
> +"	cmpw	0,%0,%3\n"
> +"	bne-	2f\n"
> +"	sthcx.	%4,0,%2\n"
> +"	bne-	1b\n"
> +	PPC_ACQUIRE_BARRIER
> +	"\n"
> +"2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +#endif

That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?

>   
>   static __always_inline unsigned long
>   __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index c194c4ae8bc7..2f8c8d61dba4 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -118,6 +118,7 @@ config GENERIC_CPU
>   	bool "Generic (POWER8 and above)"
>   	depends on PPC64 && CPU_LITTLE_ENDIAN
>   	select ARCH_HAS_FAST_MULTIPLIER
> +	select PPC_LBARX_LWARX

s/LWARX/LHARX/ ?

[...]

Christophe


* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
  2020-11-07  7:12 ` Gabriel Paubert
@ 2020-11-07 11:42   ` Segher Boessenkool
       [not found]   ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
  1 sibling, 0 replies; 7+ messages in thread
From: Segher Boessenkool @ 2020-11-07 11:42 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc-dev, Nicholas Piggin

On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> 
> Hmm, lwarx exists since original Power AFAIR,

Almost: it was new on PowerPC.


Segher


* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
       [not found]   ` <0810564117125.202011.20201107114257.GG2672@gate.crashing.org>
@ 2020-11-08 20:01     ` Gabriel Paubert
  2020-11-09 12:34       ` Segher Boessenkool
  0 siblings, 1 reply; 7+ messages in thread
From: Gabriel Paubert @ 2020-11-08 20:01 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linuxppc-dev, Nicholas Piggin

On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > 
> > Hmm, lwarx exists since original Power AFAIR,
> 
> Almost: it was new on PowerPC.

I stand corrected. Does this mean that Power1 (and 2 I believe) had 
no SMP support?

	Gabriel
 



* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
  2020-11-08 20:01     ` Gabriel Paubert
@ 2020-11-09 12:34       ` Segher Boessenkool
  0 siblings, 0 replies; 7+ messages in thread
From: Segher Boessenkool @ 2020-11-09 12:34 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc-dev, Nicholas Piggin

On Sun, Nov 08, 2020 at 09:01:52PM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> > On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > > 
> > > Hmm, lwarx exists since original Power AFAIR,
> > 
> > Almost: it was new on PowerPC.
> 
> I stand corrected. Does this mean that Power1 (and 2 I believe) had 
> no SMP support?

As I understand it, that's correct.  Of course you can always do SMP "by
hand" -- you can do all synchronisation via software (perhaps using some
knowledge of the specific hardware you're running on); it's just slow
(and usually not portable).  Compare to SMP on the 603, for example.


Segher


* Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
  2020-11-07  8:15 ` Christophe Leroy
@ 2020-11-10  8:18   ` Nicholas Piggin
  0 siblings, 0 replies; 7+ messages in thread
From: Nicholas Piggin @ 2020-11-10  8:18 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev

Excerpts from Christophe Leroy's message of November 7, 2020 6:15 pm:
> 
> 
> On 07/11/2020 at 04:23, Nicholas Piggin wrote:
>> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
>> Add a compile option that allows code to use it, and add support in
>> cmpxchg and xchg 8 and 16 bit values.
> 
> Do you mean lharx ? Because lwarx exists on all powerpcs I think.

Thanks all who pointed out mistakes :) Yes lharx.

> 
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/Kconfig                   |   3 +
>>   arch/powerpc/include/asm/cmpxchg.h     | 236 ++++++++++++++++++++++++-
>>   arch/powerpc/platforms/Kconfig.cputype |   5 +
>>   3 files changed, 243 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index e9f13fe08492..d231af06f75a 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>>   	default y
>>   	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>>   
>> +config PPC_LBARX_LWARX
>> +	bool
> 
> s/LWARX/LHARX/ ?
> 
> And maybe better with PPC_HAS_LBARX_LWARX ?

Yes you're right, PPC_HAS_ fits better.

[...]

>> +#endif
> 
> That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
> arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?

For now I haven't got too fancy. It's a bit ugly, but I'm working through a 
generic atomics conversion patch and also trying to work out a nice form
for larx/stcx operation generation macros; I'll look at tidying this up
some time after that.

Thanks,
Nick

