* [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
@ 2020-05-05  9:02 Shaokun Zhang
  2020-05-05  9:15 ` Will Deacon
From: Shaokun Zhang @ 2020-05-05  9:02 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Shaokun Zhang, Yuqi Jin, Andrew Murray, Will Deacon, Catalin Marinas

From: Yuqi Jin <jinyuqi@huawei.com>

Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
inline implementations of both the LSE and LL/SC atomics have been provided,
with a static key used to select between them, which allows the compiler to
generate better atomics code.
However, xchg still uses the original method, which would fail to switch to
the atomic instruction correctly. Let's fix this issue.

Fixes: addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics")
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Murray <amurray@thegoodpenguin.co.uk>
Signed-off-by: Yuqi Jin <jinyuqi@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
---
 arch/arm64/include/asm/atomic_ll_sc.h | 41 ++++++++++++++++++
 arch/arm64/include/asm/atomic_lse.h   | 35 +++++++++++++++
 arch/arm64/include/asm/cmpxchg.h      | 82 ++++++++---------------------------
 3 files changed, 93 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 13869b76b58c..73fcb71ccb91 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -348,6 +348,47 @@ __CMPXCHG_DBL(   ,        ,  ,         )
 __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
 
 #undef __CMPXCHG_DBL
+
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)      \
+static inline u##sz __ll_sc__xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{                                                                              \
+	u##sz ret;                                                              \
+	unsigned long tmp;                                                      \
+										\
+	asm volatile(                                                           \
+	__LL_SC_FALLBACK(                                                       \
+	"       prfm    pstl1strm, %2\n"                                        \
+	"1:     ld" #acq "xr" #sfx "\t%" #w "0, %2\n"                           \
+	"       st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n"                      \
+	"       cbnz    %w1, 1b\n"                                              \
+	"       " #mb "\n"                                                      \
+	"2:")                                                                   \
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)                        \
+	: "r" (x)                                                               \
+	: cl);                                                                  \
+										\
+	return ret;                                                             \
+}
+
+__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
 #undef K
 
 #endif	/* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index da3280f639cd..ddb2c212faa3 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -416,4 +416,39 @@ __CMPXCHG_DBL(_mb, al, "memory")
 
 #undef __CMPXCHG_DBL
 
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)             \
+static __always_inline u##sz __lse__xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{                                                                                     \
+	u##sz ret;                                                                     \
+	unsigned long tmp;                                                             \
+										       \
+	asm volatile(                                                                  \
+	__LSE_PREAMBLE                                                                 \
+	"       swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n"                   \
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)                               \
+	: "r" (x)                                                                      \
+	: cl);                                                                         \
+										       \
+	return ret;                                                                    \
+}
+
+__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
 #endif	/* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f9bef42c1411..084028518417 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -13,73 +13,25 @@
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
-/*
- * We need separate acquire parameters for ll/sc and lse, since the full
- * barrier case is generated as release+dmb for the former and
- * acquire+release for the latter.
- */
-#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)	\
-static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)		\
-{										\
-	u##sz ret;								\
-	unsigned long tmp;							\
-										\
-	asm volatile(ARM64_LSE_ATOMIC_INSN(					\
-	/* LL/SC */								\
-	"	prfm	pstl1strm, %2\n"					\
-	"1:	ld" #acq "xr" #sfx "\t%" #w "0, %2\n"				\
-	"	st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n"			\
-	"	cbnz	%w1, 1b\n"						\
-	"	" #mb,								\
-	/* LSE atomics */							\
-	"	swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n"		\
-		__nops(3)							\
-	"	" #nop_lse)							\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)			\
-	: "r" (x)								\
-	: cl);									\
-										\
-	return ret;								\
-}
-
-__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
-__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
-__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
-__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
-__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
-__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
-__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
-
-#undef __XCHG_CASE
-
 #define __XCHG_GEN(sfx)							\
-static __always_inline  unsigned long __xchg##sfx(unsigned long x,	\
-					volatile void *ptr,		\
-					int size)			\
-{									\
-	switch (size) {							\
-	case 1:								\
-		return __xchg_case##sfx##_8(x, ptr);			\
-	case 2:								\
-		return __xchg_case##sfx##_16(x, ptr);			\
-	case 4:								\
-		return __xchg_case##sfx##_32(x, ptr);			\
-	case 8:								\
-		return __xchg_case##sfx##_64(x, ptr);			\
-	default:							\
-		BUILD_BUG();						\
-	}								\
+static __always_inline  unsigned long __xchg##sfx(unsigned long x,     \
+					volatile void *ptr,             \
+					int size)                       \
+{                                                                      \
+	switch (size) {                                                 \
+	case 1:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_8, x, ptr);   \
+	case 2:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_16, x, ptr);  \
+	case 4:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_32, x, ptr);  \
+	case 8:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_64, x, ptr);  \
+	default:                                                        \
+		BUILD_BUG();                                            \
+	}                                                               \
 									\
-	unreachable();							\
+	unreachable();                                                  \
 }
 
 __XCHG_GEN()
-- 
2.7.4


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-05  9:02 [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction Shaokun Zhang
@ 2020-05-05  9:15 ` Will Deacon
  2020-05-06  7:00   ` Shaokun Zhang
From: Will Deacon @ 2020-05-05  9:15 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
> From: Yuqi Jin <jinyuqi@huawei.com>
> 
> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
> it has provided inline implementations of both LSE and ll/sc and used a static
> key to select between them, which allows the compiler to generate better
> atomics code.
> However, xchg still uses the original method which would fail to switch to
> the atomic instruction correctly, Let's fix this issue.

Please can you elaborate on the failure mode? The current code looks alright
to me, so I'm clearly missing something. What's broken?

Will

* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-05  9:15 ` Will Deacon
@ 2020-05-06  7:00   ` Shaokun Zhang
  2020-05-06  7:53     ` Will Deacon
From: Shaokun Zhang @ 2020-05-06  7:00 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/5 17:15, Will Deacon wrote:
> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>> From: Yuqi Jin <jinyuqi@huawei.com>
>>
>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>> it has provided inline implementations of both LSE and ll/sc and used a static
>> key to select between them, which allows the compiler to generate better
>> atomics code.
>> However, xchg still uses the original method which would fail to switch to
>> the atomic instruction correctly, Let's fix this issue.
> 
> Please can you elaborate on the failure mode? The current code looks alright

When CONFIG_ARM64_LSE_ATOMICS is enabled, xchg fails to switch to the swp
instruction; or at least, the dynamically patched instructions are not visible.

We ran some tests on a copy of xchg_tail:
u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
{
        return (u32)xchg_relaxed(&lock->tail,
                                 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
and the asm code is as follows:

ffff80001015b050 <xchg_tail_my>:
ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
ffff80001015b054:       910003fd        mov     x29, sp
ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
ffff80001015b05c:       2a0103f3        mov     w19, w1
ffff80001015b060:       aa0003f4        mov     x20, x0
ffff80001015b064:       aa1e03e0        mov     x0, x30
ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
ffff80001015b06c:       53107e61        lsr     w1, w19, #16
ffff80001015b070:       91000a83        add     x3, x20, #0x2
ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
ffff80001015b084:       53103c00        lsl     w0, w0, #16
ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
ffff80001015b090:       d65f03c0        ret

> to me, so I'm clearly missing something. What's broken?
> 

I'm not sure whether ARM64_LSE_ATOMIC_INSN still works correctly after commit
addfc38672c7. If we implement xchg using __lse_ll_sc_body, as cmpxchg_case does,
xchg works fine.
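
For reference, the __lse_ll_sc_body() dispatch that the cmpxchg_case helpers go
through is roughly the following (a simplified sketch of arch/arm64/include/asm/lse.h
from around that time; the exact capability and static-key names are from memory
and may differ):

/* Both variants are built inline; a static key picks one at runtime. */
static __always_inline bool system_uses_lse_atomics(void)
{
	return static_branch_likely(&arm64_const_caps_ready) &&
	       static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
}

#define __lse_ll_sc_body(op, ...)					\
({									\
	system_uses_lse_atomics() ?					\
		__lse_##op(__VA_ARGS__) :				\
		__ll_sc_##op(__VA_ARGS__);				\
})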

What's more, I am wondering why xchg still uses the dynamic replacement mode
while cmpxchg uses another mode. ;-)

Thanks,
Shaokun

> Will
> 
> .
> 


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06  7:00   ` Shaokun Zhang
@ 2020-05-06  7:53     ` Will Deacon
  2020-05-06 10:39       ` Shaokun Zhang
From: Will Deacon @ 2020-05-06  7:53 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
> On 2020/5/5 17:15, Will Deacon wrote:
> > On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
> >> From: Yuqi Jin <jinyuqi@huawei.com>
> >>
> >> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
> >> it has provided inline implementations of both LSE and ll/sc and used a static
> >> key to select between them, which allows the compiler to generate better
> >> atomics code.
> >> However, xchg still uses the original method which would fail to switch to
> >> the atomic instruction correctly, Let's fix this issue.
> > 
> > Please can you elaborate on the failure mode? The current code looks alright
> 
> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
> or dynamic replacement instructions are not seen.
> 
> We do some tests on the copy of xchg_tail,:
> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
> {
>         return (u32)xchg_relaxed(&lock->tail,
>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> }
> and the asm code is as follows:
> 
> ffff80001015b050 <xchg_tail_my>:
> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
> ffff80001015b054:       910003fd        mov     x29, sp
> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
> ffff80001015b05c:       2a0103f3        mov     w19, w1
> ffff80001015b060:       aa0003f4        mov     x20, x0
> ffff80001015b064:       aa1e03e0        mov     x0, x30
> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
> ffff80001015b070:       91000a83        add     x3, x20, #0x2
> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
> ffff80001015b084:       53103c00        lsl     w0, w0, #16
> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
> ffff80001015b090:       d65f03c0        ret

This should get patched at runtime, but you're saying that's not happening?

> > to me, so I'm clearly missing something. What's broken?
> > 
> 
> I'm not sure whether the ARM64_LSE_ATOMIC_INSN could works correctly after the
> commit addfc38672c7. If we implement xchg using __lse_ll_sc_body like cmpxchg_case,
> xchg works ok.
> 
> What's more, I am wondering why xchg still uses the dynamic replacement mode,
> but cmpxchg uses another mode. ;-)

There's a trade-off involving the number of clobbered registers and the
number of instructions, which made a bit more sense when we used to branch
out-of-line. We also do the direct patching for the pcpu atomics.
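
For context, the runtime-patching path that xchg currently takes goes through
ARM64_LSE_ATOMIC_INSN(), which is roughly the following (simplified sketch of
arch/arm64/include/asm/lse.h):

/* Patch the LL/SC sequence into the LSE one via the alternatives framework. */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)

Because both alternatives must occupy the same number of instruction slots, the
shorter LSE sequence in __XCHG_CASE is padded with __nops(3).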

Will

* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06  7:53     ` Will Deacon
@ 2020-05-06 10:39       ` Shaokun Zhang
  2020-05-06 10:44         ` Will Deacon
From: Shaokun Zhang @ 2020-05-06 10:39 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

Apologies for the noise; you are right and it's my mistake.

On 2020/5/6 15:53, Will Deacon wrote:
> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>> On 2020/5/5 17:15, Will Deacon wrote:
>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>
>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>> key to select between them, which allows the compiler to generate better
>>>> atomics code.
>>>> However, xchg still uses the original method which would fail to switch to
>>>> the atomic instruction correctly, Let's fix this issue.
>>>
>>> Please can you elaborate on the failure mode? The current code looks alright
>>
>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>> or dynamic replacement instructions are not seen.
>>
>> We do some tests on the copy of xchg_tail,:
>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>> {
>>         return (u32)xchg_relaxed(&lock->tail,
>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>> }
>> and the asm code is as follows:
>>
>> ffff80001015b050 <xchg_tail_my>:
>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>> ffff80001015b054:       910003fd        mov     x29, sp
>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>> ffff80001015b060:       aa0003f4        mov     x20, x0
>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>> ffff80001015b090:       d65f03c0        ret
> 
> This should get patched at runtime, but you're saying that's not happening?
> 

My mistake, I didn't check the runtime carefully.
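
For what it's worth, one way to see what actually executes (rather than what
objdump shows for the on-disk image) is to dump the instruction words at
runtime. A rough sketch as a throwaway test module, assuming the xchg_tail_my()
copy above is linked in and reachable (the module and symbol names here are
only for illustration):

#include <linux/module.h>
#include <linux/kernel.h>

struct qspinlock;
/* The copy of xchg_tail shown earlier, assumed visible to this module. */
extern u32 xchg_tail_my(struct qspinlock *lock, u32 tail);

static int __init dump_xchg_init(void)
{
	const u32 *insn = (const u32 *)xchg_tail_my;
	int i;

	/* Print the first 16 instruction words of the live kernel text. */
	for (i = 0; i < 16; i++)
		pr_info("%px: %08x\n", insn + i, insn[i]);

	return 0;
}
module_init(dump_xchg_init);
MODULE_LICENSE("GPL");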

>>> to me, so I'm clearly missing something. What's broken?
>>>
>>
>> I'm not sure whether the ARM64_LSE_ATOMIC_INSN could works correctly after the
>> commit addfc38672c7. If we implement xchg using __lse_ll_sc_body like cmpxchg_case,
>> xchg works ok.
>>
>> What's more, I am wondering why xchg still uses the dynamic replacement mode,
>> but cmpxchg uses another mode. ;-)
> 
> There's a trade-off involving the number of clobbered registers and the
> number of instructions, which made a bit more sense when we used to branch
> out-of-line. We also do the direct patching for the pcpu atomics.
> 

Thanks for your explanation, got it, and I did check the pcpu atomics before.

Thanks,
Shaokun

> Will
> 
> .
> 


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06 10:39       ` Shaokun Zhang
@ 2020-05-06 10:44         ` Will Deacon
  2020-05-06 11:30           ` Shaokun Zhang
From: Will Deacon @ 2020-05-06 10:44 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
> Apologies for my noise, you are right and it's my mistake.

No need to apologise, but thanks for letting me know.

> On 2020/5/6 15:53, Will Deacon wrote:
> > On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
> >> On 2020/5/5 17:15, Will Deacon wrote:
> >>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
> >>>> From: Yuqi Jin <jinyuqi@huawei.com>
> >>>>
> >>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
> >>>> it has provided inline implementations of both LSE and ll/sc and used a static
> >>>> key to select between them, which allows the compiler to generate better
> >>>> atomics code.
> >>>> However, xchg still uses the original method which would fail to switch to
> >>>> the atomic instruction correctly, Let's fix this issue.
> >>>
> >>> Please can you elaborate on the failure mode? The current code looks alright
> >>
> >> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
> >> or dynamic replacement instructions are not seen.
> >>
> >> We do some tests on the copy of xchg_tail,:
> >> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
> >> {
> >>         return (u32)xchg_relaxed(&lock->tail,
> >>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> >> }
> >> and the asm code is as follows:
> >>
> >> ffff80001015b050 <xchg_tail_my>:
> >> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
> >> ffff80001015b054:       910003fd        mov     x29, sp
> >> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
> >> ffff80001015b05c:       2a0103f3        mov     w19, w1
> >> ffff80001015b060:       aa0003f4        mov     x20, x0
> >> ffff80001015b064:       aa1e03e0        mov     x0, x30
> >> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
> >> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
> >> ffff80001015b070:       91000a83        add     x3, x20, #0x2
> >> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
> >> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
> >> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
> >> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
> >> ffff80001015b084:       53103c00        lsl     w0, w0, #16
> >> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
> >> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
> >> ffff80001015b090:       d65f03c0        ret
> > 
> > This should get patched at runtime, but you're saying that's not happening?
> > 
> 
> My mistake, I didn't check the runtime carefully.

Good to hear there's not a bug, but if you see a performance benefit from
using the static-key for xchg() then I'd obviously be open to changing it
over as well.

Thanks,

Will

* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06 10:44         ` Will Deacon
@ 2020-05-06 11:30           ` Shaokun Zhang
  2020-05-07  7:54             ` Shaokun Zhang
From: Shaokun Zhang @ 2020-05-06 11:30 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/6 18:44, Will Deacon wrote:
> On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
>> Apologies for my noise, you are right and it's my mistake.
> 
> No need to apologise, but thanks for letting me know.
> 
>> On 2020/5/6 15:53, Will Deacon wrote:
>>> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>>>> On 2020/5/5 17:15, Will Deacon wrote:
>>>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>>>
>>>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>>>> key to select between them, which allows the compiler to generate better
>>>>>> atomics code.
>>>>>> However, xchg still uses the original method which would fail to switch to
>>>>>> the atomic instruction correctly, Let's fix this issue.
>>>>>
>>>>> Please can you elaborate on the failure mode? The current code looks alright
>>>>
>>>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>>>> or dynamic replacement instructions are not seen.
>>>>
>>>> We do some tests on the copy of xchg_tail,:
>>>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>>>> {
>>>>         return (u32)xchg_relaxed(&lock->tail,
>>>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>>>> }
>>>> and the asm code is as follows:
>>>>
>>>> ffff80001015b050 <xchg_tail_my>:
>>>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>>>> ffff80001015b054:       910003fd        mov     x29, sp
>>>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>>>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>>>> ffff80001015b060:       aa0003f4        mov     x20, x0
>>>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>>>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>>>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>>>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>>>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>>>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>>>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>>>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>>>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>>>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>>>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>>>> ffff80001015b090:       d65f03c0        ret
>>>
>>> This should get patched at runtime, but you're saying that's not happening?
>>>
>>
>> My mistake, I didn't check the runtime carefully.
> 
> Good to hear there's not a bug, but if you see a performance benefit from
> using the static-key for xchg() then I'd obviously be open to changing it

Thanks for your reply. If I follow the two methods correctly, the static-key
version does not need the '__nops(3)' padding; the rest is the same.

I will run some tests to check the performance  ;-)

Thanks,
Shaokun


> over as well.
> 
> Thanks,
> 
> Will
> 
> .
> 


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06 11:30           ` Shaokun Zhang
@ 2020-05-07  7:54             ` Shaokun Zhang
  2020-05-25  9:27               ` Shaokun Zhang
From: Shaokun Zhang @ 2020-05-07  7:54 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/6 19:30, Shaokun Zhang wrote:
> Hi Will,
> 
> On 2020/5/6 18:44, Will Deacon wrote:
>> On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
>>> Apologies for my noise, you are right and it's my mistake.
>>
>> No need to apologise, but thanks for letting me know.
>>
>>> On 2020/5/6 15:53, Will Deacon wrote:
>>>> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>>>>> On 2020/5/5 17:15, Will Deacon wrote:
>>>>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>>>>
>>>>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>>>>> key to select between them, which allows the compiler to generate better
>>>>>>> atomics code.
>>>>>>> However, xchg still uses the original method which would fail to switch to
>>>>>>> the atomic instruction correctly, Let's fix this issue.
>>>>>>
>>>>>> Please can you elaborate on the failure mode? The current code looks alright
>>>>>
>>>>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>>>>> or dynamic replacement instructions are not seen.
>>>>>
>>>>> We do some tests on the copy of xchg_tail,:
>>>>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>>>>> {
>>>>>         return (u32)xchg_relaxed(&lock->tail,
>>>>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>>>>> }
>>>>> and the asm code is as follows:
>>>>>
>>>>> ffff80001015b050 <xchg_tail_my>:
>>>>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>>>>> ffff80001015b054:       910003fd        mov     x29, sp
>>>>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>>>>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>>>>> ffff80001015b060:       aa0003f4        mov     x20, x0
>>>>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>>>>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>>>>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>>>>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>>>>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>>>>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>>>>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>>>>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>>>>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>>>>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>>>>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>>>>> ffff80001015b090:       d65f03c0        ret
>>>>
>>>> This should get patched at runtime, but you're saying that's not happening?
>>>>
>>>
>>> My mistake, I didn't check the runtime carefully.
>>
>> Good to hear there's not a bug, but if you see a performance benefit from
>> using the static-key for xchg() then I'd obviously be open to changing it
> 
> Thanks your reply, if I follow the two methods correctly, static-key will
> not consume '__nops(3)', others are the same.
> 
> I will run some tests to check the performance  ;-)
> 

We compared the two methods on Huawei Kunpeng 920; the throughput per second
is as follows:

one core  |without delay| 200ns delay|
--------------------------------------
static-key| 55294942    | 3937156    |
--------------------------------------
runtime   | 54706282    | 3918188    |
--------------------------------------

If we run this test using 32 cores, the result is almost the same.
The test code is as follows:
if (delay_o) {
	while (get_cycles() <= (time_temp + t_cnt)) {
		(void)atomic64_xchg(&wk_in->num, 1);
		myndelay(delay_o);
		(void)atomic64_xchg(&wk_in->num, 2);
		myndelay(delay_o);
		w_cnt += 2;
	}
} else {
	while (get_cycles() <= (time_temp + t_cnt)) {
		(void)atomic64_xchg(&wk_in->num, 1);
		(void)atomic64_xchg(&wk_in->num, 2);
		w_cnt += 2;
	}
}

Thanks,
Shaokun

> Thanks,
> Shaokun
> 
> 
>> over as well.
>>
>> Thanks,
>>
>> Will
>>
>> .
>>


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-07  7:54             ` Shaokun Zhang
@ 2020-05-25  9:27               ` Shaokun Zhang
  2020-05-26 19:55                 ` Will Deacon
From: Shaokun Zhang @ 2020-05-25  9:27 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/7 15:54, Shaokun Zhang wrote:
> Hi Will,
> 
> On 2020/5/6 19:30, Shaokun Zhang wrote:
>> Hi Will,
>>
>> On 2020/5/6 18:44, Will Deacon wrote:
>>> On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
>>>> Apologies for my noise, you are right and it's my mistake.
>>>
>>> No need to apologise, but thanks for letting me know.
>>>
>>>> On 2020/5/6 15:53, Will Deacon wrote:
>>>>> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>>>>>> On 2020/5/5 17:15, Will Deacon wrote:
>>>>>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>>>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>>>>>
>>>>>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>>>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>>>>>> key to select between them, which allows the compiler to generate better
>>>>>>>> atomics code.
>>>>>>>> However, xchg still uses the original method which would fail to switch to
>>>>>>>> the atomic instruction correctly, Let's fix this issue.
>>>>>>>
>>>>>>> Please can you elaborate on the failure mode? The current code looks alright
>>>>>>
>>>>>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>>>>>> or dynamic replacement instructions are not seen.
>>>>>>
>>>>>> We do some tests on the copy of xchg_tail,:
>>>>>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>>>>>> {
>>>>>>         return (u32)xchg_relaxed(&lock->tail,
>>>>>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>>>>>> }
>>>>>> and the asm code is as follows:
>>>>>>
>>>>>> ffff80001015b050 <xchg_tail_my>:
>>>>>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>>>>>> ffff80001015b054:       910003fd        mov     x29, sp
>>>>>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>>>>>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>>>>>> ffff80001015b060:       aa0003f4        mov     x20, x0
>>>>>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>>>>>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>>>>>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>>>>>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>>>>>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>>>>>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>>>>>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>>>>>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>>>>>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>>>>>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>>>>>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>>>>>> ffff80001015b090:       d65f03c0        ret
>>>>>
>>>>> This should get patched at runtime, but you're saying that's not happening?
>>>>>
>>>>
>>>> My mistake, I didn't check the runtime carefully.
>>>
>>> Good to hear there's not a bug, but if you see a performance benefit from
>>> using the static-key for xchg() then I'd obviously be open to changing it
>>
>> Thanks your reply, if I follow the two methods correctly, static-key will
>> not consume '__nops(3)', others are the same.
>>
>> I will run some tests to check the performance  ;-)
>>
> 
> We compare the two methods on Huawei Kunpeng920 and the throughput per second
> as follows:
> 
> one core  |without delay| 200ns delay|
> --------------------------------------
> static-key| 55294942    | 3937156    |
> --------------------------------------
> runtime   | 54706282    | 3918188    |
> --------------------------------------
> 

Are you happy to pick up this patch, since it has some benefit for a single core?  ;-)

Thanks,
Shaokun

> If we run this test using 32-cores, the result is almost the same.
> Test code is followed:
> if(delay_o) {
> 	while (get_cycles() <= (time_temp + t_cnt)) {
> 		(void)atomic64_xchg(&wk_in->num, 1);
> 		myndelay(delay_o);
> 		(void)atomic64_xchg(&wk_in->num, 2);
> 		myndelay(delay_o);
> 		w_cnt+=2;
> 	}
> } else {
> 	while (get_cycles() <= (time_temp + t_cnt)){
> 		(void)atomic64_xchg(&wk_in->num, 1);
> 		(void)atomic64_xchg(&wk_in->num, 2);
> 		w_cnt+=2;
> 	}
> }
> 
> Thanks,
> Shaokun
> 
>> Thanks,
>> Shaokun
>>
>>
>>> over as well.
>>>
>>> Thanks,
>>>
>>> Will
>>>
>>> .
>>>


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-25  9:27               ` Shaokun Zhang
@ 2020-05-26 19:55                 ` Will Deacon
From: Will Deacon @ 2020-05-26 19:55 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Mon, May 25, 2020 at 05:27:30PM +0800, Shaokun Zhang wrote:
> On 2020/5/7 15:54, Shaokun Zhang wrote:
> > On 2020/5/6 19:30, Shaokun Zhang wrote:
> >> On 2020/5/6 18:44, Will Deacon wrote:
> >>> Good to hear there's not a bug, but if you see a performance benefit from
> >>> using the static-key for xchg() then I'd obviously be open to changing it
> >>
> >> Thanks your reply, if I follow the two methods correctly, static-key will
> >> not consume '__nops(3)', others are the same.
> >>
> >> I will run some tests to check the performance  ;-)
> >>
> > 
> > We compare the two methods on Huawei Kunpeng920 and the throughput per second
> > as follows:
> > 
> > one core  |without delay| 200ns delay|
> > --------------------------------------
> > static-key| 55294942    | 3937156    |
> > --------------------------------------
> > runtime   | 54706282    | 3918188    |
> > --------------------------------------
> > 
> 
> Are you happy to pick up this patch since it has some benefits for single core?  ;-)

Is it really worth it? I don't think so.

Will
