* [PATCH] csky: atomic: Add custom atomic.h implementation
@ 2022-04-06 12:54 guoren
2022-04-11 9:40 ` Mark Rutland
0 siblings, 1 reply; 2+ messages in thread
From: guoren @ 2022-04-06 12:54 UTC (permalink / raw)
To: arnd, mark.rutland, peterz
Cc: linux-csky, linux-arch, linux-kernel, Guo Ren, Guo Ren
From: Guo Ren <guoren@linux.alibaba.com>
The generic atomic.h used cmpxchg to implement the atomic
operations, it will cause daul loop to reduce the forward
guarantee. The patch implement csky custom atomic operations with
ldex/stex instructions for the best performance.
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
arch/csky/include/asm/atomic.h | 251 +++++++++++++++++++++++++++++++++
1 file changed, 251 insertions(+)
create mode 100644 arch/csky/include/asm/atomic.h
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
new file mode 100644
index 000000000000..3157df8ad555
--- /dev/null
+++ b/arch/csky/include/asm/atomic.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_ATOMIC_H
+#define __ASM_CSKY_ATOMIC_H
+
+#ifdef CONFIG_SMP
+# include <asm-generic/atomic64.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+
+#define __atomic_acquire_fence() __smp_acquire_fence()
+
+#define __atomic_release_fence() __smp_release_fence()
+
+static __always_inline int arch_atomic_read(const atomic_t *v)
+{
+ return READ_ONCE(v->counter);
+}
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
+{
+ WRITE_ONCE(v->counter, i);
+}
+
+#define ATOMIC_OP(op, asm_op, I) \
+static __always_inline \
+void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%2) \n" \
+ " " #op " %0, %1 \n" \
+ " stex.w %0, (%2) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp) \
+ : "r" (I), "r" (&v->counter) \
+ : "memory"); \
+}
+
+ATOMIC_OP(add, add, i)
+ATOMIC_OP(sub, add, -i)
+ATOMIC_OP(and, and, i)
+ATOMIC_OP( or, or, i)
+ATOMIC_OP(xor, xor, i)
+
+#undef ATOMIC_OP
+
+#define ATOMIC_FETCH_OP(op, asm_op, I) \
+static __always_inline \
+int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ register int ret, tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%3) \n" \
+ " mov %1, %0 \n" \
+ " " #op " %0, %2 \n" \
+ " stex.w %0, (%3) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp), "=&r" (ret) \
+ : "r" (I), "r"(&v->counter) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+int arch_atomic_fetch_##op##_acquire(int i, atomic_t *v) \
+{ \
+ register int ret, tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%3) \n" \
+ ACQUIRE_FENCE \
+ " mov %1, %0 \n" \
+ " " #op " %0, %2 \n" \
+ " stex.w %0, (%3) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp), "=&r" (ret) \
+ : "r" (I), "r"(&v->counter) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+int arch_atomic_fetch_##op##_release(int i, atomic_t *v) \
+{ \
+ register int ret, tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%3) \n" \
+ " mov %1, %0 \n" \
+ " " #op " %0, %2 \n" \
+ RELEASE_FENCE \
+ " stex.w %0, (%3) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp), "=&r" (ret) \
+ : "r" (I), "r"(&v->counter) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+int arch_atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ register int ret, tmp; \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %0, (%3) \n" \
+ ACQUIRE_FENCE \
+ " mov %1, %0 \n" \
+ " " #op " %0, %2 \n" \
+ RELEASE_FENCE \
+ " stex.w %0, (%3) \n" \
+ " bez %0, 1b \n" \
+ : "=&r" (tmp), "=&r" (ret) \
+ : "r" (I), "r"(&v->counter) \
+ : "memory"); \
+ return ret; \
+}
+
+#define ATOMIC_OP_RETURN(op, asm_op, c_op, I) \
+static __always_inline \
+int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+ return arch_atomic_fetch_##op##_relaxed(i, v) c_op I; \
+} \
+static __always_inline \
+int arch_atomic_##op##_return_acquire(int i, atomic_t *v) \
+{ \
+ return arch_atomic_fetch_##op##_relaxed(i, v) c_op I; \
+} \
+static __always_inline \
+int arch_atomic_##op##_return_release(int i, atomic_t *v) \
+{ \
+ return arch_atomic_fetch_##op##_release(i, v) c_op I; \
+} \
+static __always_inline \
+int arch_atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ return arch_atomic_fetch_##op(i, v) c_op I; \
+}
+
+#define ATOMIC_OPS(op, asm_op, c_op, I) \
+ ATOMIC_FETCH_OP( op, asm_op, I) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I)
+
+ATOMIC_OPS(add, add, +, i)
+ATOMIC_OPS(sub, add, +, -i)
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#define arch_atomic_add_return_release arch_atomic_add_return_release
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+
+#define ATOMIC_OPS(op, asm_op, I) \
+ ATOMIC_FETCH_OP(op, asm_op, I)
+
+ATOMIC_OPS(and, and, i)
+ATOMIC_OPS( or, or, i)
+ATOMIC_OPS(xor, xor, i)
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
+#undef ATOMIC_OPS
+
+#undef ATOMIC_FETCH_OP
+
+#define ATOMIC_OP(size) \
+static __always_inline \
+int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
+{ \
+ return __xchg_relaxed(n, &(v->counter), size); \
+} \
+static __always_inline \
+int arch_atomic_xchg_acquire(atomic_t *v, int n) \
+{ \
+ return __xchg_acquire(n, &(v->counter), size); \
+} \
+static __always_inline \
+int arch_atomic_xchg_release(atomic_t *v, int n) \
+{ \
+ return __xchg_release(n, &(v->counter), size); \
+} \
+static __always_inline \
+int arch_atomic_xchg(atomic_t *v, int n) \
+{ \
+ return __xchg(n, &(v->counter), size); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_relaxed(&(v->counter), o, n, size); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_acquire(&(v->counter), o, n, size); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_release(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg_release(&(v->counter), o, n, size); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \
+{ \
+ return __cmpxchg(&(v->counter), o, n, size); \
+}
+
+#define ATOMIC_OPS() \
+ ATOMIC_OP(4)
+
+ATOMIC_OPS()
+
+#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
+#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire
+#define arch_atomic_xchg_release arch_atomic_xchg_release
+#define arch_atomic_xchg arch_atomic_xchg
+#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
+#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP
+
+#else
+# include <asm-generic/atomic.h>
+#endif
+
+#endif /* __ASM_CSKY_ATOMIC_H */
--
2.25.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] csky: atomic: Add custom atomic.h implementation
2022-04-06 12:54 [PATCH] csky: atomic: Add custom atomic.h implementation guoren
@ 2022-04-11 9:40 ` Mark Rutland
0 siblings, 0 replies; 2+ messages in thread
From: Mark Rutland @ 2022-04-11 9:40 UTC (permalink / raw)
To: guoren; +Cc: arnd, peterz, linux-csky, linux-arch, linux-kernel, Guo Ren
On Wed, Apr 06, 2022 at 08:54:36PM +0800, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> The generic atomic.h used cmpxchg to implement the atomic
> operations, it will cause daul loop to reduce the forward
> guarantee. The patch implement csky custom atomic operations with
> ldex/stex instructions for the best performance.
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
> arch/csky/include/asm/atomic.h | 251 +++++++++++++++++++++++++++++++++
> 1 file changed, 251 insertions(+)
> create mode 100644 arch/csky/include/asm/atomic.h
> +static __always_inline \
> +int arch_atomic_fetch_##op(int i, atomic_t *v) \
> +{ \
> + register int ret, tmp; \
> + __asm__ __volatile__ ( \
> + "1: ldex.w %0, (%3) \n" \
> + ACQUIRE_FENCE \
> + " mov %1, %0 \n" \
> + " " #op " %0, %2 \n" \
> + RELEASE_FENCE \
> + " stex.w %0, (%3) \n" \
> + " bez %0, 1b \n" \
> + : "=&r" (tmp), "=&r" (ret) \
> + : "r" (I), "r"(&v->counter) \
> + : "memory"); \
> + return ret; \
> +}
I believe this suffers the problem described in:
8e86f0b409a44193 ("arm64: atomics: fix use of acquire + release for full barrier semantics")
... and does not provide FULL ordering semantics.
Thanks,
Mark.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-04-11 9:41 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-06 12:54 [PATCH] csky: atomic: Add custom atomic.h implementation guoren
2022-04-11 9:40 ` Mark Rutland
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.