* [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
@ 2021-07-29  9:30 Rui Wang
  2021-07-29  9:55 ` Will Deacon
  0 siblings, 1 reply; 8+ messages in thread
From: Rui Wang @ 2021-07-29  9:30 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Arnd Bergmann
  Cc: Waiman Long, Boqun Feng, Guo Ren, linux-arch, linux-kernel,
	Rui Wang, Rui Wang, Xuefeng Li, Huacai Chen, Jiaxun Yang,
	Huacai Chen, kernel test robot

This patch introduces a new atomic primitive, andnot_or:

 * atomic_andnot_or
 * atomic_fetch_andnot_or
 * atomic_fetch_andnot_or_relaxed
 * atomic_fetch_andnot_or_acquire
 * atomic_fetch_andnot_or_release
 * atomic64_andnot_or
 * atomic64_fetch_andnot_or
 * atomic64_fetch_andnot_or_relaxed
 * atomic64_fetch_andnot_or_acquire
 * atomic64_fetch_andnot_or_release
 * atomic_long_andnot_or
 * atomic_long_fetch_andnot_or
 * atomic_long_fetch_andnot_or_relaxed
 * atomic_long_fetch_andnot_or_acquire
 * atomic_long_fetch_andnot_or_release
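
For all of the above, the update applied to the atomic variable is
"clear the bits in m, then set the bits in o", and the fetch_ variants
return the value held before the update. A minimal non-atomic reference
sketch of the computation (helper name for illustration only; the
generated fallbacks below do the same thing inside a try_cmpxchg()
loop):

	/* Reference semantics only; the real ops update *v atomically. */
	static inline int andnot_or_ref(int val, int m, int o)
	{
		return (val & ~m) | o;	/* the new value stored back into v */
	}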

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Rui Wang <wangrui@loongson.cn>
---
 include/asm-generic/atomic-instrumented.h |  72 +++++-
 include/asm-generic/atomic-long.h         |  62 ++++-
 include/linux/atomic-arch-fallback.h      | 262 +++++++++++++++++++++-
 lib/atomic64_test.c                       |  92 ++++----
 scripts/atomic/atomics.tbl                |   1 +
 scripts/atomic/fallbacks/andnot_or        |  25 +++
 6 files changed, 471 insertions(+), 43 deletions(-)
 create mode 100755 scripts/atomic/fallbacks/andnot_or

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index bc45af52c93b..8f5efade88b7 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -599,6 +599,41 @@ atomic_dec_if_positive(atomic_t *v)
 	return arch_atomic_dec_if_positive(v);
 }
 
+static __always_inline void
+atomic_andnot_or(int m, int o, atomic_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	arch_atomic_andnot_or(m, o, v);
+}
+
+static __always_inline int
+atomic_fetch_andnot_or(int m, int o, atomic_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_or(m, o, v);
+}
+
+static __always_inline int
+atomic_fetch_andnot_or_acquire(int m, int o, atomic_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_or_acquire(m, o, v);
+}
+
+static __always_inline int
+atomic_fetch_andnot_or_release(int m, int o, atomic_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_or_release(m, o, v);
+}
+
+static __always_inline int
+atomic_fetch_andnot_or_relaxed(int m, int o, atomic_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_or_relaxed(m, o, v);
+}
+
 static __always_inline s64
 atomic64_read(const atomic64_t *v)
 {
@@ -1177,6 +1212,41 @@ atomic64_dec_if_positive(atomic64_t *v)
 	return arch_atomic64_dec_if_positive(v);
 }
 
+static __always_inline void
+atomic64_andnot_or(s64 m, s64 o, atomic64_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	arch_atomic64_andnot_or(m, o, v);
+}
+
+static __always_inline s64
+atomic64_fetch_andnot_or(s64 m, s64 o, atomic64_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic64_fetch_andnot_or(m, o, v);
+}
+
+static __always_inline s64
+atomic64_fetch_andnot_or_acquire(s64 m, s64 o, atomic64_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic64_fetch_andnot_or_acquire(m, o, v);
+}
+
+static __always_inline s64
+atomic64_fetch_andnot_or_release(s64 m, s64 o, atomic64_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic64_fetch_andnot_or_release(m, o, v);
+}
+
+static __always_inline s64
+atomic64_fetch_andnot_or_relaxed(s64 m, s64 o, atomic64_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	return arch_atomic64_fetch_andnot_or_relaxed(m, o, v);
+}
+
 #define xchg(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
@@ -1334,4 +1404,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 })
 
 #endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// 1d7c3a25aca5c7fb031c307be4c3d24c7b48fcd5
+// 9c9792d0dcd1fb3de8eeda1225ebbd0d811fb941
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 073cf40f431b..0c61626b42d2 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -515,6 +515,36 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 	return atomic64_dec_if_positive(v);
 }
 
+static __always_inline void
+atomic_long_andnot_or(long m, long o, atomic_long_t *v)
+{
+	atomic64_andnot_or(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or(long m, long o, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_or(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or_acquire(long m, long o, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_or_acquire(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or_release(long m, long o, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_or_release(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or_relaxed(long m, long o, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_or_relaxed(m, o, v);
+}
+
 #else /* CONFIG_64BIT */
 
 static __always_inline long
@@ -1009,6 +1039,36 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 	return atomic_dec_if_positive(v);
 }
 
+static __always_inline void
+atomic_long_andnot_or(long m, long o, atomic_long_t *v)
+{
+	atomic_andnot_or(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or(long m, long o, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_or(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or_acquire(long m, long o, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_or_acquire(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or_release(long m, long o, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_or_release(m, o, v);
+}
+
+static __always_inline long
+atomic_long_fetch_andnot_or_relaxed(long m, long o, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_or_relaxed(m, o, v);
+}
+
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_GENERIC_ATOMIC_LONG_H */
-// a624200981f552b2c6be4f32fe44da8289f30d87
+// 3ab842342b36b655b902481be793ba7a04c5a88d
diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h
index a3dba31df01e..93a68face24f 100644
--- a/include/linux/atomic-arch-fallback.h
+++ b/include/linux/atomic-arch-fallback.h
@@ -1250,6 +1250,136 @@ arch_atomic_dec_if_positive(atomic_t *v)
 #define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
 #endif
 
+#ifndef arch_atomic_andnot_or
+static __always_inline void
+arch_atomic_andnot_or(int m, int o, atomic_t *v)
+{
+	(void)({
+		int N, O = arch_atomic_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic_try_cmpxchg_relaxed(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic_andnot_or arch_atomic_andnot_or
+#endif
+
+#ifndef arch_atomic_fetch_andnot_or_relaxed
+#ifdef arch_atomic_fetch_andnot_or
+#define arch_atomic_fetch_andnot_or_acquire arch_atomic_fetch_andnot_or
+#define arch_atomic_fetch_andnot_or_release arch_atomic_fetch_andnot_or
+#define arch_atomic_fetch_andnot_or_relaxed arch_atomic_fetch_andnot_or
+#endif /* arch_atomic_fetch_andnot_or */
+
+#ifndef arch_atomic_fetch_andnot_or
+static __always_inline int
+arch_atomic_fetch_andnot_or(int m, int o, atomic_t *v)
+{
+	return ({
+		int N, O = arch_atomic_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic_try_cmpxchg(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic_fetch_andnot_or arch_atomic_fetch_andnot_or
+#endif
+
+#ifndef arch_atomic_fetch_andnot_or_acquire
+static __always_inline int
+arch_atomic_fetch_andnot_or_acquire(int m, int o, atomic_t *v)
+{
+	return ({
+		int N, O = arch_atomic_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic_try_cmpxchg_acquire(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic_fetch_andnot_or_acquire arch_atomic_fetch_andnot_or_acquire
+#endif
+
+#ifndef arch_atomic_fetch_andnot_or_release
+static __always_inline int
+arch_atomic_fetch_andnot_or_release(int m, int o, atomic_t *v)
+{
+	return ({
+		int N, O = arch_atomic_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic_try_cmpxchg_release(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic_fetch_andnot_or_release arch_atomic_fetch_andnot_or_release
+#endif
+
+#ifndef arch_atomic_fetch_andnot_or_relaxed
+static __always_inline int
+arch_atomic_fetch_andnot_or_relaxed(int m, int o, atomic_t *v)
+{
+	return ({
+		int N, O = arch_atomic_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic_try_cmpxchg_relaxed(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic_fetch_andnot_or_relaxed arch_atomic_fetch_andnot_or_relaxed
+#endif
+
+#else /* arch_atomic_fetch_andnot_or_relaxed */
+
+#ifndef arch_atomic_fetch_andnot_or_acquire
+static __always_inline int
+arch_atomic_fetch_andnot_or_acquire(int m, int o, atomic_t *v)
+{
+	int ret = arch_atomic_fetch_andnot_or_relaxed(m, o, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define arch_atomic_fetch_andnot_or_acquire arch_atomic_fetch_andnot_or_acquire
+#endif
+
+#ifndef arch_atomic_fetch_andnot_or_release
+static __always_inline int
+arch_atomic_fetch_andnot_or_release(int m, int o, atomic_t *v)
+{
+	__atomic_release_fence();
+	return arch_atomic_fetch_andnot_or_relaxed(m, o, v);
+}
+#define arch_atomic_fetch_andnot_or_release arch_atomic_fetch_andnot_or_release
+#endif
+
+#ifndef arch_atomic_fetch_andnot_or
+static __always_inline int
+arch_atomic_fetch_andnot_or(int m, int o, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = arch_atomic_fetch_andnot_or_relaxed(m, o, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define arch_atomic_fetch_andnot_or arch_atomic_fetch_andnot_or
+#endif
+
+#endif /* arch_atomic_fetch_andnot_or_relaxed */
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
@@ -2357,5 +2487,135 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
 #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 #endif
 
+#ifndef arch_atomic64_andnot_or
+static __always_inline void
+arch_atomic64_andnot_or(s64 m, s64 o, atomic64_t *v)
+{
+	(void)({
+		s64 N, O = arch_atomic64_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic64_try_cmpxchg_relaxed(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic64_andnot_or arch_atomic64_andnot_or
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_or_relaxed
+#ifdef arch_atomic64_fetch_andnot_or
+#define arch_atomic64_fetch_andnot_or_acquire arch_atomic64_fetch_andnot_or
+#define arch_atomic64_fetch_andnot_or_release arch_atomic64_fetch_andnot_or
+#define arch_atomic64_fetch_andnot_or_relaxed arch_atomic64_fetch_andnot_or
+#endif /* arch_atomic64_fetch_andnot_or */
+
+#ifndef arch_atomic64_fetch_andnot_or
+static __always_inline s64
+arch_atomic64_fetch_andnot_or(s64 m, s64 o, atomic64_t *v)
+{
+	return ({
+		s64 N, O = arch_atomic64_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic64_try_cmpxchg(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic64_fetch_andnot_or arch_atomic64_fetch_andnot_or
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_or_acquire
+static __always_inline s64
+arch_atomic64_fetch_andnot_or_acquire(s64 m, s64 o, atomic64_t *v)
+{
+	return ({
+		s64 N, O = arch_atomic64_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic64_try_cmpxchg_acquire(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic64_fetch_andnot_or_acquire arch_atomic64_fetch_andnot_or_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_or_release
+static __always_inline s64
+arch_atomic64_fetch_andnot_or_release(s64 m, s64 o, atomic64_t *v)
+{
+	return ({
+		s64 N, O = arch_atomic64_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic64_try_cmpxchg_release(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic64_fetch_andnot_or_release arch_atomic64_fetch_andnot_or_release
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_or_relaxed
+static __always_inline s64
+arch_atomic64_fetch_andnot_or_relaxed(s64 m, s64 o, atomic64_t *v)
+{
+	return ({
+		s64 N, O = arch_atomic64_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!arch_atomic64_try_cmpxchg_relaxed(v, &O, N));
+		O;
+	});
+}
+#define arch_atomic64_fetch_andnot_or_relaxed arch_atomic64_fetch_andnot_or_relaxed
+#endif
+
+#else /* arch_atomic64_fetch_andnot_or_relaxed */
+
+#ifndef arch_atomic64_fetch_andnot_or_acquire
+static __always_inline s64
+arch_atomic64_fetch_andnot_or_acquire(s64 m, s64 o, atomic64_t *v)
+{
+	s64 ret = arch_atomic64_fetch_andnot_or_relaxed(m, o, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define arch_atomic64_fetch_andnot_or_acquire arch_atomic64_fetch_andnot_or_acquire
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_or_release
+static __always_inline s64
+arch_atomic64_fetch_andnot_or_release(s64 m, s64 o, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return arch_atomic64_fetch_andnot_or_relaxed(m, o, v);
+}
+#define arch_atomic64_fetch_andnot_or_release arch_atomic64_fetch_andnot_or_release
+#endif
+
+#ifndef arch_atomic64_fetch_andnot_or
+static __always_inline s64
+arch_atomic64_fetch_andnot_or(s64 m, s64 o, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = arch_atomic64_fetch_andnot_or_relaxed(m, o, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define arch_atomic64_fetch_andnot_or arch_atomic64_fetch_andnot_or
+#endif
+
+#endif /* arch_atomic64_fetch_andnot_or_relaxed */
+
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// cca554917d7ea73d5e3e7397dd70c484cad9b2c4
+// 74f7ec8a3bee44a12678be13ca294c61c4a77941
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index d9d170238165..fedc83118a29 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -17,12 +17,18 @@
 #include <asm/cpufeature.h>	/* for boot_cpu_has below */
 #endif
 
-#define TEST(bit, op, c_op, val)				\
+#define COP(c_op1, c_op2, val1, val2...)			\
+do {								\
+	(void)(r c_op1 val1);					\
+	(void)(r c_op2 val2);					\
+} while (0)
+
+#define TEST(bit, op, c_op1, c_op2, args...)			\
 do {								\
 	atomic##bit##_set(&v, v0);				\
 	r = v0;							\
-	atomic##bit##_##op(val, &v);				\
-	r c_op val;						\
+	atomic##bit##_##op(args, &v);				\
+	COP(c_op1, c_op2, args);				\
 	WARN(atomic##bit##_read(&v) != r, "%Lx != %Lx\n",	\
 		(unsigned long long)atomic##bit##_read(&v),	\
 		(unsigned long long)r);				\
@@ -50,12 +56,12 @@ do {								\
 	BUG_ON(atomic##bit##_read(&v) != r);			\
 } while (0)
 
-#define TEST_FETCH(bit, op, c_op, val)				\
+#define TEST_FETCH(bit, op, c_op1, c_op2, args...)		\
 do {								\
 	atomic##bit##_set(&v, v0);				\
 	r = v0;							\
-	r c_op val;						\
-	BUG_ON(atomic##bit##_##op(val, &v) != v0);		\
+	COP(c_op1, c_op2, args);				\
+	BUG_ON(atomic##bit##_##op(args, &v) != v0);		\
 	BUG_ON(atomic##bit##_read(&v) != r);			\
 } while (0)
 
@@ -64,9 +70,9 @@ do {								\
 	FAMILY_TEST(TEST_RETURN, bit, op, c_op, val);		\
 } while (0)
 
-#define FETCH_FAMILY_TEST(bit, op, c_op, val)			\
+#define FETCH_FAMILY_TEST(bit, op, args...)			\
 do {								\
-	FAMILY_TEST(TEST_FETCH, bit, op, c_op, val);		\
+	FAMILY_TEST(TEST_FETCH, bit, op, args);			\
 } while (0)
 
 #define TEST_ARGS(bit, op, init, ret, expect, args...)		\
@@ -105,35 +111,38 @@ static __init void test_atomic(void)
 {
 	int v0 = 0xaaa31337;
 	int v1 = 0xdeadbeef;
+	int mask = 0x0000ffff;
 	int onestwos = 0x11112222;
 	int one = 1;
 
 	atomic_t v;
 	int r;
 
-	TEST(, add, +=, onestwos);
-	TEST(, add, +=, -one);
-	TEST(, sub, -=, onestwos);
-	TEST(, sub, -=, -one);
-	TEST(, or, |=, v1);
-	TEST(, and, &=, v1);
-	TEST(, xor, ^=, v1);
-	TEST(, andnot, &= ~, v1);
+	TEST(, add, +=, , onestwos);
+	TEST(, add, +=, , -one);
+	TEST(, sub, -=, , onestwos);
+	TEST(, sub, -=, , -one);
+	TEST(, or, |=, , v1);
+	TEST(, and, &=, , v1);
+	TEST(, xor, ^=, , v1);
+	TEST(, andnot, &= ~, , v1);
+	TEST(, andnot_or, &= ~, |=, mask, one);
 
 	RETURN_FAMILY_TEST(, add_return, +=, onestwos);
 	RETURN_FAMILY_TEST(, add_return, +=, -one);
 	RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
 	RETURN_FAMILY_TEST(, sub_return, -=, -one);
 
-	FETCH_FAMILY_TEST(, fetch_add, +=, onestwos);
-	FETCH_FAMILY_TEST(, fetch_add, +=, -one);
-	FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos);
-	FETCH_FAMILY_TEST(, fetch_sub, -=, -one);
+	FETCH_FAMILY_TEST(, fetch_add, +=, , onestwos);
+	FETCH_FAMILY_TEST(, fetch_add, +=, , -one);
+	FETCH_FAMILY_TEST(, fetch_sub, -=, , onestwos);
+	FETCH_FAMILY_TEST(, fetch_sub, -=, , -one);
 
-	FETCH_FAMILY_TEST(, fetch_or,  |=, v1);
-	FETCH_FAMILY_TEST(, fetch_and, &=, v1);
-	FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1);
-	FETCH_FAMILY_TEST(, fetch_xor, ^=, v1);
+	FETCH_FAMILY_TEST(, fetch_or,  |=, , v1);
+	FETCH_FAMILY_TEST(, fetch_and, &=, , v1);
+	FETCH_FAMILY_TEST(, fetch_andnot, &= ~, , v1);
+	FETCH_FAMILY_TEST(, fetch_xor, ^=, , v1);
+	FETCH_FAMILY_TEST(, fetch_andnot_or, &= ~, |=, mask, one);
 
 	INC_RETURN_FAMILY_TEST(, v0);
 	DEC_RETURN_FAMILY_TEST(, v0);
@@ -150,6 +159,7 @@ static __init void test_atomic64(void)
 	long long v1 = 0xdeadbeefdeafcafeLL;
 	long long v2 = 0xfaceabadf00df001LL;
 	long long v3 = 0x8000000000000000LL;
+	long long mask = 0x00000000ffffffffLL;
 	long long onestwos = 0x1111111122222222LL;
 	long long one = 1LL;
 	int r_int;
@@ -163,29 +173,31 @@ static __init void test_atomic64(void)
 	BUG_ON(v.counter != r);
 	BUG_ON(atomic64_read(&v) != r);
 
-	TEST(64, add, +=, onestwos);
-	TEST(64, add, +=, -one);
-	TEST(64, sub, -=, onestwos);
-	TEST(64, sub, -=, -one);
-	TEST(64, or, |=, v1);
-	TEST(64, and, &=, v1);
-	TEST(64, xor, ^=, v1);
-	TEST(64, andnot, &= ~, v1);
+	TEST(64, add, +=, , onestwos);
+	TEST(64, add, +=, , -one);
+	TEST(64, sub, -=, , onestwos);
+	TEST(64, sub, -=, , -one);
+	TEST(64, or, |=, , v1);
+	TEST(64, and, &=, , v1);
+	TEST(64, xor, ^=, , v1);
+	TEST(64, andnot, &= ~, , v1);
+	TEST(64, andnot_or, &= ~, |=, mask, one);
 
 	RETURN_FAMILY_TEST(64, add_return, +=, onestwos);
 	RETURN_FAMILY_TEST(64, add_return, +=, -one);
 	RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
 	RETURN_FAMILY_TEST(64, sub_return, -=, -one);
 
-	FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos);
-	FETCH_FAMILY_TEST(64, fetch_add, +=, -one);
-	FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos);
-	FETCH_FAMILY_TEST(64, fetch_sub, -=, -one);
+	FETCH_FAMILY_TEST(64, fetch_add, +=, , onestwos);
+	FETCH_FAMILY_TEST(64, fetch_add, +=, , -one);
+	FETCH_FAMILY_TEST(64, fetch_sub, -=, , onestwos);
+	FETCH_FAMILY_TEST(64, fetch_sub, -=, , -one);
 
-	FETCH_FAMILY_TEST(64, fetch_or,  |=, v1);
-	FETCH_FAMILY_TEST(64, fetch_and, &=, v1);
-	FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1);
-	FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1);
+	FETCH_FAMILY_TEST(64, fetch_or,  |=, , v1);
+	FETCH_FAMILY_TEST(64, fetch_and, &=, , v1);
+	FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, , v1);
+	FETCH_FAMILY_TEST(64, fetch_xor, ^=, , v1);
+	FETCH_FAMILY_TEST(64, fetch_andnot_or, &= ~, |=, mask, one);
 
 	INIT(v0);
 	atomic64_inc(&v);
diff --git a/scripts/atomic/atomics.tbl b/scripts/atomic/atomics.tbl
index fbee2f6190d9..db6fe1dfcdb4 100755
--- a/scripts/atomic/atomics.tbl
+++ b/scripts/atomic/atomics.tbl
@@ -39,3 +39,4 @@ inc_not_zero		b	v
 inc_unless_negative	b	v
 dec_unless_positive	b	v
 dec_if_positive		i	v
+andnot_or		vF	i:m	i:o	v
diff --git a/scripts/atomic/fallbacks/andnot_or b/scripts/atomic/fallbacks/andnot_or
new file mode 100755
index 000000000000..a835045208de
--- /dev/null
+++ b/scripts/atomic/fallbacks/andnot_or
@@ -0,0 +1,25 @@
+local try_order=${order}
+
+#
+# non-value returning atomics are implicitly relaxed
+#
+if [ -z "${retstmt}" ]; then
+	try_order="_relaxed"
+	retstmt="(void)"
+fi
+
+cat <<EOF
+static __always_inline ${ret}
+${arch}${atomic}_${pfx}andnot_or${sfx}${order}(${int} m, ${int} o, ${atomic}_t *v)
+{
+	${retstmt}({
+		${int} N, O = ${arch}${atomic}_read(v);
+		do {
+			N = O;
+			N &= ~m;
+			N |= o;
+		} while (!${arch}${atomic}_try_cmpxchg${try_order}(v, &O, N));
+		O;
+	});
+}
+EOF
-- 
2.32.0



* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29  9:30 [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}() Rui Wang
@ 2021-07-29  9:55 ` Will Deacon
  2021-07-29 10:58   ` hev
                     ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Will Deacon @ 2021-07-29  9:55 UTC (permalink / raw)
  To: Rui Wang
  Cc: Peter Zijlstra, Ingo Molnar, Arnd Bergmann, Waiman Long,
	Boqun Feng, Guo Ren, linux-arch, linux-kernel, Rui Wang,
	Xuefeng Li, Huacai Chen, Jiaxun Yang, Huacai Chen,
	kernel test robot

On Thu, Jul 29, 2021 at 05:30:03PM +0800, Rui Wang wrote:
> This patch introduces a new atomic primitive, andnot_or:
> 
>  * atomic_andnot_or
>  * atomic_fetch_andnot_or
>  * atomic_fetch_andnot_or_relaxed
>  * atomic_fetch_andnot_or_acquire
>  * atomic_fetch_andnot_or_release
>  * atomic64_andnot_or
>  * atomic64_fetch_andnot_or
>  * atomic64_fetch_andnot_or_relaxed
>  * atomic64_fetch_andnot_or_acquire
>  * atomic64_fetch_andnot_or_release
>  * atomic_long_andnot_or
>  * atomic_long_fetch_andnot_or
>  * atomic_long_fetch_andnot_or_relaxed
>  * atomic_long_fetch_andnot_or_acquire
>  * atomic_long_fetch_andnot_or_release
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Rui Wang <wangrui@loongson.cn>
> ---
>  include/asm-generic/atomic-instrumented.h |  72 +++++-
>  include/asm-generic/atomic-long.h         |  62 ++++-
>  include/linux/atomic-arch-fallback.h      | 262 +++++++++++++++++++++-
>  lib/atomic64_test.c                       |  92 ++++----
>  scripts/atomic/atomics.tbl                |   1 +
>  scripts/atomic/fallbacks/andnot_or        |  25 +++
>  6 files changed, 471 insertions(+), 43 deletions(-)
>  create mode 100755 scripts/atomic/fallbacks/andnot_or

Please see my other comments on the other patches you posted:

https://lore.kernel.org/r/20210729093923.GD21151@willie-the-truck

Overall, I'm not thrilled to bits by extending the atomics API with
operations that cannot be implemented efficiently on any (?) architectures
and are only used by the qspinlock slowpath on machines with more than 16K
CPUs.

I also think we're lacking documentation justifying when you would use this
new primitive over e.g. a sub-word WRITE_ONCE() on architectures that
support those, especially for the non-returning variants.
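
(As a purely hypothetical illustration of that alternative, using a
made-up layout: where the bits being replaced form a naturally aligned
16-bit field and the architecture has single-copy-atomic halfword
stores, the non-returning case can simply be a plain store, e.g.

	struct example { u16 locked; u16 tail; };	/* made-up layout */

	WRITE_ONCE(l->tail, newtail);

rather than an andnot_or on the containing 32-bit word.)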

Will


* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29  9:55 ` Will Deacon
@ 2021-07-29 10:58   ` hev
  2021-07-29 11:15   ` Peter Zijlstra
  2021-07-29 11:43   ` Arnd Bergmann
  2 siblings, 0 replies; 8+ messages in thread
From: hev @ 2021-07-29 10:58 UTC (permalink / raw)
  To: Will Deacon
  Cc: Rui Wang, Peter Zijlstra, Ingo Molnar, Arnd Bergmann,
	Waiman Long, Boqun Feng, Guo Ren, linux-arch, linux-kernel,
	Xuefeng Li, Huacai Chen, Jiaxun Yang, Huacai Chen,
	kernel test robot

Hi, Will,

On Thu, Jul 29, 2021 at 5:55 PM Will Deacon <will@kernel.org> wrote:
>
> On Thu, Jul 29, 2021 at 05:30:03PM +0800, Rui Wang wrote:
> > This patch introduces a new atomic primitive, andnot_or:
> >
> >  * atomic_andnot_or
> >  * atomic_fetch_andnot_or
> >  * atomic_fetch_andnot_or_relaxed
> >  * atomic_fetch_andnot_or_acquire
> >  * atomic_fetch_andnot_or_release
> >  * atomic64_andnot_or
> >  * atomic64_fetch_andnot_or
> >  * atomic64_fetch_andnot_or_relaxed
> >  * atomic64_fetch_andnot_or_acquire
> >  * atomic64_fetch_andnot_or_release
> >  * atomic_long_andnot_or
> >  * atomic_long_fetch_andnot_or
> >  * atomic_long_fetch_andnot_or_relaxed
> >  * atomic_long_fetch_andnot_or_acquire
> >  * atomic_long_fetch_andnot_or_release
> >
> > Reported-by: kernel test robot <lkp@intel.com>
> > Signed-off-by: Rui Wang <wangrui@loongson.cn>
> > ---
> >  include/asm-generic/atomic-instrumented.h |  72 +++++-
> >  include/asm-generic/atomic-long.h         |  62 ++++-
> >  include/linux/atomic-arch-fallback.h      | 262 +++++++++++++++++++++-
> >  lib/atomic64_test.c                       |  92 ++++----
> >  scripts/atomic/atomics.tbl                |   1 +
> >  scripts/atomic/fallbacks/andnot_or        |  25 +++
> >  6 files changed, 471 insertions(+), 43 deletions(-)
> >  create mode 100755 scripts/atomic/fallbacks/andnot_or
>
> Please see my other comments on the other patches you posted:
>
> https://lore.kernel.org/r/20210729093923.GD21151@willie-the-truck
>
> Overall, I'm not thrilled to bits by extending the atomics API with
> operations that cannot be implemented efficiently on any (?) architectures
> and are only used by the qspinlock slowpath on machines with more than 16K
> CPUs.
>
> I also think we're lacking documentation justifying when you would use this
> new primitive over e.g. a sub-word WRITE_ONCE() on architectures that
> support those, especially for the non-returning variants.
>
> Will

I tried to explain this in another thread. Initially I thought about
implementing xchg_mask for the sub-word xchg, but now I agree that
atomic andnot_or is clearer and more general.

Peter, what do you think?

Regards,
Rui


* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29  9:55 ` Will Deacon
  2021-07-29 10:58   ` hev
@ 2021-07-29 11:15   ` Peter Zijlstra
  2021-07-29 12:34     ` Will Deacon
  2021-07-29 11:43   ` Arnd Bergmann
  2 siblings, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2021-07-29 11:15 UTC (permalink / raw)
  To: Will Deacon
  Cc: Rui Wang, Ingo Molnar, Arnd Bergmann, Waiman Long, Boqun Feng,
	Guo Ren, linux-arch, linux-kernel, Rui Wang, Xuefeng Li,
	Huacai Chen, Jiaxun Yang, Huacai Chen, kernel test robot

On Thu, Jul 29, 2021 at 10:55:52AM +0100, Will Deacon wrote:

> Overall, I'm not thrilled to bits by extending the atomics API with
> operations that cannot be implemented efficiently on any (?) architectures
> and are only used by the qspinlock slowpath on machines with more than 16K
> CPUs.

My rationale for proposing this primitive is the same as for the other
composite atomic ops in the Misc (and refcount) class (as per
atomic_t.txt): they're common, performance-sensitive operations that,
on LL/SC platforms, can be implemented better than a cmpxchg() loop.

Specifically here, it can be used to implement short xchg() in an
architecturally neutral way, but more importantly it provides fwd
progress on LL/SC, while most LL/SC based cmpxchg() implementations are
arguably broken there.
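
As an illustration only (the helper name is made up here, and it
assumes the 16 bits of interest live in the low half of the word), such
a sub-word exchange could be sketched as:

	static inline u16 xchg16_low(atomic_t *v, u16 new)
	{
		/* new value = (old & ~0xffff) | new; the old value is returned */
		return (u16)atomic_fetch_andnot_or(0xffff, new, v);
	}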

People seem to really struggle to implement that sanely.

It's such a shame we can't have the compiler generate sane composite
atomics for us..

> I also think we're lacking documentation justifying when you would use this
> new primitive over e.g. a sub-word WRITE_ONCE() on architectures that
> support those, especially for the non-returning variants.

Given the sub-word ordering 'fun', this might come in handy somewhere
:-) But yes, its existence is more of a completeness/symmetry argument
than anything else.


* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29  9:55 ` Will Deacon
  2021-07-29 10:58   ` hev
  2021-07-29 11:15   ` Peter Zijlstra
@ 2021-07-29 11:43   ` Arnd Bergmann
  2021-07-29 12:35     ` Will Deacon
  2 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2021-07-29 11:43 UTC (permalink / raw)
  To: Will Deacon
  Cc: Rui Wang, Peter Zijlstra, Ingo Molnar, Arnd Bergmann,
	Waiman Long, Boqun Feng, Guo Ren, linux-arch,
	Linux Kernel Mailing List, Rui Wang, Xuefeng Li, Huacai Chen,
	Jiaxun Yang, Huacai Chen, kernel test robot

On Thu, Jul 29, 2021 at 11:56 AM Will Deacon <will@kernel.org> wrote:
> On Thu, Jul 29, 2021 at 05:30:03PM +0800, Rui Wang wrote:
> > This patch introduces a new atomic primitive, andnot_or:
>
> Please see my other comments on the other patches you posted:
>
> https://lore.kernel.org/r/20210729093923.GD21151@willie-the-truck
>
> Overall, I'm not thrilled to bits by extending the atomics API with
> operations that cannot be implemented efficiently on any (?) architectures
> and are only used by the qspinlock slowpath on machines with more than 16K
> CPUs.

Wouldn't this also help improve set_mask_bits()? That one at least has
a handful of users in the kernel.
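
(For context: set_mask_bits(ptr, mask, bits) performs the same
"(*ptr & ~mask) | bits" update via a cmpxchg() loop and returns the old
value, so on an atomic_t it would map directly onto the new op. A
rough, non-authoritative sketch, with a made-up helper name:

	static inline int set_mask_bits_atomic(atomic_t *v, int mask, int bits)
	{
		return atomic_fetch_andnot_or(mask, bits, v);	/* old value */
	}

is all the mapping would take.)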

       Arnd


* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29 11:15   ` Peter Zijlstra
@ 2021-07-29 12:34     ` Will Deacon
  0 siblings, 0 replies; 8+ messages in thread
From: Will Deacon @ 2021-07-29 12:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Rui Wang, Ingo Molnar, Arnd Bergmann, Waiman Long, Boqun Feng,
	Guo Ren, linux-arch, linux-kernel, Rui Wang, Xuefeng Li,
	Huacai Chen, Jiaxun Yang, Huacai Chen, kernel test robot

On Thu, Jul 29, 2021 at 01:15:07PM +0200, Peter Zijlstra wrote:
> On Thu, Jul 29, 2021 at 10:55:52AM +0100, Will Deacon wrote:
> 
> > Overall, I'm not thrilled to bits by extending the atomics API with
> > operations that cannot be implemented efficiently on any (?) architectures
> > and are only used by the qspinlock slowpath on machines with more than 16K
> > CPUs.
> 
> My rationale for proposing this primitive is similar to the existence of
> other composite atomic ops from the Misc (and refcount) class (as per
> atomic_t.txt). They're common/performance sensitive operations that, on
> LL/SC platforms, can be better implemented than a cmpxchg() loop.
> 
> Specifically here, it can be used to implement short xchg() in an
> architecturally neutral way, but more importantly it provides fwd
> progress on LL/SC, while most LL/SC based cmpxchg() implementations are
> arguably broken there.

Well, assuming the CPU provides forward progress for LL/SC, which is _very_
rare (i.e. Power). If you implement LL/SC in your L1, it's really hard to
get forward progress guarantees once your micro-architecture starts being
aggressive about speculation.

For arm64, I would prefer the CAS loop to the LL/SC version, but we actually
have short xchg() so I would much prefer that people used that! So my worry
is that we start seeing users of this new thing crop up all over the place
and it's not at all obvious that it's much worse than xchg().

Will


* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29 11:43   ` Arnd Bergmann
@ 2021-07-29 12:35     ` Will Deacon
  2021-07-30  2:50       ` hev
  0 siblings, 1 reply; 8+ messages in thread
From: Will Deacon @ 2021-07-29 12:35 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Rui Wang, Peter Zijlstra, Ingo Molnar, Waiman Long, Boqun Feng,
	Guo Ren, linux-arch, Linux Kernel Mailing List, Rui Wang,
	Xuefeng Li, Huacai Chen, Jiaxun Yang, Huacai Chen,
	kernel test robot

On Thu, Jul 29, 2021 at 01:43:41PM +0200, Arnd Bergmann wrote:
> On Thu, Jul 29, 2021 at 11:56 AM Will Deacon <will@kernel.org> wrote:
> > On Thu, Jul 29, 2021 at 05:30:03PM +0800, Rui Wang wrote:
> > > This patch introduces a new atomic primitive, andnot_or:
> >
> > Please see my other comments on the other patches you posted:
> >
> > https://lore.kernel.org/r/20210729093923.GD21151@willie-the-truck
> >
> > Overall, I'm not thrilled to bits by extending the atomics API with
> > operations that cannot be implemented efficiently on any (?) architectures
> > and are only used by the qspinlock slowpath on machines with more than 16K
> > CPUs.
> 
> Wouldn't this also help improve set_mask_bits()? That one at least has
> a handful of users in the kernel.

For pure LL/SC architectures, yes, but I don't think it helps anybody else.

Afaict, an architecture can already override set_mask_bits, so why do we
need to add this primitive to the atomic API?

Will


* Re: [RFC PATCH v3] locking/atomic: Implement atomic{,64,_long}_{fetch_,}{andnot_or}{,_relaxed,_acquire,_release}()
  2021-07-29 12:35     ` Will Deacon
@ 2021-07-30  2:50       ` hev
  0 siblings, 0 replies; 8+ messages in thread
From: hev @ 2021-07-30  2:50 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Will Deacon, Arnd Bergmann, Rui Wang, Ingo Molnar, Waiman Long,
	Boqun Feng, Guo Ren, linux-arch, Linux Kernel Mailing List,
	Xuefeng Li, Huacai Chen, Jiaxun Yang, Huacai Chen,
	kernel test robot

Hi,

On Thu, Jul 29, 2021 at 8:35 PM Will Deacon <will@kernel.org> wrote:
>
> On Thu, Jul 29, 2021 at 01:43:41PM +0200, Arnd Bergmann wrote:
> > On Thu, Jul 29, 2021 at 11:56 AM Will Deacon <will@kernel.org> wrote:
> > > On Thu, Jul 29, 2021 at 05:30:03PM +0800, Rui Wang wrote:
> > > > This patch introduces a new atomic primitive, andnot_or:
> > >
> > > Please see my other comments on the other patches you posted:
> > >
> > > https://lore.kernel.org/r/20210729093923.GD21151@willie-the-truck
> > >
> > > Overall, I'm not thrilled to bits by extending the atomics API with
> > > operations that cannot be implemented efficiently on any (?) architectures
> > > and are only used by the qspinlock slowpath on machines with more than 16K
> > > CPUs.
> >
> > Wouldn't this also help improve set_mask_bits()? That one at least has
> > a handful of users in the kernel.
>
> For pure LL/SC architectures, yes, but I don't think it helps anybody else.
>
> Afaict, an architecture can already override set_mask_bits, so why do we
> need to add this primitive to the atomic API?
>
> Will

So what's next?

Note that set_mask_bits() now returns the old value; before 5.0 it
returned the new value. :-D See commit 1db604f676b
("include/linux/bitops.h: set_mask_bits() to return old value").

Regards,
Rui


