[PATCH v3 0/2] accel/tcg: Improvements to atomic128.h

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h
@ 2023-05-24 18:32 Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-24 18:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

Changes for v3:
  * Most of the v2 patch set merged, except x86_64 atomic128-ldst.h,
    which failed testing with clang-11 with debian 11.

  * New patch to change __int128_t detection.

  * This in turn enabled CONFIG_ATOMIC128, which was not ideal.
    This clang bug/mis-feature of using a cmpxchg sequence for
    implementing __atomic_load_n was already noted for aarch64,
    so I should have expected it would also be true for x86_64.
    Given that I am adding inline assembly for CPUINFO_ATOMIC_VMOVDQA
    anyway, this isn't a big deal, but I did need to adjust the ifdefs.


r~


Richard Henderson (2):
  meson: Split test for __int128_t type from __int128_t arithmetic
  qemu/atomic128: Add x86_64 atomic128-ldst.h

 meson.build                               | 15 +++--
 host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
 include/qemu/int128.h                     |  4 +-
 3 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h

-- 
2.34.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic
  2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
@ 2023-05-24 18:32 ` Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
  2023-05-26  0:24 ` [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-24 18:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

Older versions of clang have missing runtime functions for arithmetic
with -fsanitize=undefined (see 464e3671f9d5c), so we cannot use
__int128_t for implementing Int128.  But __int128_t is present,
data movement works, and it can be used for atomic128.

Probe for both CONFIG_INT128_TYPE and CONFIG_INT128, adjust
qemu/int128.h to define Int128Alias if CONFIG_INT128_TYPE,
and adjust the meson probe for atomics to use has_int128_type.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 meson.build           | 15 ++++++++++-----
 include/qemu/int128.h |  4 ++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/meson.build b/meson.build
index ef181ff2df..1f73c68a41 100644
--- a/meson.build
+++ b/meson.build
@@ -2536,7 +2536,13 @@ config_host_data.set('CONFIG_ATOMIC64', cc.links('''
     return 0;
   }'''))
 
-has_int128 = cc.links('''
+has_int128_type = cc.compiles('''
+  __int128_t a;
+  __uint128_t b;
+  int main(void) { b = a; }''')
+config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
+
+has_int128 = has_int128_type and cc.links('''
   __int128_t a;
   __uint128_t b;
   int main (void) {
@@ -2545,10 +2551,9 @@ has_int128 = cc.links('''
     a = a * a;
     return 0;
   }''')
-
 config_host_data.set('CONFIG_INT128', has_int128)
 
-if has_int128
+if has_int128_type
   # "do we have 128-bit atomics which are handled inline and specifically not
   # via libatomic". The reason we can't use libatomic is documented in the
   # comment starting "GCC is a house divided" in include/qemu/atomic128.h.
@@ -2557,7 +2562,7 @@ if has_int128
   # __alignof(unsigned __int128) for the host.
   atomic_test_128 = '''
     int main(int ac, char **av) {
-      unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
+      __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
       p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
       __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
       __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
@@ -2579,7 +2584,7 @@ if has_int128
       config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
         int main(void)
         {
-          unsigned __int128 x = 0, y = 0;
+          __uint128_t x = 0, y = 0;
           __sync_val_compare_and_swap_16(&x, y, x);
           return 0;
         }
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 9e46cfaefc..73624e8be7 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -481,7 +481,7 @@ static inline void bswap128s(Int128 *s)
  * a possible structure and the native types.  Ease parameter passing
  * via use of the transparent union extension.
  */
-#ifdef CONFIG_INT128
+#ifdef CONFIG_INT128_TYPE
 typedef union {
     __uint128_t u;
     __int128_t i;
@@ -489,6 +489,6 @@ typedef union {
 } Int128Alias __attribute__((transparent_union));
 #else
 typedef Int128 Int128Alias;
-#endif /* CONFIG_INT128 */
+#endif /* CONFIG_INT128_TYPE */
 
 #endif /* INT128_H */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h
  2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
@ 2023-05-24 18:32 ` Richard Henderson
  2023-05-26  0:24 ` [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-24 18:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

With CPUINFO_ATOMIC_VMOVDQA, we can perform proper atomic
load/store without cmpxchg16b.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h

diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h
new file mode 100644
index 0000000000..adc9332f91
--- /dev/null
+++ b/host/include/x86_64/host/atomic128-ldst.h
@@ -0,0 +1,68 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, x86_64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef AARCH64_ATOMIC128_LDST_H
+#define AARCH64_ATOMIC128_LDST_H
+
+#ifdef CONFIG_INT128_TYPE
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/*
+ * Through clang 16, with -mcx16, __atomic_load_n is incorrectly
+ * expanded to a read-write operation: lock cmpxchg16b.
+ */
+
+#define HAVE_ATOMIC128_RO  likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
+#define HAVE_ATOMIC128_RW  1
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    Int128Alias r;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr));
+
+    return r.s;
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias r;
+
+    if (HAVE_ATOMIC128_RO) {
+        asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
+    } else {
+        r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
+    }
+    return r.s;
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias new = { .s = val };
+
+    if (HAVE_ATOMIC128_RO) {
+        asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i));
+    } else {
+        __int128_t old;
+        do {
+            old = *ptr_align;
+        } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
+    }
+}
+#else
+/* Provide QEMU_ERROR stubs. */
+#include "host/include/generic/host/atomic128-ldst.h"
+#endif
+
+#endif /* AARCH64_ATOMIC128_LDST_H */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h
  2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
@ 2023-05-26  0:24 ` Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-26  0:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

On 5/24/23 11:32, Richard Henderson wrote:
> Changes for v3:
>    * Most of the v2 patch set merged, except x86_64 atomic128-ldst.h,
>      which failed testing with clang-11 with debian 11.
> 
>    * New patch to change __int128_t detection.
> 
>    * This in turn enabled CONFIG_ATOMIC128, which was not ideal.
>      This clang bug/mis-feature of using a cmpxchg sequence for
>      implementing __atomic_load_n was already noted for aarch64,
>      so I should have expected it would also be true for x86_64.
>      Given that I am adding inline assembly for CPUINFO_ATOMIC_VMOVDQA
>      anyway, this isn't a big deal, but I did need to adjust the ifdefs.
> 
> 
> r~
> 
> 
> Richard Henderson (2):
>    meson: Split test for __int128_t type from __int128_t arithmetic
>    qemu/atomic128: Add x86_64 atomic128-ldst.h
> 
>   meson.build                               | 15 +++--
>   host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
>   include/qemu/int128.h                     |  4 +-
>   3 files changed, 80 insertions(+), 7 deletions(-)
>   create mode 100644 host/include/x86_64/host/atomic128-ldst.h
> 

Superseded by
Message-Id: <20230526002334.1760495-1-richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-05-26  0:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
2023-05-24 18:32 ` [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
2023-05-26  0:24 ` [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.