All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH for-8.0 13/29] meson: Detect atomic128 support with optimization
Date: Fri, 18 Nov 2022 01:47:38 -0800	[thread overview]
Message-ID: <20221118094754.242910-14-richard.henderson@linaro.org> (raw)
In-Reply-To: <20221118094754.242910-1-richard.henderson@linaro.org>

There is an edge condition prior to gcc13 for which optimization
is required to generate 16-byte atomic sequences.  Detect this.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/ldst_atomicity.c.inc | 38 ++++++++++++++++++-------
 meson.build                    | 52 ++++++++++++++++++++++------------
 2 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index e6a7558399..68edab4398 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -16,6 +16,23 @@
 #endif
 #define HAVE_al8_fast      (ATOMIC_REG_SIZE >= 8)
 
+/*
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
+ * that are supported by the host, e.g. s390x.  We can force the pointer to
+ * have our known alignment with __builtin_assume_aligned, however prior to
+ * GCC 13 that was only reliable with optimization enabled.  See
+ *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
+ */
+#if defined(CONFIG_ATOMIC128_OPT)
+# if !defined(__OPTIMIZE__)
+#  define ATTRIBUTE_ATOMIC128_OPT  __attribute__((optimize("O1")))
+# endif
+# define CONFIG_ATOMIC128
+#endif
+#ifndef ATTRIBUTE_ATOMIC128_OPT
+# define ATTRIBUTE_ATOMIC128_OPT
+#endif
+
 #if defined(CONFIG_ATOMIC128)
 # define HAVE_al16_fast    true
 #else
@@ -134,7 +151,8 @@ static inline uint64_t load_atomic8(void *pv)
  *
  * Atomically load 16 aligned bytes from @pv.
  */
-static inline Int128 load_atomic16(void *pv)
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+load_atomic16(void *pv)
 {
 #ifdef CONFIG_ATOMIC128
     __uint128_t *p = __builtin_assume_aligned(pv, 16);
@@ -336,7 +354,8 @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
  * cross an 16-byte boundary then the access must be 16-byte atomic,
  * otherwise the access must be 8-byte atomic.
  */
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
+load_atom_extract_al16_or_al8(void *pv, int s)
 {
 #if defined(CONFIG_ATOMIC128)
     uintptr_t pi = (uintptr_t)pv;
@@ -672,28 +691,24 @@ static inline void store_atomic8(void *pv, uint64_t val)
  *
  * Atomically store 16 aligned bytes to @pv.
  */
-static inline void store_atomic16(void *pv, Int128 val)
+static inline void ATTRIBUTE_ATOMIC128_OPT
+store_atomic16(void *pv, Int128Alias val)
 {
 #if defined(CONFIG_ATOMIC128)
     __uint128_t *pu = __builtin_assume_aligned(pv, 16);
-    Int128Alias new;
-
-    new.s = val;
-    qatomic_set__nocheck(pu, new.u);
+    qatomic_set__nocheck(pu, val.u);
 #elif defined(CONFIG_CMPXCHG128)
     __uint128_t *pu = __builtin_assume_aligned(pv, 16);
     __uint128_t o;
-    Int128Alias n;
 
     /*
      * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
      * defer to libatomic, so we must use __sync_val_compare_and_swap_16
      * and accept the sequential consistency that comes with it.
      */
-    n.s = val;
     do {
         o = *pu;
-    } while (!__sync_bool_compare_and_swap_16(pu, o, n.u));
+    } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
 #else
     qemu_build_not_reached();
 #endif
@@ -777,7 +792,8 @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
  *
  * Atomically store @val to @p masked by @msk.
  */
-static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
+static void ATTRIBUTE_ATOMIC128_OPT
+store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
 {
 #if defined(CONFIG_ATOMIC128)
     __uint128_t *pu, old, new;
diff --git a/meson.build b/meson.build
index 4984e80e71..503eeabd79 100644
--- a/meson.build
+++ b/meson.build
@@ -2215,23 +2215,21 @@ config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
         return printf("%zu", SIZE_MAX);
     }''', args: ['-Werror']))
 
-atomic_test = '''
+# See if 64-bit atomic operations are supported.
+# Note that without __atomic builtins, we can only
+# assume atomic loads/stores max at pointer size.
+config_host_data.set('CONFIG_ATOMIC64', cc.links('''
   #include <stdint.h>
   int main(void)
   {
-    @0@ x = 0, y = 0;
+    uint64_t x = 0, y = 0;
     y = __atomic_load_n(&x, __ATOMIC_RELAXED);
     __atomic_store_n(&x, y, __ATOMIC_RELAXED);
     __atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
     __atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
     __atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
     return 0;
-  }'''
-
-# See if 64-bit atomic operations are supported.
-# Note that without __atomic builtins, we can only
-# assume atomic loads/stores max at pointer size.
-config_host_data.set('CONFIG_ATOMIC64', cc.links(atomic_test.format('uint64_t')))
+  }'''))
 
 has_int128 = cc.links('''
   __int128_t a;
@@ -2249,21 +2247,39 @@ if has_int128
   # "do we have 128-bit atomics which are handled inline and specifically not
   # via libatomic". The reason we can't use libatomic is documented in the
   # comment starting "GCC is a house divided" in include/qemu/atomic128.h.
-  has_atomic128 = cc.links(atomic_test.format('unsigned __int128'))
+  # We only care about these operations on 16-byte aligned pointers, so
+  # force 16-byte alignment of the pointer, which may be greater than
+  # __alignof(unsigned __int128) for the host.
+  atomic_test_128 = '''
+    int main(int ac, char **av) {
+      unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], sizeof(16));
+      p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
+      __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
+      __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+      return 0;
+    }'''
+  has_atomic128 = cc.links(atomic_test_128)
 
   config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
 
   if not has_atomic128
-    has_cmpxchg128 = cc.links('''
-      int main(void)
-      {
-        unsigned __int128 x = 0, y = 0;
-        __sync_val_compare_and_swap_16(&x, y, x);
-        return 0;
-      }
-    ''')
+    # Even with __builtin_assume_aligned, the above test may have failed
+    # without optimization enabled.  Try again with optimizations locally
+    # enabled for the function.  See
+    #   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
+    has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
+    config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
 
-    config_host_data.set('CONFIG_CMPXCHG128', has_cmpxchg128)
+    if not has_atomic128_opt
+      config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
+        int main(void)
+        {
+          unsigned __int128 x = 0, y = 0;
+          __sync_val_compare_and_swap_16(&x, y, x);
+          return 0;
+        }
+      '''))
+    endif
   endif
 endif
 
-- 
2.34.1



  parent reply	other threads:[~2022-11-18  9:51 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-18  9:47 [PATCH for-8.0 00/29] tcg: Improve atomicity support Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 01/29] include/qemu/cpuid: Introduce xgetbv_low Richard Henderson
2022-11-21 12:15   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 02/29] include/exec/memop: Add bits describing atomicity Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 03/29] accel/tcg: Add cpu_in_serial_context Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 04/29] accel/tcg: Introduce tlb_read_idx Richard Henderson
2022-11-21 12:25   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 05/29] accel/tcg: Reorg system mode load helpers Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 06/29] accel/tcg: Reorg system mode store helpers Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 07/29] accel/tcg: Honor atomicity of loads Richard Henderson
2022-11-22 14:35   ` Peter Maydell
2022-11-22 18:04     ` Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 08/29] accel/tcg: Honor atomicity of stores Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 09/29] tcg/tci: Use cpu_{ld,st}_mmu Richard Henderson
2022-11-21 12:40   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 10/29] tcg: Unify helper_{be,le}_{ld,st}* Richard Henderson
2022-11-21 12:48   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 11/29] accel/tcg: Implement helper_{ld, st}*_mmu for user-only Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 12/29] tcg: Add 128-bit guest memory primitives Richard Henderson
2022-11-22  3:30   ` Richard Henderson
2022-11-18  9:47 ` Richard Henderson [this message]
2022-11-18  9:47 ` [PATCH for-8.0 14/29] tcg/i386: Add have_atomic16 Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 15/29] include/qemu/int128: Add vector type to Int128Alias Richard Henderson
2022-11-21 23:45   ` Philippe Mathieu-Daudé
2022-11-22 18:21   ` Philippe Mathieu-Daudé
2022-11-22 18:31     ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 16/29] accel/tcg: Use have_atomic16 in ldst_atomicity.c.inc Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 17/29] tcg/aarch64: Add have_lse, have_lse2 Richard Henderson
2022-11-21 23:10   ` Philippe Mathieu-Daudé
2022-11-21 23:14     ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 18/29] accel/tcg: Add aarch64 specific support in ldst_atomicity Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 19/29] tcg: Introduce TCG_OPF_TYPE_MASK Richard Henderson
2022-11-21 16:12   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 20/29] tcg: Add INDEX_op_qemu_{ld,st}_i128 Richard Henderson
2022-11-21 22:59   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 21/29] tcg/i386: Introduce tcg_out_mov2 Richard Henderson
2022-11-21 16:21   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 22/29] tcg/i386: Introduce tcg_out_testi Richard Henderson
2022-11-21 16:22   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 23/29] tcg/i386: Use full load/store helpers in user-only mode Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 24/29] tcg/i386: Replace is64 with type in qemu_ld/st routines Richard Henderson
2022-11-21 16:27   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 25/29] tcg/i386: Mark Win64 call-saved vector regs as reserved Richard Henderson
2022-11-21 16:28   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 26/29] tcg/i386: Examine MemOp for atomicity and alignment Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 27/29] tcg/i386: Support 128-bit load/store with have_atomic16 Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 28/29] tcg/i386: Add vex_v argument to tcg_out_vex_modrm_pool Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 29/29] tcg/i386: Honor 64-bit atomicity in 32-bit mode Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221118094754.242910-14-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.