[Qemu-devel] [PATCH v2 10/27] tcg: Add atomic128 helpers

From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: cota@braap.org, alex.bennee@linaro.org, pbonzini@redhat.com,
	peter.maydell@linaro.org, serge.fdrv@gmail.com
Subject: [Qemu-devel] [PATCH v2 10/27] tcg: Add atomic128 helpers
Date: Fri,  1 Jul 2016 10:04:36 -0700	[thread overview]
Message-ID: <1467392693-22715-11-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1467392693-22715-1-git-send-email-rth@twiddle.net>

Force the use of cmpxchg16b on x86_64.

Wikipedia suggests that only very old AMD64 (circa 2004) did not have
this instruction.  Further, it's required by Windows 8 so no new cpus
will ever omit it.

If we truely care about these, then we could check this at startup time
and then avoid executing paths that use it.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 configure             |  29 ++++++++++++-
 cputlb.c              |   6 +++
 include/qemu/int128.h |   6 +++
 softmmu_template.h    | 110 +++++++++++++++++++++++++++++++++++++-------------
 tcg/tcg.h             |  22 ++++++++++
 5 files changed, 144 insertions(+), 29 deletions(-)

diff --git a/configure b/configure
index 59ea124..586abd6 100755
--- a/configure
+++ b/configure
@@ -1201,7 +1201,10 @@ case "$cpu" in
            cc_i386='$(CC) -m32'
            ;;
     x86_64)
-           CPU_CFLAGS="-m64"
+           # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+           # If we truly care, we should simply detect this case at
+           # runtime and generate the fallback to serial emulation.
+           CPU_CFLAGS="-m64 -mcx16"
            LDFLAGS="-m64 $LDFLAGS"
            cc_i386='$(CC) -m32'
            ;;
@@ -4434,6 +4437,26 @@ if compile_prog "" "" ; then
     int128=yes
 fi
 
+#########################################
+# See if 128-bit atomic operations are supported.
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(&x, 0);
+  __atomic_store_16(&x, y, 0);
+  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    atomic128=yes
+  fi
+fi
+
 ########################################
 # check if getauxval is available.
 
@@ -5383,6 +5406,10 @@ if test "$int128" = "yes" ; then
   echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 
+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 5272456..660f824 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -510,6 +510,12 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+
+#ifdef CONFIG_ATOMIC128
+#define SHIFT 4
+#include "softmmu_template.h"
+#endif
+
 #undef MMUSUFFIX
 
 #define MMUSUFFIX _cmmu
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index ab67275..5819da4 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -2,6 +2,7 @@
 #define INT128_H
 
 #ifdef CONFIG_INT128
+#include "qemu/bswap.h"
 
 typedef __int128 Int128;
 
@@ -137,6 +138,11 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
     *a -= b;
 }
 
+static inline Int128 bswap128(Int128 a)
+{
+    return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
+}
+
 #else /* !CONFIG_INT128 */
 
 /* Here we are catering to the ABI of the host.  If the host returns
diff --git a/softmmu_template.h b/softmmu_template.h
index 76712b9..0a9f49b 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -27,25 +27,30 @@
 
 #define DATA_SIZE (1 << SHIFT)
 
-#if DATA_SIZE == 8
-#define SUFFIX q
-#define LSUFFIX q
-#define SDATA_TYPE  int64_t
+#if DATA_SIZE == 16
+#define SUFFIX     o
+#define LSUFFIX    o
+#define SDATA_TYPE Int128
+#define DATA_TYPE  Int128
+#elif DATA_SIZE == 8
+#define SUFFIX     q
+#define LSUFFIX    q
+#define SDATA_TYPE int64_t
 #define DATA_TYPE  uint64_t
 #elif DATA_SIZE == 4
-#define SUFFIX l
-#define LSUFFIX l
-#define SDATA_TYPE  int32_t
+#define SUFFIX     l
+#define LSUFFIX    l
+#define SDATA_TYPE int32_t
 #define DATA_TYPE  uint32_t
 #elif DATA_SIZE == 2
-#define SUFFIX w
-#define LSUFFIX uw
-#define SDATA_TYPE  int16_t
+#define SUFFIX     w
+#define LSUFFIX    uw
+#define SDATA_TYPE int16_t
 #define DATA_TYPE  uint16_t
 #elif DATA_SIZE == 1
-#define SUFFIX b
-#define LSUFFIX ub
-#define SDATA_TYPE  int8_t
+#define SUFFIX     b
+#define LSUFFIX    ub
+#define SDATA_TYPE int8_t
 #define DATA_TYPE  uint8_t
 #else
 #error unsupported data size
@@ -56,7 +61,7 @@
    to the register size of the host.  This is tcg_target_long, except in the
    case of a 32-bit host and 64-bit data, and for that we always have
    uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
-#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8
 # define WORD_TYPE  DATA_TYPE
 # define USUFFIX    SUFFIX
 #else
@@ -73,7 +78,9 @@
 #define ADDR_READ addr_read
 #endif
 
-#if DATA_SIZE == 8
+#if DATA_SIZE == 16
+# define BSWAP(X)  bswap128(X)
+#elif DATA_SIZE == 8
 # define BSWAP(X)  bswap64(X)
 #elif DATA_SIZE == 4
 # define BSWAP(X)  bswap32(X)
@@ -140,6 +147,7 @@
     vidx >= 0;                                                                \
 })
 
+#if DATA_SIZE < 16
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               CPUIOTLBEntry *iotlbentry,
@@ -307,9 +315,10 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
     return res;
 }
 #endif /* DATA_SIZE > 1 */
+#endif /* DATA_SIZE < 16 */
 
 #ifndef SOFTMMU_CODE_ACCESS
-
+#if DATA_SIZE < 16
 /* Provide signed versions of the load routines as well.  We can of course
    avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
 #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
@@ -507,6 +516,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 }
 #endif
+#endif /* DATA_SIZE < 16 */
 
 #if DATA_SIZE == 1
 # define HE_SUFFIX  _mmu
@@ -573,9 +583,30 @@ DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
      TCGMemOpIdx oi, uintptr_t retaddr)
 {
     ATOMIC_MMU_BODY;
+#if DATA_SIZE < 16
     return atomic_cmpxchg(haddr, cmpv, newv);
+#else
+    __atomic_compare_exchange(haddr, &cmpv, &newv, false,
+                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return cmpv;
+#endif
 }
 
+#if DATA_SIZE > 1
+DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE retv;
+    cmpv = BSWAP(cmpv);
+    newv = BSWAP(newv);
+    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
+            (env, addr, cmpv, newv, oi, retaddr));
+    return BSWAP(retv);
+}
+#endif
+
+#if DATA_SIZE < 16
 #define GEN_ATOMIC_HELPER(NAME)                                         \
 DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX)     \
     (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
@@ -600,18 +631,6 @@ GEN_ATOMIC_HELPER(xchg)
 #undef GEN_ATOMIC_HELPER
 
 #if DATA_SIZE > 1
-DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
-    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
-     TCGMemOpIdx oi, uintptr_t retaddr)
-{
-    DATA_TYPE retv;
-    cmpv = BSWAP(cmpv);
-    newv = BSWAP(newv);
-    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
-            (env, addr, cmpv, newv, oi, retaddr));
-    return BSWAP(retv);
-}
-
 #define GEN_ATOMIC_HELPER(NAME)                                         \
 DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX)     \
     (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
@@ -676,6 +695,41 @@ DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX)
     }
 }
 #endif /* DATA_SIZE > 1 */
+#else /* DATA_SIZE >= 16 */
+DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE res;
+    ATOMIC_MMU_BODY;
+    __atomic_load(haddr, &res, __ATOMIC_RELAXED);
+    return res;
+}
+
+DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE res;
+    res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
+           (env, addr, oi, retaddr));
+    return BSWAP(res);
+}
+
+void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    ATOMIC_MMU_BODY;
+    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+
+void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
+     (env, addr, BSWAP(val), oi, retaddr));
+}
+#endif /* DATA_SIZE < 16 */
 
 #undef ATOMIC_MMU_BODY
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4e60498..1304a42 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1216,6 +1216,28 @@ GEN_ATOMIC_HELPER_ALL(xchg)
 #undef GEN_ATOMIC_HELPER_ALL
 #undef GEN_ATOMIC_HELPER
 
+#ifdef CONFIG_ATOMIC128
+#include "qemu/int128.h"
+
+/* These aren't really a "proper" helpers because TCG cannot manage Int128.
+   However, use the same format as the others, for use by the backends. */
+Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
+                                     Int128 cmpv, Int128 newv,
+                                     TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
+                                     Int128 cmpv, Int128 newv,
+                                     TCGMemOpIdx oi, uintptr_t retaddr);
+
+Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+                              TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+                              TCGMemOpIdx oi, uintptr_t retaddr);
+
+#endif /* CONFIG_ATOMIC128 */
 #endif /* CONFIG_SOFTMMU */
 
 #endif /* TCG_H */
-- 
2.5.5