All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: cota@braap.org, alex.bennee@linaro.org, pbonzini@redhat.com,
	peter.maydell@linaro.org, serge.fdrv@gmail.com
Subject: [Qemu-devel] [PATCH v2 10/27] tcg: Add atomic128 helpers
Date: Fri,  1 Jul 2016 10:04:36 -0700	[thread overview]
Message-ID: <1467392693-22715-11-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1467392693-22715-1-git-send-email-rth@twiddle.net>

Force the use of cmpxchg16b on x86_64.

Wikipedia suggests that only very old AMD64 (circa 2004) did not have
this instruction.  Further, it's required by Windows 8 so no new cpus
will ever omit it.

If we truly care about these, then we could check this at startup time
and then avoid executing paths that use it.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 configure             |  29 ++++++++++++-
 cputlb.c              |   6 +++
 include/qemu/int128.h |   6 +++
 softmmu_template.h    | 110 +++++++++++++++++++++++++++++++++++++-------------
 tcg/tcg.h             |  22 ++++++++++
 5 files changed, 144 insertions(+), 29 deletions(-)

diff --git a/configure b/configure
index 59ea124..586abd6 100755
--- a/configure
+++ b/configure
@@ -1201,7 +1201,10 @@ case "$cpu" in
            cc_i386='$(CC) -m32'
            ;;
     x86_64)
-           CPU_CFLAGS="-m64"
+           # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+           # If we truly care, we should simply detect this case at
+           # runtime and generate the fallback to serial emulation.
+           CPU_CFLAGS="-m64 -mcx16"
            LDFLAGS="-m64 $LDFLAGS"
            cc_i386='$(CC) -m32'
            ;;
@@ -4434,6 +4437,26 @@ if compile_prog "" "" ; then
     int128=yes
 fi
 
+#########################################
+# See if 128-bit atomic operations are supported.
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(&x, 0);
+  __atomic_store_16(&x, y, 0);
+  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    atomic128=yes
+  fi
+fi
+
 ########################################
 # check if getauxval is available.
 
@@ -5383,6 +5406,10 @@ if test "$int128" = "yes" ; then
   echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 
+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
   echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
diff --git a/cputlb.c b/cputlb.c
index 5272456..660f824 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -510,6 +510,12 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+
+#ifdef CONFIG_ATOMIC128
+#define SHIFT 4
+#include "softmmu_template.h"
+#endif
+
 #undef MMUSUFFIX
 
 #define MMUSUFFIX _cmmu
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index ab67275..5819da4 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -2,6 +2,7 @@
 #define INT128_H
 
 #ifdef CONFIG_INT128
+#include "qemu/bswap.h"
 
 typedef __int128 Int128;
 
@@ -137,6 +138,11 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
     *a -= b;
 }
 
+static inline Int128 bswap128(Int128 a)
+{
+    return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
+}
+
 #else /* !CONFIG_INT128 */
 
 /* Here we are catering to the ABI of the host.  If the host returns
diff --git a/softmmu_template.h b/softmmu_template.h
index 76712b9..0a9f49b 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -27,25 +27,30 @@
 
 #define DATA_SIZE (1 << SHIFT)
 
-#if DATA_SIZE == 8
-#define SUFFIX q
-#define LSUFFIX q
-#define SDATA_TYPE  int64_t
+#if DATA_SIZE == 16
+#define SUFFIX     o
+#define LSUFFIX    o
+#define SDATA_TYPE Int128
+#define DATA_TYPE  Int128
+#elif DATA_SIZE == 8
+#define SUFFIX     q
+#define LSUFFIX    q
+#define SDATA_TYPE int64_t
 #define DATA_TYPE  uint64_t
 #elif DATA_SIZE == 4
-#define SUFFIX l
-#define LSUFFIX l
-#define SDATA_TYPE  int32_t
+#define SUFFIX     l
+#define LSUFFIX    l
+#define SDATA_TYPE int32_t
 #define DATA_TYPE  uint32_t
 #elif DATA_SIZE == 2
-#define SUFFIX w
-#define LSUFFIX uw
-#define SDATA_TYPE  int16_t
+#define SUFFIX     w
+#define LSUFFIX    uw
+#define SDATA_TYPE int16_t
 #define DATA_TYPE  uint16_t
 #elif DATA_SIZE == 1
-#define SUFFIX b
-#define LSUFFIX ub
-#define SDATA_TYPE  int8_t
+#define SUFFIX     b
+#define LSUFFIX    ub
+#define SDATA_TYPE int8_t
 #define DATA_TYPE  uint8_t
 #else
 #error unsupported data size
@@ -56,7 +61,7 @@
    to the register size of the host.  This is tcg_target_long, except in the
    case of a 32-bit host and 64-bit data, and for that we always have
    uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
-#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8
 # define WORD_TYPE  DATA_TYPE
 # define USUFFIX    SUFFIX
 #else
@@ -73,7 +78,9 @@
 #define ADDR_READ addr_read
 #endif
 
-#if DATA_SIZE == 8
+#if DATA_SIZE == 16
+# define BSWAP(X)  bswap128(X)
+#elif DATA_SIZE == 8
 # define BSWAP(X)  bswap64(X)
 #elif DATA_SIZE == 4
 # define BSWAP(X)  bswap32(X)
@@ -140,6 +147,7 @@
     vidx >= 0;                                                                \
 })
 
+#if DATA_SIZE < 16
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               CPUIOTLBEntry *iotlbentry,
@@ -307,9 +315,10 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
     return res;
 }
 #endif /* DATA_SIZE > 1 */
+#endif /* DATA_SIZE < 16 */
 
 #ifndef SOFTMMU_CODE_ACCESS
-
+#if DATA_SIZE < 16
 /* Provide signed versions of the load routines as well.  We can of course
    avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
 #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
@@ -507,6 +516,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 }
 #endif
+#endif /* DATA_SIZE < 16 */
 
 #if DATA_SIZE == 1
 # define HE_SUFFIX  _mmu
@@ -573,9 +583,30 @@ DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
      TCGMemOpIdx oi, uintptr_t retaddr)
 {
     ATOMIC_MMU_BODY;
+#if DATA_SIZE < 16
     return atomic_cmpxchg(haddr, cmpv, newv);
+#else
+    __atomic_compare_exchange(haddr, &cmpv, &newv, false,
+                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return cmpv;
+#endif
 }
 
+#if DATA_SIZE > 1
+DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE retv;
+    cmpv = BSWAP(cmpv);
+    newv = BSWAP(newv);
+    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
+            (env, addr, cmpv, newv, oi, retaddr));
+    return BSWAP(retv);
+}
+#endif
+
+#if DATA_SIZE < 16
 #define GEN_ATOMIC_HELPER(NAME)                                         \
 DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX)     \
     (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
@@ -600,18 +631,6 @@ GEN_ATOMIC_HELPER(xchg)
 #undef GEN_ATOMIC_HELPER
 
 #if DATA_SIZE > 1
-DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX)
-    (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv,
-     TCGMemOpIdx oi, uintptr_t retaddr)
-{
-    DATA_TYPE retv;
-    cmpv = BSWAP(cmpv);
-    newv = BSWAP(newv);
-    retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX)
-            (env, addr, cmpv, newv, oi, retaddr));
-    return BSWAP(retv);
-}
-
 #define GEN_ATOMIC_HELPER(NAME)                                         \
 DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX)     \
     (CPUArchState *env, target_ulong addr, DATA_TYPE val,               \
@@ -676,6 +695,41 @@ DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX)
     }
 }
 #endif /* DATA_SIZE > 1 */
+#else /* DATA_SIZE >= 16 */
+DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE res;
+    ATOMIC_MMU_BODY;
+    __atomic_load(haddr, &res, __ATOMIC_RELAXED);
+    return res;
+}
+
+DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    DATA_TYPE res;
+    res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX)
+           (env, addr, oi, retaddr));
+    return BSWAP(res);
+}
+
+void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    ATOMIC_MMU_BODY;
+    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+
+void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX)
+    (CPUArchState *env, target_ulong addr, DATA_TYPE val,
+     TCGMemOpIdx oi, uintptr_t retaddr)
+{
+    (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX)
+     (env, addr, BSWAP(val), oi, retaddr));
+}
+#endif /* DATA_SIZE < 16 */
 
 #undef ATOMIC_MMU_BODY
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4e60498..1304a42 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1216,6 +1216,28 @@ GEN_ATOMIC_HELPER_ALL(xchg)
 #undef GEN_ATOMIC_HELPER_ALL
 #undef GEN_ATOMIC_HELPER
 
+#ifdef CONFIG_ATOMIC128
+#include "qemu/int128.h"
+
+/* These aren't really "proper" helpers because TCG cannot manage Int128.
+   However, use the same format as the others, for use by the backends. */
+Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
+                                     Int128 cmpv, Int128 newv,
+                                     TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
+                                     Int128 cmpv, Int128 newv,
+                                     TCGMemOpIdx oi, uintptr_t retaddr);
+
+Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
+                                TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+                              TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+                              TCGMemOpIdx oi, uintptr_t retaddr);
+
+#endif /* CONFIG_ATOMIC128 */
 #endif /* CONFIG_SOFTMMU */
 
 #endif /* TCG_H */
-- 
2.5.5

  parent reply	other threads:[~2016-07-01 17:05 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-01 17:04 [Qemu-devel] [PATCH v2 00/27] cmpxchg-based emulation of atomics Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 01/27] atomics: add atomic_xor Richard Henderson
2016-08-11 17:19   ` Alex Bennée
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 02/27] atomics: add atomic_op_fetch variants Richard Henderson
2016-08-11 17:20   ` Alex Bennée
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 03/27] exec: Avoid direct references to Int128 parts Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 04/27] int128: Use __int128 if available Richard Henderson
2016-08-11 10:45   ` Alex Bennée
2016-08-25 19:09   ` [Qemu-devel] [PATCH] fixup! " Alex Bennée
2016-08-26 12:48     ` no-reply
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 06/27] int128: Use complex numbers if advisable Richard Henderson
2016-07-04 11:51   ` Paolo Bonzini
2016-07-04 12:07   ` Peter Maydell
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 07/27] tcg: Add EXCP_ATOMIC Richard Henderson
2016-09-08  8:38   ` Alex Bennée
2016-09-08 16:26     ` Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 08/27] HACK: Always enable parallel_cpus Richard Henderson
2016-09-08  8:39   ` Alex Bennée
2016-09-08 16:22     ` Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 09/27] tcg: Add atomic helpers Richard Henderson
2016-09-08 13:43   ` Alex Bennée
2016-09-08 16:08     ` Richard Henderson
2016-07-01 17:04 ` Richard Henderson [this message]
2016-07-08  3:00   ` [Qemu-devel] [PATCH v2 10/27] tcg: Add atomic128 helpers Emilio G. Cota
2016-07-08  5:26     ` Richard Henderson
2016-08-11 10:02   ` Alex Bennée
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 11/27] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers Richard Henderson
2016-07-08  3:08   ` Emilio G. Cota
2016-07-08  3:19     ` Emilio G. Cota
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 12/27] target-i386: emulate LOCK'ed OP instructions using atomic helpers Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 13/27] target-i386: emulate LOCK'ed INC using atomic helper Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 14/27] target-i386: emulate LOCK'ed NOT " Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 15/27] target-i386: emulate LOCK'ed NEG using cmpxchg helper Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 16/27] target-i386: emulate LOCK'ed XADD using atomic helper Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 17/27] target-i386: emulate LOCK'ed BTX ops using atomic helpers Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 18/27] target-i386: emulate XCHG using atomic helper Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 19/27] target-i386: remove helper_lock() Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 20/27] tests: add atomic_add-bench Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 21/27] target-arm: Rearrange aa32 load and store functions Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 22/27] target-arm: emulate LL/SC using cmpxchg helpers Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 23/27] target-arm: emulate SWP with atomic_xchg helper Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 24/27] target-arm: emulate aarch64's LL/SC using cmpxchg helpers Richard Henderson
2016-07-08  3:34   ` Emilio G. Cota
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 25/27] linux-user: remove handling of ARM's EXCP_STREX Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 26/27] linux-user: remove handling of aarch64's EXCP_STREX Richard Henderson
2016-07-01 17:04 ` [Qemu-devel] [PATCH v2 27/27] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info} Richard Henderson
2016-07-01 17:23 ` [Qemu-devel] [PATCH v2 00/27] cmpxchg-based emulation of atomics Richard Henderson
2016-07-08  2:53 ` Emilio G. Cota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1467392693-22715-11-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=alex.bennee@linaro.org \
    --cc=cota@braap.org \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=serge.fdrv@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.