All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations
@ 2018-10-03 19:39 Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 1/9] tcg: Split CONFIG_ATOMIC128 Richard Henderson
                   ` (9 more replies)
  0 siblings, 10 replies; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel

For v2, and history, see
  http://lists.nongnu.org/archive/html/qemu-devel/2018-08/msg04533.html

Changes since v2:
  * Fixed a typo noticed by Emilio.
  * Brought the target/s390x changes back, as the patches with
    which they conflicted are now in mainline.


r~


Richard Henderson (9):
  tcg: Split CONFIG_ATOMIC128
  target/i386: Convert to HAVE_CMPXCHG128
  target/arm: Convert to HAVE_CMPXCHG128
  target/arm: Check HAVE_CMPXCHG128 at translate time
  target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
  target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
  target/s390x: Split do_cdsg, do_lpq, do_stpq
  target/s390x: Skip wout, cout helpers if op helper does not return
  target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate

 accel/tcg/atomic_template.h |  20 ++-
 include/qemu/atomic128.h    | 155 ++++++++++++++++++++++
 target/ppc/helper.h         |   2 +-
 tcg/tcg.h                   |  16 ++-
 accel/tcg/cputlb.c          |   3 +-
 accel/tcg/user-exec.c       |   5 +-
 target/arm/helper-a64.c     | 251 ++++++++++++++++++------------------
 target/arm/translate-a64.c  |  38 +++---
 target/i386/mem_helper.c    |   9 +-
 target/ppc/mem_helper.c     |  33 ++++-
 target/ppc/translate.c      | 115 +++++++++--------
 target/s390x/mem_helper.c   | 202 +++++++++++++----------------
 target/s390x/translate.c    |  37 ++++--
 configure                   |  19 +++
 14 files changed, 561 insertions(+), 344 deletions(-)
 create mode 100644 include/qemu/atomic128.h

-- 
2.17.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 1/9] tcg: Split CONFIG_ATOMIC128
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 2/9] target/i386: Convert to HAVE_CMPXCHG128 Richard Henderson
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel

GCC7+ will no longer advertise support for 16-byte __atomic operations
if only cmpxchg is supported, as for x86_64.  Fortunately, x86_64 still
has support for __sync_compare_and_swap_16 and we can make use of that.
AArch64 does not have, nor ever has had such support, so open-code it.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/atomic_template.h |  20 ++++-
 include/qemu/atomic128.h    | 155 ++++++++++++++++++++++++++++++++++++
 tcg/tcg.h                   |  16 ++--
 accel/tcg/cputlb.c          |   3 +-
 accel/tcg/user-exec.c       |   5 +-
 configure                   |  19 +++++
 6 files changed, 204 insertions(+), 14 deletions(-)
 create mode 100644 include/qemu/atomic128.h

diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index d751bcba48..efde12fdb2 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
     DATA_TYPE ret;
 
     ATOMIC_TRACE_RMW;
+#if DATA_SIZE == 16
+    ret = atomic16_cmpxchg(haddr, cmpv, newv);
+#else
     ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+#endif
     ATOMIC_MMU_CLEANUP;
     return ret;
 }
 
 #if DATA_SIZE >= 16
+#if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
 
     ATOMIC_TRACE_LD;
-    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    val = atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
     return val;
 }
@@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
 
     ATOMIC_TRACE_ST;
-    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+    atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
 }
+#endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                            ABI_TYPE val EXTRA_ARGS)
@@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
     DATA_TYPE ret;
 
     ATOMIC_TRACE_RMW;
+#if DATA_SIZE == 16
+    ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
+#else
     ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
+#endif
     ATOMIC_MMU_CLEANUP;
     return BSWAP(ret);
 }
 
 #if DATA_SIZE >= 16
+#if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
 
     ATOMIC_TRACE_LD;
-    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    val = atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
     return BSWAP(val);
 }
@@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
 
     ATOMIC_TRACE_ST;
     val = BSWAP(val);
-    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+    atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
 }
+#endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                            ABI_TYPE val EXTRA_ARGS)
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
new file mode 100644
index 0000000000..fdea225132
--- /dev/null
+++ b/include/qemu/atomic128.h
@@ -0,0 +1,155 @@
+/*
+ * Simple interface for 128-bit atomic operations.
+ *
+ * Copyright (C) 2018 Linaro, Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * See docs/devel/atomics.txt for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef QEMU_ATOMIC128_H
+#define QEMU_ATOMIC128_H
+
+/*
+ * GCC is a house divided about supporting large atomic operations.
+ *
+ * For hosts that only have large compare-and-swap, a legalistic reading
+ * of the C++ standard means that one cannot implement __atomic_read on
+ * read-only memory, and thus all atomic operations must synchronize
+ * through libatomic.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878
+ *
+ * This interpretation is not especially helpful for QEMU.
+ * For softmmu, all RAM is always read/write from the hypervisor.
+ * For user-only, if the guest doesn't implement such an __atomic_read
+ * then the host need not worry about it either.
+ *
+ * Moreover, using libatomic is not an option, because its interface is
+ * built for std::atomic<T>, and requires that *all* accesses to such an
+ * object go through the library.  In our case we do not have an object
+ * in the C/C++ sense, but a view of memory as seen by the guest.
+ * The guest may issue a large atomic operation and then access those
+ * pieces using word-sized accesses.  From the hypervisor, we have no
+ * way to connect those two actions.
+ *
+ * Therefore, special case each platform.
+ */
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    return atomic_cmpxchg__nocheck(ptr, cmp, new);
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(CONFIG_CMPXCHG128)
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    return __sync_val_compare_and_swap_16(ptr, cmp, new);
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(__aarch64__)
+/* Through gcc 8, aarch64 has no support for 128-bit at all.  */
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "cmp %[oldl], %[cmpl]\n\t"
+        "ccmp %[oldh], %[cmph], #0, eq\n\t"
+        "b.ne 1f\n\t"
+        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
+        "cbnz %w[tmp], 0b\n"
+        "1:"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=r"(oldh)
+        : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
+          [newl] "r"(newl), [newh] "r"(newh)
+        : "memory", "cc");
+
+    return int128_make128(oldl, oldh);
+}
+# define HAVE_CMPXCHG128 1
+#else
+/* Fallback definition that must be optimized away, or error.  */
+Int128 __attribute__((error("unsupported atomic")))
+    atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
+# define HAVE_CMPXCHG128 0
+#endif /* Some definition for HAVE_CMPXCHG128 */
+
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    return atomic_read__nocheck(ptr);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
+/* We can do better than cmpxchg for AArch64.  */
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    uint64_t l, h;
+    uint32_t tmp;
+
+    /* The load must be paired with the store to guarantee not tearing.  */
+    asm("0: ldxp %[l], %[h], %[mem]\n\t"
+        "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
+
+    return int128_make128(l, h);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l = int128_getlo(val), h = int128_gethi(val);
+    uint64_t t1, t2;
+
+    /* Load into temporaries to acquire the exclusive access lock.  */
+    asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
+        "stxp %w[t1], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[t1], 0b"
+        : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
+        : [l] "r"(l), [h] "r"(h));
+}
+
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    /* Maybe replace 0 with 0, returning the old value.  */
+    return atomic16_cmpxchg(ptr, 0, 0);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    Int128 old = *ptr, cmp;
+    do {
+        cmp = old;
+        old = atomic16_cmpxchg(ptr, cmp, val);
+    } while (old != cmp);
+}
+
+# define HAVE_ATOMIC128 1
+#else
+/* Fallback definitions that must be optimized away, or error.  */
+Int128 __attribute__((error("unsupported atomic")))
+    atomic16_read(Int128 *ptr);
+void __attribute__((error("unsupported atomic")))
+    atomic16_set(Int128 *ptr, Int128 val);
+# define HAVE_ATOMIC128 0
+#endif /* Some definition for HAVE_ATOMIC128 */
+
+#endif /* QEMU_ATOMIC128_H */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index f9f12378e9..20803b8b64 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -32,6 +32,7 @@
 #include "qemu/queue.h"
 #include "tcg-mo.h"
 #include "tcg-target.h"
+#include "qemu/int128.h"
 
 /* XXX: make safe guess about sizes */
 #define MAX_OP_PER_INSTR 266
@@ -1454,11 +1455,14 @@ GEN_ATOMIC_HELPER_ALL(xchg)
 #undef GEN_ATOMIC_HELPER
 #endif /* CONFIG_SOFTMMU */
 
-#ifdef CONFIG_ATOMIC128
-#include "qemu/int128.h"
-
-/* These aren't really a "proper" helpers because TCG cannot manage Int128.
-   However, use the same format as the others, for use by the backends. */
+/*
+ * These aren't really a "proper" helpers because TCG cannot manage Int128.
+ * However, use the same format as the others, for use by the backends.
+ *
+ * The cmpxchg functions are only defined if HAVE_CMPXCHG128;
+ * the ld/st functions are only defined if HAVE_ATOMIC128,
+ * as defined by <qemu/atomic128.h>.
+ */
 Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
                                      Int128 cmpv, Int128 newv,
                                      TCGMemOpIdx oi, uintptr_t retaddr);
@@ -1475,6 +1479,4 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
 void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
                               TCGMemOpIdx oi, uintptr_t retaddr);
 
-#endif /* CONFIG_ATOMIC128 */
-
 #endif /* TCG_H */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f4702ce91f..0e740f5296 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -32,6 +32,7 @@
 #include "exec/log.h"
 #include "exec/helper-proto.h"
 #include "qemu/atomic.h"
+#include "qemu/atomic128.h"
 
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -1101,7 +1102,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 #include "atomic_template.h"
 #endif
 
-#ifdef CONFIG_ATOMIC128
+#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
 #define DATA_SIZE 16
 #include "atomic_template.h"
 #endif
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 26a3ffbba1..cd75829cf2 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "translate-all.h"
 #include "exec/helper-proto.h"
+#include "qemu/atomic128.h"
 
 #undef EAX
 #undef ECX
@@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 /* The following is only callable from other helpers, and matches up
    with the softmmu version.  */
 
-#ifdef CONFIG_ATOMIC128
+#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
 
 #undef EXTRA_ARGS
 #undef ATOMIC_NAME
@@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
 #define DATA_SIZE 16
 #include "atomic_template.h"
-#endif /* CONFIG_ATOMIC128 */
+#endif
diff --git a/configure b/configure
index f3d4b799a5..f6fa341f33 100755
--- a/configure
+++ b/configure
@@ -5186,6 +5186,21 @@ EOF
   fi
 fi
 
+cmpxchg128=no
+if test "$int128" = yes -a "$atomic128" = no; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  __sync_val_compare_and_swap_16(&x, y, x);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    cmpxchg128=yes
+  fi
+fi
+
 #########################################
 # See if 64-bit atomic operations are supported.
 # Note that without __atomic builtins, we can only
@@ -6702,6 +6717,10 @@ if test "$atomic128" = "yes" ; then
   echo "CONFIG_ATOMIC128=y" >> $config_host_mak
 fi
 
+if test "$cmpxchg128" = "yes" ; then
+  echo "CONFIG_CMPXCHG128=y" >> $config_host_mak
+fi
+
 if test "$atomic64" = "yes" ; then
   echo "CONFIG_ATOMIC64=y" >> $config_host_mak
 fi
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 2/9] target/i386: Convert to HAVE_CMPXCHG128
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 1/9] tcg: Split CONFIG_ATOMIC128 Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-11 10:55   ` Philippe Mathieu-Daudé
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 3/9] target/arm: " Richard Henderson
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/mem_helper.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
index 30c26b9d9c..6cc53bcb40 100644
--- a/target/i386/mem_helper.c
+++ b/target/i386/mem_helper.c
@@ -23,6 +23,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 
 void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
@@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
 
     if ((a0 & 0xf) != 0) {
         raise_exception_ra(env, EXCP0D_GPF, ra);
-    } else {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
+    } else if (HAVE_CMPXCHG128) {
         int eflags = cpu_cc_compute_all(env, CC_OP);
 
         Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
@@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
             eflags &= ~CC_Z;
         }
         CC_SRC = eflags;
-#endif
+    } else {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
 }
 #endif
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 3/9] target/arm: Convert to HAVE_CMPXCHG128
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 1/9] tcg: Split CONFIG_ATOMIC128 Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 2/9] target/i386: Convert to HAVE_CMPXCHG128 Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 4/9] target/arm: Check HAVE_CMPXCHG128 at translate time Richard Henderson
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-a64.c | 259 +++++++++++++++++++++-------------------
 1 file changed, 133 insertions(+), 126 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 7f6ad3000b..6e4e1b8a19 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -30,6 +30,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 #include "fpu/softfloat.h"
 #include <zlib.h> /* For crc32 */
@@ -509,189 +510,195 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
 
-/* Returns 0 on success; 1 otherwise.  */
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
+                                     uint64_t new_lo, uint64_t new_hi)
 {
-    Int128 oldv, cmpv, newv;
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
     bool success;
 
-    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-    newv = int128_make128(new_lo, new_hi);
-
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
-
 #ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
 
-        helper_retaddr = ra;
-        o0 = ldq_le_p(haddr + 0);
-        o1 = ldq_le_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
+    helper_retaddr = ra;
+    o0 = ldq_le_p(haddr + 0);
+    o1 = ldq_le_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
 
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_le_p(haddr + 0, int128_getlo(newv));
-            stq_le_p(haddr + 1, int128_gethi(newv));
-        }
-        helper_retaddr = 0;
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
-
-        o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
-        o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
-            helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
-        }
-#endif
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_le_p(haddr + 0, int128_getlo(newv));
+        stq_le_p(haddr + 1, int128_gethi(newv));
     }
+    helper_retaddr = 0;
+#else
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
+
+    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
+    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
+        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
+    }
+#endif
 
     return !success;
 }
 
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
-                                              uint64_t new_lo, uint64_t new_hi)
-{
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
-}
-
 uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                               uint64_t new_lo, uint64_t new_hi)
-{
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
-}
-
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
 {
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
     bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
 
-    /* high and low need to be switched here because this is not actually a
-     * 128bit store but two doublewords stored consecutively
-     */
-    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
-    newv = int128_make128(new_hi, new_lo);
-
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
+    if (!HAVE_CMPXCHG128) {
         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
-
-#ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
-
-        helper_retaddr = ra;
-        o1 = ldq_be_p(haddr + 0);
-        o0 = ldq_be_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_be_p(haddr + 0, int128_gethi(newv));
-            stq_be_p(haddr + 1, int128_getlo(newv));
-        }
-        helper_retaddr = 0;
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
-
-        o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
-        o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
-            helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
-        }
-#endif
     }
 
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+
+    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    newv = int128_make128(new_lo, new_hi);
+    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+
+    success = int128_eq(oldv, cmpv);
     return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                      uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
+    bool success;
+
+#ifdef CONFIG_USER_ONLY
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
+
+    helper_retaddr = ra;
+    o1 = ldq_be_p(haddr + 0);
+    o0 = ldq_be_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_be_p(haddr + 0, int128_gethi(newv));
+        stq_be_p(haddr + 1, int128_getlo(newv));
+    }
+    helper_retaddr = 0;
+#else
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
+
+    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
+    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
+        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
+    }
+#endif
+
+    return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
-                                     uint64_t new_lo, uint64_t new_hi)
+                                              uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
+    Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    if (!HAVE_CMPXCHG128) {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+    }
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+    newv = int128_make128(new_hi, new_lo);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    success = int128_eq(oldv, cmpv);
+    return !success;
 }
 
 /* Writes back the old data into Rs.  */
 void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_lo, uint64_t new_hi)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    if (!HAVE_CMPXCHG128) {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+    }
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs] = int128_getlo(oldv);
     env->xregs[rs + 1] = int128_gethi(oldv);
-#endif
 }
 
 void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_hi, uint64_t new_lo)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    if (!HAVE_CMPXCHG128) {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+    }
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs + 1] = int128_getlo(oldv);
     env->xregs[rs] = int128_gethi(oldv);
-#endif
 }
 
 /*
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 4/9] target/arm: Check HAVE_CMPXCHG128 at translate time
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (2 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 3/9] target/arm: " Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-11 10:55   ` Philippe Mathieu-Daudé
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 5/9] target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128 Richard Henderson
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-a64.c    | 16 ++++------------
 target/arm/translate-a64.c | 38 ++++++++++++++++++++++----------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 6e4e1b8a19..61799d20e1 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -563,9 +563,7 @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
     int mem_idx;
     TCGMemOpIdx oi;
 
-    if (!HAVE_CMPXCHG128) {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_CMPXCHG128);
 
     mem_idx = cpu_mmu_index(env, false);
     oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
@@ -635,9 +633,7 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
     int mem_idx;
     TCGMemOpIdx oi;
 
-    if (!HAVE_CMPXCHG128) {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_CMPXCHG128);
 
     mem_idx = cpu_mmu_index(env, false);
     oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
@@ -663,9 +659,7 @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
     int mem_idx;
     TCGMemOpIdx oi;
 
-    if (!HAVE_CMPXCHG128) {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_CMPXCHG128);
 
     mem_idx = cpu_mmu_index(env, false);
     oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
@@ -686,9 +680,7 @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
     int mem_idx;
     TCGMemOpIdx oi;
 
-    if (!HAVE_CMPXCHG128) {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_CMPXCHG128);
 
     mem_idx = cpu_mmu_index(env, false);
     oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8ca3876707..77ee8d9085 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -37,6 +37,7 @@
 
 #include "trace-tcg.h"
 #include "translate-a64.h"
+#include "qemu/atomic128.h"
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
@@ -2082,26 +2083,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                        get_mem_index(s),
                                        MO_64 | MO_ALIGN | s->be_data);
             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
-        } else if (s->be_data == MO_LE) {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+            if (!HAVE_CMPXCHG128) {
+                gen_helper_exit_atomic(cpu_env);
+                s->base.is_jmp = DISAS_NORETURN;
+            } else if (s->be_data == MO_LE) {
                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
             } else {
-                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
-            }
-        } else {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
-            } else {
-                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
             }
+        } else if (s->be_data == MO_LE) {
+            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
+        } else {
+            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
         }
     } else {
         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
@@ -2171,14 +2173,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
         }
         tcg_temp_free_i64(cmp);
     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
-        TCGv_i32 tcg_rs = tcg_const_i32(rs);
-
-        if (s->be_data == MO_LE) {
-            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 tcg_rs = tcg_const_i32(rs);
+            if (s->be_data == MO_LE) {
+                gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            } else {
+                gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            }
+            tcg_temp_free_i32(tcg_rs);
         } else {
-            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            gen_helper_exit_atomic(cpu_env);
+            s->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(tcg_rs);
     } else {
         TCGv_i64 d1 = tcg_temp_new_i64();
         TCGv_i64 d2 = tcg_temp_new_i64();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 5/9] target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (3 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 4/9] target/arm: Check HAVE_CMPXCHG128 at translate time Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 6/9] target/s390x: " Richard Henderson
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/helper.h     |   2 +-
 target/ppc/mem_helper.c |  33 ++++++++++--
 target/ppc/translate.c  | 115 +++++++++++++++++++++-------------------
 3 files changed, 88 insertions(+), 62 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index ef64248bc4..7a1481fd0b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -800,7 +800,7 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
 
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+#ifdef TARGET_PPC64
 DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
 DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
 DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 8f0d86d104..a1485fad9b 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "tcg.h"
 #include "internal.h"
+#include "qemu/atomic128.h"
 
 //#define DEBUG_OP
 
@@ -215,11 +216,15 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
     return i;
 }
 
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+#ifdef TARGET_PPC64
 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
                                uint32_t opidx)
 {
-    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
+    Int128 ret;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
     env->retxh = int128_gethi(ret);
     return int128_getlo(ret);
 }
@@ -227,7 +232,11 @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
 uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
                                uint32_t opidx)
 {
-    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
+    Int128 ret;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
     env->retxh = int128_gethi(ret);
     return int128_getlo(ret);
 }
@@ -235,14 +244,22 @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
 void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
                             uint64_t lo, uint64_t hi, uint32_t opidx)
 {
-    Int128 val = int128_make128(lo, hi);
+    Int128 val;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    val = int128_make128(lo, hi);
     helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
 }
 
 void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
                             uint64_t lo, uint64_t hi, uint32_t opidx)
 {
-    Int128 val = int128_make128(lo, hi);
+    Int128 val;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    val = int128_make128(lo, hi);
     helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
 }
 
@@ -252,6 +269,9 @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
 {
     bool success = false;
 
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_CMPXCHG128);
+
     if (likely(addr == env->reserve_addr)) {
         Int128 oldv, cmpv, newv;
 
@@ -271,6 +291,9 @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
 {
     bool success = false;
 
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_CMPXCHG128);
+
     if (likely(addr == env->reserve_addr)) {
         Int128 oldv, cmpv, newv;
 
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 881743571b..4e59dd5f42 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -33,6 +33,7 @@
 #include "trace-tcg.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "qemu/atomic128.h"
 
 
 #define CPU_SINGLE_STEP 0x1
@@ -2654,22 +2655,22 @@ static void gen_lq(DisasContext *ctx)
     hi = cpu_gpr[rd];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-        TCGv_i32 oi = tcg_temp_new_i32();
-        if (ctx->le_mode) {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
-            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        if (HAVE_ATOMIC128) {
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            }
+            tcg_temp_free_i32(oi);
+            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
         } else {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
-            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(oi);
-        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-#endif
     } else if (ctx->le_mode) {
         tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
         gen_addr_add(ctx, EA, EA, 8);
@@ -2805,21 +2806,21 @@ static void gen_std(DisasContext *ctx)
         hi = cpu_gpr[rs];
 
         if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-            TCGv_i32 oi = tcg_temp_new_i32();
-            if (ctx->le_mode) {
-                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
-                gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+            if (HAVE_ATOMIC128) {
+                TCGv_i32 oi = tcg_temp_new_i32();
+                if (ctx->le_mode) {
+                    tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                    gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+                } else {
+                    tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                    gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+                }
+                tcg_temp_free_i32(oi);
             } else {
-                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
-                gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+                /* Restart with exclusive lock.  */
+                gen_helper_exit_atomic(cpu_env);
+                ctx->base.is_jmp = DISAS_NORETURN;
             }
-            tcg_temp_free_i32(oi);
-#else
-            /* Restart with exclusive lock.  */
-            gen_helper_exit_atomic(cpu_env);
-            ctx->base.is_jmp = DISAS_NORETURN;
-#endif
         } else if (ctx->le_mode) {
             tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ);
             gen_addr_add(ctx, EA, EA, 8);
@@ -3404,26 +3405,26 @@ static void gen_lqarx(DisasContext *ctx)
     hi = cpu_gpr[rd];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-        TCGv_i32 oi = tcg_temp_new_i32();
-        if (ctx->le_mode) {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
-                                                ctx->mem_idx));
-            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        if (HAVE_ATOMIC128) {
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
+                                                    ctx->mem_idx));
+                gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
+                                                    ctx->mem_idx));
+                gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            }
+            tcg_temp_free_i32(oi);
+            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
         } else {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
-                                                ctx->mem_idx));
-            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+            tcg_temp_free(EA);
+            return;
         }
-        tcg_temp_free_i32(oi);
-        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-        tcg_temp_free(EA);
-        return;
-#endif
     } else if (ctx->le_mode) {
         tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
         tcg_gen_mov_tl(cpu_reserve, EA);
@@ -3461,20 +3462,22 @@ static void gen_stqcx_(DisasContext *ctx)
     hi = cpu_gpr[rs];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-        TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
-#ifdef CONFIG_ATOMIC128
-        if (ctx->le_mode) {
-            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
+            if (ctx->le_mode) {
+                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
+                                             EA, lo, hi, oi);
+            } else {
+                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
+                                             EA, lo, hi, oi);
+            }
+            tcg_temp_free_i32(oi);
         } else {
-            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
         }
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-#endif
         tcg_temp_free(EA);
-        tcg_temp_free_i32(oi);
     } else {
         TCGLabel *lab_fail = gen_new_label();
         TCGLabel *lab_over = gen_new_label();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 6/9] target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (4 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 5/9] target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128 Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-11  7:58   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 7/9] target/s390x: Split do_cdsg, do_lpq, do_stpq Richard Henderson
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-s390x

Cc: qemu-s390x@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/mem_helper.c | 92 +++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 51 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index bacae4f503..e106f61b4e 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -25,6 +25,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/s390x/storage-keys.h"
@@ -1389,7 +1390,7 @@ static void do_cdsg(CPUS390XState *env, uint64_t addr,
     bool fail;
 
     if (parallel) {
-#ifndef CONFIG_ATOMIC128
+#if !HAVE_CMPXCHG128
         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
 #else
         int mem_idx = cpu_mmu_index(env, false);
@@ -1435,9 +1436,7 @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                         uint64_t a2, bool parallel)
 {
-#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
     uint32_t mem_idx = cpu_mmu_index(env, false);
-#endif
     uintptr_t ra = GETPC();
     uint32_t fc = extract32(env->regs[0], 0, 8);
     uint32_t sc = extract32(env->regs[0], 8, 8);
@@ -1465,18 +1464,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
     probe_write(env, a2, 0, mem_idx, ra);
 #endif
 
-    /* Note that the compare-and-swap is atomic, and the store is atomic, but
-       the complete operation is not.  Therefore we do not need to assert serial
-       context in order to implement this.  That said, restart early if we can't
-       support either operation that is supposed to be atomic.  */
+    /*
+     * Note that the compare-and-swap is atomic, and the store is atomic,
+     * but the complete operation is not.  Therefore we do not need to
+     * assert serial context in order to implement this.  That said,
+     * restart early if we can't support either operation that is supposed
+     * to be atomic.
+     */
     if (parallel) {
-        int mask = 0;
-#if !defined(CONFIG_ATOMIC64)
-        mask = -8;
-#elif !defined(CONFIG_ATOMIC128)
-        mask = -16;
+        uint32_t max = 2;
+#ifdef CONFIG_ATOMIC64
+        max = 3;
 #endif
-        if (((4 << fc) | (1 << sc)) & mask) {
+        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
+            (HAVE_ATOMIC128  ? 0 : sc > max)) {
             cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
         }
     }
@@ -1546,16 +1547,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
             Int128 ov;
 
-            if (parallel) {
-#ifdef CONFIG_ATOMIC128
-                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
-                cc = !int128_eq(ov, cv);
-#else
-                /* Note that we asserted !parallel above.  */
-                g_assert_not_reached();
-#endif
-            } else {
+            if (!parallel) {
                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
 
@@ -1567,6 +1559,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
 
                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
+            } else if (HAVE_CMPXCHG128) {
+                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
+                cc = !int128_eq(ov, cv);
+            } else {
+                /* Note that we asserted !parallel above.  */
+                g_assert_not_reached();
             }
 
             env->regs[r3 + 0] = int128_gethi(ov);
@@ -1596,18 +1595,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
             cpu_stq_data_ra(env, a2, svh, ra);
             break;
         case 4:
-            if (parallel) {
-#ifdef CONFIG_ATOMIC128
+            if (!parallel) {
+                cpu_stq_data_ra(env, a2 + 0, svh, ra);
+                cpu_stq_data_ra(env, a2 + 8, svl, ra);
+            } else if (HAVE_ATOMIC128) {
                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                 Int128 sv = int128_make128(svl, svh);
                 helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
-#else
+            } else {
                 /* Note that we asserted !parallel above.  */
                 g_assert_not_reached();
-#endif
-            } else {
-                cpu_stq_data_ra(env, a2 + 0, svh, ra);
-                cpu_stq_data_ra(env, a2 + 8, svl, ra);
             }
             break;
         default:
@@ -2105,21 +2102,18 @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
     uintptr_t ra = GETPC();
     uint64_t hi, lo;
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
+    if (!parallel) {
+        check_alignment(env, addr, 16, ra);
+        hi = cpu_ldq_data_ra(env, addr + 0, ra);
+        lo = cpu_ldq_data_ra(env, addr + 8, ra);
+    } else if (HAVE_ATOMIC128) {
         int mem_idx = cpu_mmu_index(env, false);
         TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
         Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
         hi = int128_gethi(v);
         lo = int128_getlo(v);
-#endif
     } else {
-        check_alignment(env, addr, 16, ra);
-
-        hi = cpu_ldq_data_ra(env, addr + 0, ra);
-        lo = cpu_ldq_data_ra(env, addr + 8, ra);
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
 
     env->retxl = lo;
@@ -2142,21 +2136,17 @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
 {
     uintptr_t ra = GETPC();
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-
-        Int128 v = int128_make128(low, high);
-        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
-#endif
-    } else {
+    if (!parallel) {
         check_alignment(env, addr, 16, ra);
-
         cpu_stq_data_ra(env, addr + 0, high, ra);
         cpu_stq_data_ra(env, addr + 8, low, ra);
+    } else if (HAVE_ATOMIC128) {
+        int mem_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+        Int128 v = int128_make128(low, high);
+        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
+    } else {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
 }
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 7/9] target/s390x: Split do_cdsg, do_lpq, do_stpq
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (5 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 6/9] target/s390x: " Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-11  8:02   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return Richard Henderson
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-s390x

Cc: qemu-s390x@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/mem_helper.c | 128 ++++++++++++++++++--------------------
 1 file changed, 61 insertions(+), 67 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index e106f61b4e..b5858d2fa2 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1380,57 +1380,58 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
     return cc;
 }
 
-static void do_cdsg(CPUS390XState *env, uint64_t addr,
-                    uint32_t r1, uint32_t r3, bool parallel)
+void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
+                  uint32_t r1, uint32_t r3)
 {
     uintptr_t ra = GETPC();
     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
     Int128 oldv;
+    uint64_t oldh, oldl;
     bool fail;
 
-    if (parallel) {
-#if !HAVE_CMPXCHG128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        fail = !int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t oldh, oldl;
+    check_alignment(env, addr, 16, ra);
 
-        check_alignment(env, addr, 16, ra);
+    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
+    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
 
-        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
-        oldl = cpu_ldq_data_ra(env, addr + 8, ra);
-
-        oldv = int128_make128(oldl, oldh);
-        fail = !int128_eq(oldv, cmpv);
-        if (fail) {
-            newv = oldv;
-        }
-
-        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
-        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+    oldv = int128_make128(oldl, oldh);
+    fail = !int128_eq(oldv, cmpv);
+    if (fail) {
+        newv = oldv;
     }
 
+    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
+    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+
     env->cc_op = fail;
     env->regs[r1] = int128_gethi(oldv);
     env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
-                  uint32_t r1, uint32_t r3)
-{
-    do_cdsg(env, addr, r1, r3, false);
-}
-
 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                            uint32_t r1, uint32_t r3)
 {
-    do_cdsg(env, addr, r1, r3, true);
+    uintptr_t ra = GETPC();
+    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
+    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 oldv;
+    bool fail;
+
+    if (!HAVE_CMPXCHG128) {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+    }
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+    fail = !int128_eq(oldv, cmpv);
+
+    env->cc_op = fail;
+    env->regs[r1] = int128_gethi(oldv);
+    env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
@@ -2097,16 +2098,25 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
 #endif
 
 /* load pair from quadword */
-static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
 {
     uintptr_t ra = GETPC();
     uint64_t hi, lo;
 
-    if (!parallel) {
-        check_alignment(env, addr, 16, ra);
-        hi = cpu_ldq_data_ra(env, addr + 0, ra);
-        lo = cpu_ldq_data_ra(env, addr + 8, ra);
-    } else if (HAVE_ATOMIC128) {
+    check_alignment(env, addr, 16, ra);
+    hi = cpu_ldq_data_ra(env, addr + 0, ra);
+    lo = cpu_ldq_data_ra(env, addr + 8, ra);
+
+    env->retxl = lo;
+    return hi;
+}
+
+uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
+{
+    uintptr_t ra = GETPC();
+    uint64_t hi, lo;
+
+    if (HAVE_ATOMIC128) {
         int mem_idx = cpu_mmu_index(env, false);
         TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
         Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
@@ -2120,27 +2130,23 @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
     return hi;
 }
 
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
-{
-    return do_lpq(env, addr, false);
-}
-
-uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
-{
-    return do_lpq(env, addr, true);
-}
-
 /* store pair to quadword */
-static void do_stpq(CPUS390XState *env, uint64_t addr,
-                    uint64_t low, uint64_t high, bool parallel)
+void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
+                  uint64_t low, uint64_t high)
 {
     uintptr_t ra = GETPC();
 
-    if (!parallel) {
-        check_alignment(env, addr, 16, ra);
-        cpu_stq_data_ra(env, addr + 0, high, ra);
-        cpu_stq_data_ra(env, addr + 8, low, ra);
-    } else if (HAVE_ATOMIC128) {
+    check_alignment(env, addr, 16, ra);
+    cpu_stq_data_ra(env, addr + 0, high, ra);
+    cpu_stq_data_ra(env, addr + 8, low, ra);
+}
+
+void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
+                           uint64_t low, uint64_t high)
+{
+    uintptr_t ra = GETPC();
+
+    if (HAVE_ATOMIC128) {
         int mem_idx = cpu_mmu_index(env, false);
         TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
         Int128 v = int128_make128(low, high);
@@ -2150,18 +2156,6 @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
     }
 }
 
-void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
-                  uint64_t low, uint64_t high)
-{
-    do_stpq(env, addr, low, high, false);
-}
-
-void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
-                           uint64_t low, uint64_t high)
-{
-    do_stpq(env, addr, low, high, true);
-}
-
 /* Execute instruction.  This instruction executes an insn modified with
    the contents of r1.  It does not change the executed instruction in memory;
    it does not change the program counter.
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (6 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 7/9] target/s390x: Split do_cdsg, do_lpq, do_stpq Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-11  8:06   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 9/9] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate Richard Henderson
  2018-10-09 18:51 ` [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Emilio G. Cota
  9 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-s390x

When op raises an exception, it may not have initialized the output
temps that would be written back by wout or cout.

Cc: qemu-s390x@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/translate.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 7363aabf3a..7fad3ad8e9 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -6164,11 +6164,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
     if (insn->help_op) {
         ret = insn->help_op(s, &o);
     }
-    if (insn->help_wout) {
-        insn->help_wout(s, &f, &o);
-    }
-    if (insn->help_cout) {
-        insn->help_cout(s, &o);
+    if (ret != DISAS_NORETURN) {
+        if (insn->help_wout) {
+            insn->help_wout(s, &f, &o);
+        }
+        if (insn->help_cout) {
+            insn->help_cout(s, &o);
+        }
     }
 
     /* Free any temporaries created by the helpers.  */
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH v3 9/9] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (7 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return Richard Henderson
@ 2018-10-03 19:39 ` Richard Henderson
  2018-10-11  8:09   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
  2018-10-09 18:51 ` [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Emilio G. Cota
  9 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-03 19:39 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-s390x

Cc: qemu-s390x@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/mem_helper.c | 40 +++++++++++++++++++--------------------
 target/s390x/translate.c  | 25 +++++++++++++++++-------
 2 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index b5858d2fa2..490c43e6e6 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1420,9 +1420,7 @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
     Int128 oldv;
     bool fail;
 
-    if (!HAVE_CMPXCHG128) {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_CMPXCHG128);
 
     mem_idx = cpu_mmu_index(env, false);
     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
@@ -2115,16 +2113,17 @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
 {
     uintptr_t ra = GETPC();
     uint64_t hi, lo;
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 v;
 
-    if (HAVE_ATOMIC128) {
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
-        hi = int128_gethi(v);
-        lo = int128_getlo(v);
-    } else {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_ATOMIC128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
+    hi = int128_gethi(v);
+    lo = int128_getlo(v);
 
     env->retxl = lo;
     return hi;
@@ -2145,15 +2144,16 @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
                            uint64_t low, uint64_t high)
 {
     uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 v;
 
-    if (HAVE_ATOMIC128) {
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        Int128 v = int128_make128(low, high);
-        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
-    } else {
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-    }
+    assert(HAVE_ATOMIC128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    v = int128_make128(low, high);
+    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
 }
 
 /* Execute instruction.  This instruction executes an insn modified with
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 7fad3ad8e9..57fe74e4a0 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -44,6 +44,7 @@
 #include "trace-tcg.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "qemu/atomic128.h"
 
 
 /* Information that (most) every instruction needs to manipulate.  */
@@ -2032,6 +2033,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
     int r3 = get_field(s->fields, r3);
     int d2 = get_field(s->fields, d2);
     int b2 = get_field(s->fields, b2);
+    DisasJumpType ret = DISAS_NEXT;
     TCGv_i64 addr;
     TCGv_i32 t_r1, t_r3;
 
@@ -2039,17 +2041,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
     addr = get_address(s, 0, b2, d2);
     t_r1 = tcg_const_i32(r1);
     t_r3 = tcg_const_i32(r3);
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+    } else if (HAVE_CMPXCHG128) {
         gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
     } else {
-        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+        gen_helper_exit_atomic(cpu_env);
+        ret = DISAS_NORETURN;
     }
     tcg_temp_free_i64(addr);
     tcg_temp_free_i32(t_r1);
     tcg_temp_free_i32(t_r3);
 
     set_cc_static(s);
-    return DISAS_NEXT;
+    return ret;
 }
 
 static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
@@ -3036,10 +3041,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
 {
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_lpq(o->out, cpu_env, o->in2);
+    } else if (HAVE_ATOMIC128) {
         gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
     } else {
-        gen_helper_lpq(o->out, cpu_env, o->in2);
+        gen_helper_exit_atomic(cpu_env);
+        return DISAS_NORETURN;
     }
     return_low128(o->out2);
     return DISAS_NEXT;
@@ -4462,10 +4470,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
 {
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+    } else if (HAVE_ATOMIC128) {
         gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
     } else {
-        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+        gen_helper_exit_atomic(cpu_env);
+        return DISAS_NORETURN;
     }
     return DISAS_NEXT;
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations
  2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
                   ` (8 preceding siblings ...)
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 9/9] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate Richard Henderson
@ 2018-10-09 18:51 ` Emilio G. Cota
  2018-10-11  8:10   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
  9 siblings, 1 reply; 20+ messages in thread
From: Emilio G. Cota @ 2018-10-09 18:51 UTC (permalink / raw)
  To: qemu-devel
  Cc: Richard Henderson, David Gibson, Alexander Graf, Cornelia Huck,
	David Hildenbrand, qemu-ppc, qemu-s390x

On Wed, Oct 03, 2018 at 14:39:22 -0500, Richard Henderson wrote:
(snip)
> Richard Henderson (9):
>   tcg: Split CONFIG_ATOMIC128
>   target/i386: Convert to HAVE_CMPXCHG128
>   target/arm: Convert to HAVE_CMPXCHG128
>   target/arm: Check HAVE_CMPXCHG128 at translate time
>   target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
>   target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
>   target/s390x: Split do_cdsg, do_lpq, do_stpq
>   target/s390x: Skip wout, cout helpers if op helper does not return
>   target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate

Cc'ing ppc/s390x maintainers -- I'm not sure they've seen this series.
Link to this thread (v3):
  https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg00598.html

I'm eager to get this merged, particularly for i386 where it is
fixing unnecessary exits via exec_atomic for cmpxchg16 emulation.

Thanks,

		Emilio

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 6/9] target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 6/9] target/s390x: " Richard Henderson
@ 2018-10-11  7:58   ` David Hildenbrand
  0 siblings, 0 replies; 20+ messages in thread
From: David Hildenbrand @ 2018-10-11  7:58 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: qemu-s390x

On 03/10/2018 21:39, Richard Henderson wrote:
> Cc: qemu-s390x@nongnu.org
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/s390x/mem_helper.c | 92 +++++++++++++++++----------------------
>  1 file changed, 41 insertions(+), 51 deletions(-)
> 
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index bacae4f503..e106f61b4e 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -25,6 +25,7 @@
>  #include "exec/exec-all.h"
>  #include "exec/cpu_ldst.h"
>  #include "qemu/int128.h"
> +#include "qemu/atomic128.h"
>  
>  #if !defined(CONFIG_USER_ONLY)
>  #include "hw/s390x/storage-keys.h"
> @@ -1389,7 +1390,7 @@ static void do_cdsg(CPUS390XState *env, uint64_t addr,
>      bool fail;
>  
>      if (parallel) {
> -#ifndef CONFIG_ATOMIC128
> +#if !HAVE_CMPXCHG128
>          cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
>  #else
>          int mem_idx = cpu_mmu_index(env, false);
> @@ -1435,9 +1436,7 @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
>  static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
>                          uint64_t a2, bool parallel)
>  {
> -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
>      uint32_t mem_idx = cpu_mmu_index(env, false);
> -#endif
>      uintptr_t ra = GETPC();
>      uint32_t fc = extract32(env->regs[0], 0, 8);
>      uint32_t sc = extract32(env->regs[0], 8, 8);
> @@ -1465,18 +1464,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
>      probe_write(env, a2, 0, mem_idx, ra);
>  #endif
>  
> -    /* Note that the compare-and-swap is atomic, and the store is atomic, but
> -       the complete operation is not.  Therefore we do not need to assert serial
> -       context in order to implement this.  That said, restart early if we can't
> -       support either operation that is supposed to be atomic.  */
> +    /*
> +     * Note that the compare-and-swap is atomic, and the store is atomic,
> +     * but the complete operation is not.  Therefore we do not need to
> +     * assert serial context in order to implement this.  That said,
> +     * restart early if we can't support either operation that is supposed
> +     * to be atomic.
> +     */
>      if (parallel) {
> -        int mask = 0;
> -#if !defined(CONFIG_ATOMIC64)
> -        mask = -8;
> -#elif !defined(CONFIG_ATOMIC128)
> -        mask = -16;
> +        uint32_t max = 2;
> +#ifdef CONFIG_ATOMIC64
> +        max = 3;
>  #endif
> -        if (((4 << fc) | (1 << sc)) & mask) {
> +        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
> +            (HAVE_ATOMIC128  ? 0 : sc > max)) {
>              cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
>          }
>      }
> @@ -1546,16 +1547,7 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
>              Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
>              Int128 ov;
>  
> -            if (parallel) {
> -#ifdef CONFIG_ATOMIC128
> -                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> -                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
> -                cc = !int128_eq(ov, cv);
> -#else
> -                /* Note that we asserted !parallel above.  */
> -                g_assert_not_reached();
> -#endif
> -            } else {
> +            if (!parallel) {
>                  uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
>                  uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
>  
> @@ -1567,6 +1559,13 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
>  
>                  cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
>                  cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
> +            } else if (HAVE_CMPXCHG128) {
> +                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> +                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
> +                cc = !int128_eq(ov, cv);
> +            } else {
> +                /* Note that we asserted !parallel above.  */
> +                g_assert_not_reached();
>              }
>  
>              env->regs[r3 + 0] = int128_gethi(ov);
> @@ -1596,18 +1595,16 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
>              cpu_stq_data_ra(env, a2, svh, ra);
>              break;
>          case 4:
> -            if (parallel) {
> -#ifdef CONFIG_ATOMIC128
> +            if (!parallel) {
> +                cpu_stq_data_ra(env, a2 + 0, svh, ra);
> +                cpu_stq_data_ra(env, a2 + 8, svl, ra);
> +            } else if (HAVE_ATOMIC128) {
>                  TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
>                  Int128 sv = int128_make128(svl, svh);
>                  helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
> -#else
> +            } else {
>                  /* Note that we asserted !parallel above.  */
>                  g_assert_not_reached();
> -#endif
> -            } else {
> -                cpu_stq_data_ra(env, a2 + 0, svh, ra);
> -                cpu_stq_data_ra(env, a2 + 8, svl, ra);
>              }
>              break;
>          default:
> @@ -2105,21 +2102,18 @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
>      uintptr_t ra = GETPC();
>      uint64_t hi, lo;
>  
> -    if (parallel) {
> -#ifndef CONFIG_ATOMIC128
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -#else
> +    if (!parallel) {
> +        check_alignment(env, addr, 16, ra);
> +        hi = cpu_ldq_data_ra(env, addr + 0, ra);
> +        lo = cpu_ldq_data_ra(env, addr + 8, ra);
> +    } else if (HAVE_ATOMIC128) {
>          int mem_idx = cpu_mmu_index(env, false);
>          TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
>          Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
>          hi = int128_gethi(v);
>          lo = int128_getlo(v);
> -#endif
>      } else {
> -        check_alignment(env, addr, 16, ra);
> -
> -        hi = cpu_ldq_data_ra(env, addr + 0, ra);
> -        lo = cpu_ldq_data_ra(env, addr + 8, ra);
> +        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
>      }
>  
>      env->retxl = lo;
> @@ -2142,21 +2136,17 @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
>  {
>      uintptr_t ra = GETPC();
>  
> -    if (parallel) {
> -#ifndef CONFIG_ATOMIC128
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -#else
> -        int mem_idx = cpu_mmu_index(env, false);
> -        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> -
> -        Int128 v = int128_make128(low, high);
> -        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
> -#endif
> -    } else {
> +    if (!parallel) {
>          check_alignment(env, addr, 16, ra);
> -
>          cpu_stq_data_ra(env, addr + 0, high, ra);
>          cpu_stq_data_ra(env, addr + 8, low, ra);
> +    } else if (HAVE_ATOMIC128) {
> +        int mem_idx = cpu_mmu_index(env, false);
> +        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> +        Int128 v = int128_make128(low, high);
> +        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
> +    } else {
> +        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
>      }
>  }
>  
> 

Reviewed-by: David Hildenbrand <david@redhat.com>

Only case 1 still looks fairly ugly with the ifdefs.

-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 7/9] target/s390x: Split do_cdsg, do_lpq, do_stpq
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 7/9] target/s390x: Split do_cdsg, do_lpq, do_stpq Richard Henderson
@ 2018-10-11  8:02   ` David Hildenbrand
  0 siblings, 0 replies; 20+ messages in thread
From: David Hildenbrand @ 2018-10-11  8:02 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: qemu-s390x

On 03/10/2018 21:39, Richard Henderson wrote:
> Cc: qemu-s390x@nongnu.org
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/s390x/mem_helper.c | 128 ++++++++++++++++++--------------------
>  1 file changed, 61 insertions(+), 67 deletions(-)
> 
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index e106f61b4e..b5858d2fa2 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -1380,57 +1380,58 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
>      return cc;
>  }
>  
> -static void do_cdsg(CPUS390XState *env, uint64_t addr,
> -                    uint32_t r1, uint32_t r3, bool parallel)
> +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
> +                  uint32_t r1, uint32_t r3)
>  {
>      uintptr_t ra = GETPC();
>      Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
>      Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
>      Int128 oldv;
> +    uint64_t oldh, oldl;
>      bool fail;
>  
> -    if (parallel) {
> -#if !HAVE_CMPXCHG128
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -#else
> -        int mem_idx = cpu_mmu_index(env, false);
> -        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> -        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
> -        fail = !int128_eq(oldv, cmpv);
> -#endif
> -    } else {
> -        uint64_t oldh, oldl;
> +    check_alignment(env, addr, 16, ra);
>  
> -        check_alignment(env, addr, 16, ra);
> +    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
> +    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
>  
> -        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
> -        oldl = cpu_ldq_data_ra(env, addr + 8, ra);
> -
> -        oldv = int128_make128(oldl, oldh);
> -        fail = !int128_eq(oldv, cmpv);
> -        if (fail) {
> -            newv = oldv;
> -        }
> -
> -        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
> -        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
> +    oldv = int128_make128(oldl, oldh);
> +    fail = !int128_eq(oldv, cmpv);
> +    if (fail) {
> +        newv = oldv;
>      }
>  
> +    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
> +    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
> +
>      env->cc_op = fail;
>      env->regs[r1] = int128_gethi(oldv);
>      env->regs[r1 + 1] = int128_getlo(oldv);
>  }
>  
> -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
> -                  uint32_t r1, uint32_t r3)
> -{
> -    do_cdsg(env, addr, r1, r3, false);
> -}
> -
>  void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
>                             uint32_t r1, uint32_t r3)
>  {
> -    do_cdsg(env, addr, r1, r3, true);
> +    uintptr_t ra = GETPC();
> +    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
> +    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
> +    int mem_idx;
> +    TCGMemOpIdx oi;
> +    Int128 oldv;
> +    bool fail;
> +
> +    if (!HAVE_CMPXCHG128) {
> +        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> +    }
> +
> +    mem_idx = cpu_mmu_index(env, false);
> +    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> +    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
> +    fail = !int128_eq(oldv, cmpv);
> +
> +    env->cc_op = fail;
> +    env->regs[r1] = int128_gethi(oldv);
> +    env->regs[r1 + 1] = int128_getlo(oldv);
>  }
>  
>  static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
> @@ -2097,16 +2098,25 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
>  #endif
>  
>  /* load pair from quadword */
> -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
> +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
>  {
>      uintptr_t ra = GETPC();
>      uint64_t hi, lo;
>  
> -    if (!parallel) {
> -        check_alignment(env, addr, 16, ra);
> -        hi = cpu_ldq_data_ra(env, addr + 0, ra);
> -        lo = cpu_ldq_data_ra(env, addr + 8, ra);
> -    } else if (HAVE_ATOMIC128) {
> +    check_alignment(env, addr, 16, ra);
> +    hi = cpu_ldq_data_ra(env, addr + 0, ra);
> +    lo = cpu_ldq_data_ra(env, addr + 8, ra);
> +
> +    env->retxl = lo;
> +    return hi;
> +}
> +
> +uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
> +{
> +    uintptr_t ra = GETPC();
> +    uint64_t hi, lo;
> +
> +    if (HAVE_ATOMIC128) {
>          int mem_idx = cpu_mmu_index(env, false);
>          TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
>          Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
> @@ -2120,27 +2130,23 @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
>      return hi;
>  }
>  
> -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
> -{
> -    return do_lpq(env, addr, false);
> -}
> -
> -uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
> -{
> -    return do_lpq(env, addr, true);
> -}
> -
>  /* store pair to quadword */
> -static void do_stpq(CPUS390XState *env, uint64_t addr,
> -                    uint64_t low, uint64_t high, bool parallel)
> +void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
> +                  uint64_t low, uint64_t high)
>  {
>      uintptr_t ra = GETPC();
>  
> -    if (!parallel) {
> -        check_alignment(env, addr, 16, ra);
> -        cpu_stq_data_ra(env, addr + 0, high, ra);
> -        cpu_stq_data_ra(env, addr + 8, low, ra);
> -    } else if (HAVE_ATOMIC128) {
> +    check_alignment(env, addr, 16, ra);
> +    cpu_stq_data_ra(env, addr + 0, high, ra);
> +    cpu_stq_data_ra(env, addr + 8, low, ra);
> +}
> +
> +void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
> +                           uint64_t low, uint64_t high)
> +{
> +    uintptr_t ra = GETPC();
> +
> +    if (HAVE_ATOMIC128) {
>          int mem_idx = cpu_mmu_index(env, false);
>          TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
>          Int128 v = int128_make128(low, high);
> @@ -2150,18 +2156,6 @@ static void do_stpq(CPUS390XState *env, uint64_t addr,
>      }
>  }
>  
> -void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
> -                  uint64_t low, uint64_t high)
> -{
> -    do_stpq(env, addr, low, high, false);
> -}
> -
> -void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
> -                           uint64_t low, uint64_t high)
> -{
> -    do_stpq(env, addr, low, high, true);
> -}
> -
>  /* Execute instruction.  This instruction executes an insn modified with
>     the contents of r1.  It does not change the executed instruction in memory;
>     it does not change the program counter.
> 

Way easier to read.

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return Richard Henderson
@ 2018-10-11  8:06   ` David Hildenbrand
  2018-10-11 16:47     ` Richard Henderson
  0 siblings, 1 reply; 20+ messages in thread
From: David Hildenbrand @ 2018-10-11  8:06 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: qemu-s390x

On 03/10/2018 21:39, Richard Henderson wrote:
> When op raises an exception, it may not have initialized the output
> temps that would be written back by wout or cout.
> 
> Cc: qemu-s390x@nongnu.org
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/s390x/translate.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 7363aabf3a..7fad3ad8e9 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -6164,11 +6164,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
>      if (insn->help_op) {
>          ret = insn->help_op(s, &o);
>      }
> -    if (insn->help_wout) {
> -        insn->help_wout(s, &f, &o);
> -    }
> -    if (insn->help_cout) {
> -        insn->help_cout(s, &o);
> +    if (ret != DISAS_NORETURN) {
> +        if (insn->help_wout) {
> +            insn->help_wout(s, &f, &o);
> +        }
> +        if (insn->help_cout) {
> +            insn->help_cout(s, &o);
> +        }
>      }
>  
>      /* Free any temporaries created by the helpers.  */
> 

What about things like LPSW/LPWSE ? They certainly don't imply that we
had an exception.

(these two don't use wout/cout, so it is still fine, but I would prefer
a comment somewhere because otherwise it is really easy to miss that
DISAS_NORETURN makes us skip these handlers)

Apart from that

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 9/9] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 9/9] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate Richard Henderson
@ 2018-10-11  8:09   ` David Hildenbrand
  0 siblings, 0 replies; 20+ messages in thread
From: David Hildenbrand @ 2018-10-11  8:09 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: qemu-s390x

On 03/10/2018 21:39, Richard Henderson wrote:
> Cc: qemu-s390x@nongnu.org
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/s390x/mem_helper.c | 40 +++++++++++++++++++--------------------
>  target/s390x/translate.c  | 25 +++++++++++++++++-------
>  2 files changed, 38 insertions(+), 27 deletions(-)
> 
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index b5858d2fa2..490c43e6e6 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -1420,9 +1420,7 @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
>      Int128 oldv;
>      bool fail;
>  
> -    if (!HAVE_CMPXCHG128) {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_CMPXCHG128);
>  
>      mem_idx = cpu_mmu_index(env, false);
>      oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> @@ -2115,16 +2113,17 @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
>  {
>      uintptr_t ra = GETPC();
>      uint64_t hi, lo;
> +    int mem_idx;
> +    TCGMemOpIdx oi;
> +    Int128 v;
>  
> -    if (HAVE_ATOMIC128) {
> -        int mem_idx = cpu_mmu_index(env, false);
> -        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> -        Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
> -        hi = int128_gethi(v);
> -        lo = int128_getlo(v);
> -    } else {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_ATOMIC128);
> +
> +    mem_idx = cpu_mmu_index(env, false);
> +    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> +    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
> +    hi = int128_gethi(v);
> +    lo = int128_getlo(v);
>  
>      env->retxl = lo;
>      return hi;
> @@ -2145,15 +2144,16 @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
>                             uint64_t low, uint64_t high)
>  {
>      uintptr_t ra = GETPC();
> +    int mem_idx;
> +    TCGMemOpIdx oi;
> +    Int128 v;
>  
> -    if (HAVE_ATOMIC128) {
> -        int mem_idx = cpu_mmu_index(env, false);
> -        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> -        Int128 v = int128_make128(low, high);
> -        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
> -    } else {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_ATOMIC128);
> +
> +    mem_idx = cpu_mmu_index(env, false);
> +    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
> +    v = int128_make128(low, high);
> +    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
>  }
>  
>  /* Execute instruction.  This instruction executes an insn modified with
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 7fad3ad8e9..57fe74e4a0 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -44,6 +44,7 @@
>  #include "trace-tcg.h"
>  #include "exec/translator.h"
>  #include "exec/log.h"
> +#include "qemu/atomic128.h"
>  
>  
>  /* Information that (most) every instruction needs to manipulate.  */
> @@ -2032,6 +2033,7 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
>      int r3 = get_field(s->fields, r3);
>      int d2 = get_field(s->fields, d2);
>      int b2 = get_field(s->fields, b2);
> +    DisasJumpType ret = DISAS_NEXT;
>      TCGv_i64 addr;
>      TCGv_i32 t_r1, t_r3;
>  
> @@ -2039,17 +2041,20 @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
>      addr = get_address(s, 0, b2, d2);
>      t_r1 = tcg_const_i32(r1);
>      t_r3 = tcg_const_i32(r3);
> -    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
> +        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
> +    } else if (HAVE_CMPXCHG128) {
>          gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
>      } else {
> -        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
> +        gen_helper_exit_atomic(cpu_env);
> +        ret = DISAS_NORETURN;
>      }
>      tcg_temp_free_i64(addr);
>      tcg_temp_free_i32(t_r1);
>      tcg_temp_free_i32(t_r3);
>  
>      set_cc_static(s);
> -    return DISAS_NEXT;
> +    return ret;
>  }
>  
>  static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
> @@ -3036,10 +3041,13 @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o)
>  
>  static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
>  {
> -    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
> +        gen_helper_lpq(o->out, cpu_env, o->in2);
> +    } else if (HAVE_ATOMIC128) {
>          gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
>      } else {
> -        gen_helper_lpq(o->out, cpu_env, o->in2);
> +        gen_helper_exit_atomic(cpu_env);
> +        return DISAS_NORETURN;
>      }
>      return_low128(o->out2);
>      return DISAS_NEXT;
> @@ -4462,10 +4470,13 @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
>  
>  static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
>  {
> -    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
> +        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
> +    } else if (HAVE_ATOMIC128) {
>          gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
>      } else {
> -        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
> +        gen_helper_exit_atomic(cpu_env);
> +        return DISAS_NORETURN;
>      }
>      return DISAS_NEXT;
>  }
> 

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations
  2018-10-09 18:51 ` [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Emilio G. Cota
@ 2018-10-11  8:10   ` David Hildenbrand
  0 siblings, 0 replies; 20+ messages in thread
From: David Hildenbrand @ 2018-10-11  8:10 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Cornelia Huck, Richard Henderson, Alexander Graf, qemu-s390x,
	qemu-ppc, David Gibson

On 09/10/2018 20:51, Emilio G. Cota wrote:
> On Wed, Oct 03, 2018 at 14:39:22 -0500, Richard Henderson wrote:
> (snip)
>> Richard Henderson (9):
>>   tcg: Split CONFIG_ATOMIC128
>>   target/i386: Convert to HAVE_CMPXCHG128
>>   target/arm: Convert to HAVE_CMPXCHG128
>>   target/arm: Check HAVE_CMPXCHG128 at translate time
>>   target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
>>   target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
>>   target/s390x: Split do_cdsg, do_lpq, do_stpq
>>   target/s390x: Skip wout, cout helpers if op helper does not return
>>   target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate
> 
> Cc'ing ppc/s390x maintainers -- I'm not sure they've seen this series.
> Link to this thread (v3):
>   https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg00598.html
> 
> I'm eager to get this merged, particularly for i386 where it is
> fixing unnecessary exits via exec_atomic for cmpxchg16 emulation.
> 
> Thanks,
> 
> 		Emilio
> 

Thanks for the CC, I indeed haven't seen it.

-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/9] target/arm: Check HAVE_CMPXCHG128 at translate time
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 4/9] target/arm: Check HAVE_CMPXCHG128 at translate time Richard Henderson
@ 2018-10-11 10:55   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 20+ messages in thread
From: Philippe Mathieu-Daudé @ 2018-10-11 10:55 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 03/10/2018 21:39, Richard Henderson wrote:
> Reviewed-by: Emilio G. Cota <cota@braap.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>

> ---
>  target/arm/helper-a64.c    | 16 ++++------------
>  target/arm/translate-a64.c | 38 ++++++++++++++++++++++----------------
>  2 files changed, 26 insertions(+), 28 deletions(-)
> 
> diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
> index 6e4e1b8a19..61799d20e1 100644
> --- a/target/arm/helper-a64.c
> +++ b/target/arm/helper-a64.c
> @@ -563,9 +563,7 @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
>      int mem_idx;
>      TCGMemOpIdx oi;
>  
> -    if (!HAVE_CMPXCHG128) {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_CMPXCHG128);
>  
>      mem_idx = cpu_mmu_index(env, false);
>      oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
> @@ -635,9 +633,7 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
>      int mem_idx;
>      TCGMemOpIdx oi;
>  
> -    if (!HAVE_CMPXCHG128) {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_CMPXCHG128);
>  
>      mem_idx = cpu_mmu_index(env, false);
>      oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
> @@ -663,9 +659,7 @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
>      int mem_idx;
>      TCGMemOpIdx oi;
>  
> -    if (!HAVE_CMPXCHG128) {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_CMPXCHG128);
>  
>      mem_idx = cpu_mmu_index(env, false);
>      oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
> @@ -686,9 +680,7 @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
>      int mem_idx;
>      TCGMemOpIdx oi;
>  
> -    if (!HAVE_CMPXCHG128) {
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -    }
> +    assert(HAVE_CMPXCHG128);
>  
>      mem_idx = cpu_mmu_index(env, false);
>      oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 8ca3876707..77ee8d9085 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -37,6 +37,7 @@
>  
>  #include "trace-tcg.h"
>  #include "translate-a64.h"
> +#include "qemu/atomic128.h"
>  
>  static TCGv_i64 cpu_X[32];
>  static TCGv_i64 cpu_pc;
> @@ -2082,26 +2083,27 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>                                         get_mem_index(s),
>                                         MO_64 | MO_ALIGN | s->be_data);
>              tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
> -        } else if (s->be_data == MO_LE) {
> -            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> +            if (!HAVE_CMPXCHG128) {
> +                gen_helper_exit_atomic(cpu_env);
> +                s->base.is_jmp = DISAS_NORETURN;
> +            } else if (s->be_data == MO_LE) {
>                  gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
>                                                          cpu_exclusive_addr,
>                                                          cpu_reg(s, rt),
>                                                          cpu_reg(s, rt2));
>              } else {
> -                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
> -                                               cpu_reg(s, rt), cpu_reg(s, rt2));
> -            }
> -        } else {
> -            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
>                  gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
>                                                          cpu_exclusive_addr,
>                                                          cpu_reg(s, rt),
>                                                          cpu_reg(s, rt2));
> -            } else {
> -                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
> -                                               cpu_reg(s, rt), cpu_reg(s, rt2));
>              }
> +        } else if (s->be_data == MO_LE) {
> +            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
> +                                           cpu_reg(s, rt), cpu_reg(s, rt2));
> +        } else {
> +            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
> +                                           cpu_reg(s, rt), cpu_reg(s, rt2));
>          }
>      } else {
>          tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
> @@ -2171,14 +2173,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
>          }
>          tcg_temp_free_i64(cmp);
>      } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
> -        TCGv_i32 tcg_rs = tcg_const_i32(rs);
> -
> -        if (s->be_data == MO_LE) {
> -            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
> +        if (HAVE_CMPXCHG128) {
> +            TCGv_i32 tcg_rs = tcg_const_i32(rs);
> +            if (s->be_data == MO_LE) {
> +                gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
> +            } else {
> +                gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
> +            }
> +            tcg_temp_free_i32(tcg_rs);
>          } else {
> -            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
> +            gen_helper_exit_atomic(cpu_env);
> +            s->base.is_jmp = DISAS_NORETURN;
>          }
> -        tcg_temp_free_i32(tcg_rs);
>      } else {
>          TCGv_i64 d1 = tcg_temp_new_i64();
>          TCGv_i64 d2 = tcg_temp_new_i64();
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [PATCH v3 2/9] target/i386: Convert to HAVE_CMPXCHG128
  2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 2/9] target/i386: Convert to HAVE_CMPXCHG128 Richard Henderson
@ 2018-10-11 10:55   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 20+ messages in thread
From: Philippe Mathieu-Daudé @ 2018-10-11 10:55 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 03/10/2018 21:39, Richard Henderson wrote:
> Reviewed-by: Emilio G. Cota <cota@braap.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>

> ---
>  target/i386/mem_helper.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
> index 30c26b9d9c..6cc53bcb40 100644
> --- a/target/i386/mem_helper.c
> +++ b/target/i386/mem_helper.c
> @@ -23,6 +23,7 @@
>  #include "exec/exec-all.h"
>  #include "exec/cpu_ldst.h"
>  #include "qemu/int128.h"
> +#include "qemu/atomic128.h"
>  #include "tcg.h"
>  
>  void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
> @@ -137,10 +138,7 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
>  
>      if ((a0 & 0xf) != 0) {
>          raise_exception_ra(env, EXCP0D_GPF, ra);
> -    } else {
> -#ifndef CONFIG_ATOMIC128
> -        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> -#else
> +    } else if (HAVE_CMPXCHG128) {
>          int eflags = cpu_cc_compute_all(env, CC_OP);
>  
>          Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
> @@ -159,7 +157,8 @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
>              eflags &= ~CC_Z;
>          }
>          CC_SRC = eflags;
> -#endif
> +    } else {
> +        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
>      }
>  }
>  #endif
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return
  2018-10-11  8:06   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
@ 2018-10-11 16:47     ` Richard Henderson
  2018-10-16  8:13       ` David Hildenbrand
  0 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2018-10-11 16:47 UTC (permalink / raw)
  To: David Hildenbrand, qemu-devel; +Cc: qemu-s390x

On 10/11/18 1:06 AM, David Hildenbrand wrote:
> On 03/10/2018 21:39, Richard Henderson wrote:
>> When op raises an exception, it may not have initialized the output
>> temps that would be written back by wout or cout.
>>
>> Cc: qemu-s390x@nongnu.org
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>  target/s390x/translate.c | 12 +++++++-----
>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>
>> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
>> index 7363aabf3a..7fad3ad8e9 100644
>> --- a/target/s390x/translate.c
>> +++ b/target/s390x/translate.c
>> @@ -6164,11 +6164,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
>>      if (insn->help_op) {
>>          ret = insn->help_op(s, &o);
>>      }
>> -    if (insn->help_wout) {
>> -        insn->help_wout(s, &f, &o);
>> -    }
>> -    if (insn->help_cout) {
>> -        insn->help_cout(s, &o);
>> +    if (ret != DISAS_NORETURN) {
>> +        if (insn->help_wout) {
>> +            insn->help_wout(s, &f, &o);
>> +        }
>> +        if (insn->help_cout) {
>> +            insn->help_cout(s, &o);
>> +        }
>>      }
>>  
>>      /* Free any temporaries created by the helpers.  */
>>
> 
> What about things like LPSW/LPWSE ? They certainly don't imply that we
> had an exception.

Exception in the tcg sense, not the guest architectural sense, in that we call
cpu_loop_exit from the helper, which performs a longjmp.  (Incidentally,
there's no reason to do that for load_psw -- we could just exit the tb normally.)

> (these two don't use wout/cout, so it is still fine, but I would prefer
> a comment somewhere because otherwise it is really easy to miss that
> DISAS_NORETURN makes us skip these handlers)

Where would you like me to place that comment?  In the DisasInsn definition?


r~

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] [qemu-s390x] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return
  2018-10-11 16:47     ` Richard Henderson
@ 2018-10-16  8:13       ` David Hildenbrand
  0 siblings, 0 replies; 20+ messages in thread
From: David Hildenbrand @ 2018-10-16  8:13 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: qemu-s390x

On 11/10/2018 18:47, Richard Henderson wrote:
> On 10/11/18 1:06 AM, David Hildenbrand wrote:
>> On 03/10/2018 21:39, Richard Henderson wrote:
>>> When op raises an exception, it may not have initialized the output
>>> temps that would be written back by wout or cout.
>>>
>>> Cc: qemu-s390x@nongnu.org
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>  target/s390x/translate.c | 12 +++++++-----
>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
>>> index 7363aabf3a..7fad3ad8e9 100644
>>> --- a/target/s390x/translate.c
>>> +++ b/target/s390x/translate.c
>>> @@ -6164,11 +6164,13 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
>>>      if (insn->help_op) {
>>>          ret = insn->help_op(s, &o);
>>>      }
>>> -    if (insn->help_wout) {
>>> -        insn->help_wout(s, &f, &o);
>>> -    }
>>> -    if (insn->help_cout) {
>>> -        insn->help_cout(s, &o);
>>> +    if (ret != DISAS_NORETURN) {
>>> +        if (insn->help_wout) {
>>> +            insn->help_wout(s, &f, &o);
>>> +        }
>>> +        if (insn->help_cout) {
>>> +            insn->help_cout(s, &o);
>>> +        }
>>>      }
>>>  
>>>      /* Free any temporaries created by the helpers.  */
>>>
>>
>> What about things like LPSW/LPWSE ? They certainly don't imply that we
>> had an exception.
> 
> Exception in the tcg sense, not the guest architectural sense, in that we call
> cpu_loop_exit from the helper, which performs a longjmp.  (Incidentally,
> there's no reason to do that for load_psw -- we could just exit the tb normally.)
> 
>> (these two don't use wout/cout, so it is still fine, but I would prefer
>> a comment somewhere because otherwise it is really easy to miss that
>> DISAS_NORETURN makes us skip these handlers)
> 
> Where would you like me to place that comment?  In the DisasInsn definition?

That probably makes sense!

> 
> 
> r~
> 


-- 

Thanks,

David / dhildenb

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2018-10-16  8:13 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-03 19:39 [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Richard Henderson
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 1/9] tcg: Split CONFIG_ATOMIC128 Richard Henderson
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 2/9] target/i386: Convert to HAVE_CMPXCHG128 Richard Henderson
2018-10-11 10:55   ` Philippe Mathieu-Daudé
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 3/9] target/arm: " Richard Henderson
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 4/9] target/arm: Check HAVE_CMPXCHG128 at translate time Richard Henderson
2018-10-11 10:55   ` Philippe Mathieu-Daudé
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 5/9] target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128 Richard Henderson
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 6/9] target/s390x: " Richard Henderson
2018-10-11  7:58   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 7/9] target/s390x: Split do_cdsg, do_lpq, do_stpq Richard Henderson
2018-10-11  8:02   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 8/9] target/s390x: Skip wout, cout helpers if op helper does not return Richard Henderson
2018-10-11  8:06   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
2018-10-11 16:47     ` Richard Henderson
2018-10-16  8:13       ` David Hildenbrand
2018-10-03 19:39 ` [Qemu-devel] [PATCH v3 9/9] target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate Richard Henderson
2018-10-11  8:09   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand
2018-10-09 18:51 ` [Qemu-devel] [PATCH v3 0/9] tcg: Reorg 128-bit atomic operations Emilio G. Cota
2018-10-11  8:10   ` [Qemu-devel] [qemu-s390x] " David Hildenbrand

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.