All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: git@xen0n.name, gaosong@loongson.cn, philmd@linaro.org,
	qemu-arm@nongnu.org, qemu-riscv@nongnu.org,
	qemu-s390x@nongnu.org
Subject: [PATCH v4 15/57] accel/tcg: Use have_atomic16 in ldst_atomicity.c.inc
Date: Wed,  3 May 2023 08:06:14 +0100	[thread overview]
Message-ID: <20230503070656.1746170-16-richard.henderson@linaro.org> (raw)
In-Reply-To: <20230503070656.1746170-1-richard.henderson@linaro.org>

Hosts using Intel and AMD AVX cpus are quite common.
Add fast paths through ldst_atomicity using this.

Only enable with CONFIG_INT128; some older clang versions do not
support __int128_t, and the inline assembly won't work on structures.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/ldst_atomicity.c.inc | 76 +++++++++++++++++++++++++++-------
 1 file changed, 60 insertions(+), 16 deletions(-)

diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index c43f101ebe..07bfa5c3c8 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -35,6 +35,14 @@
 
 #if defined(CONFIG_ATOMIC128)
 # define HAVE_al16_fast    true
+#elif defined(CONFIG_TCG_INTERPRETER)
+/*
+ * FIXME: host specific detection for this is in tcg/$host/,
+ * but we're using tcg/tci/ instead.
+ */
+# define HAVE_al16_fast    false
+#elif defined(__x86_64__) && defined(CONFIG_INT128)
+# define HAVE_al16_fast    likely(have_atomic16)
 #else
 # define HAVE_al16_fast    false
 #endif
@@ -162,6 +170,12 @@ load_atomic16(void *pv)
 
     r.u = qatomic_read__nocheck(p);
     return r.s;
+#elif defined(__x86_64__) && defined(CONFIG_INT128)
+    Int128Alias r;
+
+    /* Via HAVE_al16_fast, have_atomic16 is true. */
+    asm("vmovdqa %1, %0" : "=x" (r.u) : "m" (*(Int128 *)pv));
+    return r.s;
 #else
     qemu_build_not_reached();
 #endif
@@ -383,6 +397,24 @@ load_atom_extract_al16_or_al8(void *pv, int s)
         r = qatomic_read__nocheck(p16);
     }
     return r >> shr;
+#elif defined(__x86_64__) && defined(CONFIG_INT128)
+    uintptr_t pi = (uintptr_t)pv;
+    int shr = (pi & 7) * 8;
+    uint64_t a, b;
+
+    /* Via HAVE_al16_fast, have_atomic16 is true. */
+    pv = (void *)(pi & ~7);
+    if (pi & 8) {
+        uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
+        a = qatomic_read__nocheck(p8);
+        b = qatomic_read__nocheck(p8 + 1);
+    } else {
+        asm("vmovdqa %2, %0\n\tvpextrq $1, %0, %1"
+            : "=x"(a), "=r"(b) : "m" (*(__uint128_t *)pv));
+    }
+    asm("shrd %b2, %1, %0" : "+r"(a) : "r"(b), "c"(shr));
+
+    return a;
 #else
     qemu_build_not_reached();
 #endif
@@ -699,23 +731,35 @@ static inline void ATTRIBUTE_ATOMIC128_OPT
 store_atomic16(void *pv, Int128Alias val)
 {
 #if defined(CONFIG_ATOMIC128)
-    __uint128_t *pu = __builtin_assume_aligned(pv, 16);
-    qatomic_set__nocheck(pu, val.u);
-#elif defined(CONFIG_CMPXCHG128)
-    __uint128_t *pu = __builtin_assume_aligned(pv, 16);
-    __uint128_t o;
-
-    /*
-     * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
-     * defer to libatomic, so we must use __sync_val_compare_and_swap_16
-     * and accept the sequential consistency that comes with it.
-     */
-    do {
-        o = *pu;
-    } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
-#else
-    qemu_build_not_reached();
+    {
+        __uint128_t *pu = __builtin_assume_aligned(pv, 16);
+        qatomic_set__nocheck(pu, val.u);
+        return;
+    }
 #endif
+#if defined(__x86_64__) && defined(CONFIG_INT128)
+    if (HAVE_al16_fast) {
+        asm("vmovdqa %1, %0" : "=m"(*(__uint128_t *)pv) : "x" (val.u));
+        return;
+    }
+#endif
+#if defined(CONFIG_CMPXCHG128)
+    {
+        __uint128_t *pu = __builtin_assume_aligned(pv, 16);
+        __uint128_t o;
+
+        /*
+         * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
+         * defer to libatomic, so we must use __sync_val_compare_and_swap_16
+         * and accept the sequential consistency that comes with it.
+         */
+        do {
+            o = *pu;
+        } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
+        return;
+    }
+#endif
+    qemu_build_not_reached();
 }
 
 /**
-- 
2.34.1



  parent reply	other threads:[~2023-05-03  7:08 UTC|newest]

Thread overview: 132+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-03  7:05 [PATCH v4 00/57] tcg: Improve atomicity support Richard Henderson
2023-05-03  7:06 ` [PATCH v4 01/57] include/exec/memop: Add bits describing atomicity Richard Henderson
2023-05-04 14:49   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 02/57] accel/tcg: Add cpu_in_serial_context Richard Henderson
2023-05-03  7:06 ` [PATCH v4 03/57] accel/tcg: Introduce tlb_read_idx Richard Henderson
2023-05-04 15:02   ` Peter Maydell
2023-05-05 18:57     ` Richard Henderson
2023-05-07 10:09       ` Peter Maydell
2023-05-08 10:02         ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 04/57] accel/tcg: Reorg system mode load helpers Richard Henderson
2023-05-04 15:39   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 05/57] accel/tcg: Reorg system mode store helpers Richard Henderson
2023-05-04 15:44   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 06/57] accel/tcg: Honor atomicity of loads Richard Henderson
2023-05-04 17:17   ` Peter Maydell
2023-05-05 20:19     ` Richard Henderson
2023-05-09 12:04       ` Peter Maydell
2023-05-09 14:27         ` Richard Henderson
2023-05-09 14:33           ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 07/57] accel/tcg: Honor atomicity of stores Richard Henderson
2023-05-05  9:28   ` Peter Maydell
2023-05-08 10:11     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 08/57] target/loongarch: Do not include tcg-ldst.h Richard Henderson
2023-05-05  9:29   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 09/57] tcg: Unify helper_{be,le}_{ld,st}* Richard Henderson
2023-05-05  9:36   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 10/57] accel/tcg: Implement helper_{ld, st}*_mmu for user-only Richard Henderson
2023-05-05  9:43   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 11/57] tcg/tci: Use helper_{ld,st}*_mmu " Richard Henderson
2023-05-05  9:44   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 12/57] tcg: Add 128-bit guest memory primitives Richard Henderson
2023-05-05 10:04   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 13/57] meson: Detect atomic128 support with optimization Richard Henderson
2023-05-05 10:29   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 14/57] tcg/i386: Add have_atomic16 Richard Henderson
2023-05-05 10:34   ` Peter Maydell
2023-05-08 13:41     ` Richard Henderson
2023-05-03  7:06 ` Richard Henderson [this message]
2023-05-05 10:37   ` [PATCH v4 15/57] accel/tcg: Use have_atomic16 in ldst_atomicity.c.inc Peter Maydell
2023-05-08 13:48     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 16/57] accel/tcg: Add aarch64 specific support in ldst_atomicity Richard Henderson
2023-05-03  7:06 ` [PATCH v4 17/57] tcg/aarch64: Detect have_lse, have_lse2 for linux Richard Henderson
2023-05-05 10:41   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 18/57] tcg/aarch64: Detect have_lse, have_lse2 for darwin Richard Henderson
2023-05-05 10:43   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 19/57] accel/tcg: Add have_lse2 support in ldst_atomicity Richard Henderson
2023-05-03  7:06 ` [PATCH v4 20/57] tcg: Introduce TCG_OPF_TYPE_MASK Richard Henderson
2023-05-05 10:45   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 21/57] tcg/i386: Use full load/store helpers in user-only mode Richard Henderson
2023-05-05 12:01   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 22/57] tcg/aarch64: " Richard Henderson
2023-05-05 12:06   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 23/57] tcg/ppc: " Richard Henderson
2023-05-05 12:07   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 24/57] tcg/loongarch64: " Richard Henderson
2023-05-05 12:07   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 25/57] tcg/riscv: " Richard Henderson
2023-05-05 12:07   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 26/57] tcg/arm: Adjust constraints on qemu_ld/st Richard Henderson
2023-05-05 12:14   ` Peter Maydell
2023-05-08 15:13     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 27/57] tcg/arm: Use full load/store helpers in user-only mode Richard Henderson
2023-05-05 12:15   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 28/57] tcg/mips: " Richard Henderson
2023-05-05 12:15   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 29/57] tcg/s390x: " Richard Henderson
2023-05-05 12:16   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 30/57] tcg/sparc64: Allocate %g2 as a third temporary Richard Henderson
2023-05-05 12:19   ` Peter Maydell
2023-05-08 15:17     ` Richard Henderson
2023-05-09  9:24       ` Peter Maydell
2023-05-09 14:34         ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 31/57] tcg/sparc64: Rename tcg_out_movi_imm13 to tcg_out_movi_s13 Richard Henderson
2023-05-05 12:20   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 32/57] tcg/sparc64: Rename tcg_out_movi_imm32 to tcg_out_movi_u32 Richard Henderson
2023-05-05 12:22   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 33/57] tcg/sparc64: Split out tcg_out_movi_s32 Richard Henderson
2023-05-05 12:23   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 34/57] tcg/sparc64: Use standard slow path for softmmu Richard Henderson
2023-05-05 12:26   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 35/57] accel/tcg: Remove helper_unaligned_{ld,st} Richard Henderson
2023-05-05 12:27   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 36/57] tcg/loongarch64: Assert the host supports unaligned accesses Richard Henderson
2023-05-05 12:30   ` Peter Maydell
2023-05-05 13:24   ` WANG Xuerui
2023-05-06  2:03     ` Song Gao
2023-05-03  7:06 ` [PATCH v4 37/57] tcg/loongarch64: Support softmmu " Richard Henderson
2023-05-05 12:35   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 38/57] tcg/riscv: " Richard Henderson
2023-05-05 10:35   ` LIU Zhiwei
2023-05-03  7:06 ` [PATCH v4 39/57] tcg: Introduce tcg_target_has_memory_bswap Richard Henderson
2023-05-05 12:41   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 40/57] tcg: Add INDEX_op_qemu_{ld,st}_i128 Richard Henderson
2023-05-05 12:45   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 41/57] tcg: Support TCG_TYPE_I128 in tcg_out_{ld, st}_helper_{args, ret} Richard Henderson
2023-05-05 12:53   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 42/57] tcg: Introduce atom_and_align_for_opc Richard Henderson
2023-05-05 13:03   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 43/57] tcg/i386: Use atom_and_align_for_opc Richard Henderson
2023-05-05 13:14   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 44/57] tcg/aarch64: " Richard Henderson
2023-05-05 13:15   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 45/57] tcg/arm: " Richard Henderson
2023-05-05 13:15   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 46/57] tcg/loongarch64: " Richard Henderson
2023-05-05 13:16   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 47/57] tcg/mips: " Richard Henderson
2023-05-05 13:17   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 48/57] tcg/ppc: " Richard Henderson
2023-05-05 13:18   ` Peter Maydell
2023-05-08 17:32     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 49/57] tcg/riscv: " Richard Henderson
2023-05-05 13:19   ` Peter Maydell
2023-05-08 17:33     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 50/57] tcg/s390x: " Richard Henderson
2023-05-05 13:20   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 51/57] tcg/sparc64: " Richard Henderson
2023-05-05 13:20   ` Peter Maydell
2023-05-08 17:34     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 52/57] tcg/i386: Honor 64-bit atomicity in 32-bit mode Richard Henderson
2023-05-05 13:27   ` Peter Maydell
2023-05-08 16:15     ` Richard Henderson
2023-05-03  7:06 ` [PATCH v4 53/57] tcg/i386: Support 128-bit load/store with have_atomic16 Richard Henderson
2023-05-05 13:34   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 54/57] tcg/aarch64: Rename temporaries Richard Henderson
2023-05-05 13:36   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 55/57] tcg/aarch64: Support 128-bit load/store Richard Henderson
2023-05-05 13:41   ` Peter Maydell
2023-05-03  7:06 ` [PATCH v4 56/57] tcg/ppc: " Richard Henderson
2023-05-08 12:16   ` Daniel Henrique Barboza
2023-05-03  7:06 ` [PATCH v4 57/57] tcg/s390x: " Richard Henderson
2023-05-05 13:43 ` [PATCH v4 00/57] tcg: Improve atomicity support Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230503070656.1746170-16-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=gaosong@loongson.cn \
    --cc=git@xen0n.name \
    --cc=philmd@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.