All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH for-8.0 20/29] tcg: Add INDEX_op_qemu_{ld,st}_i128
Date: Fri, 18 Nov 2022 01:47:45 -0800	[thread overview]
Message-ID: <20221118094754.242910-21-richard.henderson@linaro.org> (raw)
In-Reply-To: <20221118094754.242910-1-richard.henderson@linaro.org>

Add opcodes for backend support for 128-bit memory operations.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-opc.h        |  8 +++++
 tcg/aarch64/tcg-target.h     |  2 ++
 tcg/arm/tcg-target.h         |  2 ++
 tcg/i386/tcg-target.h        |  2 ++
 tcg/loongarch64/tcg-target.h |  2 ++
 tcg/mips/tcg-target.h        |  2 ++
 tcg/ppc/tcg-target.h         |  2 ++
 tcg/riscv/tcg-target.h       |  2 ++
 tcg/s390x/tcg-target.h       |  2 ++
 tcg/sparc64/tcg-target.h     |  2 ++
 tcg/tci/tcg-target.h         |  2 ++
 tcg/tcg-op.c                 | 67 ++++++++++++++++++++++++++++++++----
 tcg/tcg.c                    |  4 +++
 tcg/README                   | 10 ++++--
 14 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index dd444734d9..94cf7c5d6a 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -213,6 +213,14 @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
     IMPL(TCG_TARGET_HAS_qemu_st8_i32))
 
+/* Only for 64-bit hosts at the moment. */
+DEF(qemu_ld_i128, 2, 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
+    IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
+DEF(qemu_st_i128, 0, 3, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
+    IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
+
 /* Host vector support.  */
 
 #define IMPLVEC  TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index b8f734f371..b0fbf5b699 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -130,6 +130,8 @@ extern bool have_lse2;
 #define TCG_TARGET_HAS_mulsh_i64        1
 #define TCG_TARGET_HAS_direct_jump      1
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 #define TCG_TARGET_HAS_v64              1
 #define TCG_TARGET_HAS_v128             1
 #define TCG_TARGET_HAS_v256             0
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 6613d3d791..8bcab0ac9b 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -126,6 +126,8 @@ extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_direct_jump      0
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 #define TCG_TARGET_HAS_v64              use_neon_instructions
 #define TCG_TARGET_HAS_v128             use_neon_instructions
 #define TCG_TARGET_HAS_v256             0
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 5b037b1d2b..53d2cb3412 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -195,6 +195,8 @@ extern bool have_atomic16;
 #define TCG_TARGET_HAS_qemu_st8_i32     1
 #endif
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 /* We do not support older SSE systems, only beginning with AVX1.  */
 #define TCG_TARGET_HAS_v64              have_avx1
 #define TCG_TARGET_HAS_v128             have_avx1
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 9d0db8fdfe..6cb702a108 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -173,6 +173,8 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index b235cba8ba..0897cfd8d5 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -204,6 +204,8 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_ext16u_i64       0 /* andi rt, rs, 0xffff */
 #endif
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 #define TCG_TARGET_DEFAULT_MO (0)
 #define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index b5cd225cfa..920a746482 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -151,6 +151,8 @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_mulsh_i64        1
 #endif
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 /*
  * While technically Altivec could support V64, it has no 64-bit store
  * instruction and substituting two 32-bit stores makes the generated
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index d61ca902d3..205d513d08 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -168,6 +168,8 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i64        1
 #endif
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 /* not defined -- call should be eliminated at compile time */
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 9a3856f0b3..f87905d1e4 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -139,6 +139,8 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_muluh_i64      0
 #define TCG_TARGET_HAS_mulsh_i64      0
 
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
 #define TCG_TARGET_HAS_v64            HAVE_FACILITY(VECTOR)
 #define TCG_TARGET_HAS_v128           HAVE_FACILITY(VECTOR)
 #define TCG_TARGET_HAS_v256           0
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index 53cfa843da..bfbfb51319 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -152,6 +152,8 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_muluh_i64        use_vis3_instructions
 #define TCG_TARGET_HAS_mulsh_i64        0
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 #define TCG_AREG0 TCG_REG_I0
 
 #define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 9d569c9e04..e4899c7d02 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -128,6 +128,8 @@
 #define TCG_TARGET_HAS_mulu2_i32        1
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
+
 /* Number of registers available. */
 #define TCG_TARGET_NB_REGS 16
 
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index bbb29bed2b..6210577b85 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3201,7 +3201,7 @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
 
 void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
 {
-    MemOpIdx oi = make_memop_idx(memop, idx);
+    const MemOpIdx oi = make_memop_idx(memop, idx);
 
     tcg_debug_assert((memop & MO_SIZE) == MO_128);
     tcg_debug_assert((memop & MO_SIGN) == 0);
@@ -3209,9 +3209,35 @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     addr = plugin_prep_mem_callbacks(addr);
 
-    /* TODO: allow the tcg backend to see the whole operation. */
+    /* TODO: For now, force 32-bit hosts to use the helper. */
+    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
+        TCGv_i64 lo, hi;
+        TCGArg addr_arg;
+        MemOpIdx adj_oi;
 
-    if (use_two_i64_for_i128(memop)) {
+        /* TODO: Make TCG_TARGET_HAS_MEMORY_BSWAP fine grained. */
+        if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+            lo = TCGV128_HIGH(val);
+            hi = TCGV128_LOW(val);
+            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
+        } else {
+            lo = TCGV128_LOW(val);
+            hi = TCGV128_HIGH(val);
+            adj_oi = oi;
+        }
+
+#if TARGET_LONG_BITS == 32
+        addr_arg = tcgv_i32_arg(addr);
+#else
+        addr_arg = tcgv_i64_arg(addr);
+#endif
+        tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
+
+        if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+            tcg_gen_bswap64_i64(lo, lo);
+            tcg_gen_bswap64_i64(hi, hi);
+        }
+    } else if (use_two_i64_for_i128(memop)) {
         MemOp mop[2];
         TCGv addr_p8;
         TCGv_i64 x, y;
@@ -3254,7 +3280,7 @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
 
 void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
 {
-    MemOpIdx oi = make_memop_idx(memop, idx);
+    const MemOpIdx oi = make_memop_idx(memop, idx);
 
     tcg_debug_assert((memop & MO_SIZE) == MO_128);
     tcg_debug_assert((memop & MO_SIGN) == 0);
@@ -3262,9 +3288,38 @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
     addr = plugin_prep_mem_callbacks(addr);
 
-    /* TODO: allow the tcg backend to see the whole operation. */
+    /* TODO: For now, force 32-bit hosts to use the helper. */
 
-    if (use_two_i64_for_i128(memop)) {
+    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
+        TCGv_i64 lo, hi;
+        TCGArg addr_arg;
+        MemOpIdx adj_oi;
+
+        /* TODO: Make TCG_TARGET_HAS_MEMORY_BSWAP fine grained. */
+        if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+            lo = tcg_temp_new_i64();
+            hi = tcg_temp_new_i64();
+            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
+            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
+            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
+        } else {
+            lo = TCGV128_LOW(val);
+            hi = TCGV128_HIGH(val);
+            adj_oi = oi;
+        }
+
+#if TARGET_LONG_BITS == 32
+        addr_arg = tcgv_i32_arg(addr);
+#else
+        addr_arg = tcgv_i64_arg(addr);
+#endif
+        tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
+
+        if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+            tcg_temp_free_i64(lo);
+            tcg_temp_free_i64(hi);
+        }
+    } else if (use_two_i64_for_i128(memop)) {
         MemOp mop[2];
         TCGv addr_p8;
         TCGv_i64 x, y;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index d221f76366..9a000c55ed 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1497,6 +1497,10 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_qemu_st8_i32:
         return TCG_TARGET_HAS_qemu_st8_i32;
 
+    case INDEX_op_qemu_ld_i128:
+    case INDEX_op_qemu_st_i128:
+        return TCG_TARGET_HAS_qemu_ldst_i128;
+
     case INDEX_op_mov_i32:
     case INDEX_op_setcond_i32:
     case INDEX_op_brcond_i32:
diff --git a/tcg/README b/tcg/README
index bc15cc3b32..b3f8578955 100644
--- a/tcg/README
+++ b/tcg/README
@@ -512,8 +512,8 @@ jump to the TCG epilogue to go back to the exec loop.
 This operation is optional. If the TCG backend does not implement the
 goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
 
-* qemu_ld_i32/i64 t0, t1, flags, memidx
-* qemu_st_i32/i64 t0, t1, flags, memidx
+* qemu_ld_i32/i64/i128 t0, t1, flags, memidx
+* qemu_st_i32/i64/i128 t0, t1, flags, memidx
 * qemu_st8_i32 t0, t1, flags, memidx
 
 Load data at the guest address t1 into t0, or store data in t0 at guest
@@ -522,7 +522,8 @@ register t0 only.  The address t1 is always sized according to the guest,
 and the width of the memory operation is controlled by flags.
 
 Both t0 and t1 may be split into little-endian ordered pairs of registers
-if dealing with 64-bit quantities on a 32-bit host.
+if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities
+on a 64-bit host.
 
 The memidx selects the qemu tlb index to use (e.g. user or kernel access).
 The flags are the MemOp bits, selecting the sign, width, and endianness
@@ -531,6 +532,9 @@ of the memory access.
 For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
 64-bit memory access specified in flags.
 
+For qemu_ld/st_i128, these are only supported for a 64-bit host, and are
+guaranteed to be used with the host memory ordering.
+
 For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
 the memory operation is known to be 8-bit.  This allows the backend to
 provide a different set of register constraints.
-- 
2.34.1



  parent reply	other threads:[~2022-11-18  9:53 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-18  9:47 [PATCH for-8.0 00/29] tcg: Improve atomicity support Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 01/29] include/qemu/cpuid: Introduce xgetbv_low Richard Henderson
2022-11-21 12:15   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 02/29] include/exec/memop: Add bits describing atomicity Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 03/29] accel/tcg: Add cpu_in_serial_context Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 04/29] accel/tcg: Introduce tlb_read_idx Richard Henderson
2022-11-21 12:25   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 05/29] accel/tcg: Reorg system mode load helpers Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 06/29] accel/tcg: Reorg system mode store helpers Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 07/29] accel/tcg: Honor atomicity of loads Richard Henderson
2022-11-22 14:35   ` Peter Maydell
2022-11-22 18:04     ` Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 08/29] accel/tcg: Honor atomicity of stores Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 09/29] tcg/tci: Use cpu_{ld,st}_mmu Richard Henderson
2022-11-21 12:40   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 10/29] tcg: Unify helper_{be,le}_{ld,st}* Richard Henderson
2022-11-21 12:48   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 11/29] accel/tcg: Implement helper_{ld, st}*_mmu for user-only Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 12/29] tcg: Add 128-bit guest memory primitives Richard Henderson
2022-11-22  3:30   ` Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 13/29] meson: Detect atomic128 support with optimization Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 14/29] tcg/i386: Add have_atomic16 Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 15/29] include/qemu/int128: Add vector type to Int128Alias Richard Henderson
2022-11-21 23:45   ` Philippe Mathieu-Daudé
2022-11-22 18:21   ` Philippe Mathieu-Daudé
2022-11-22 18:31     ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 16/29] accel/tcg: Use have_atomic16 in ldst_atomicity.c.inc Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 17/29] tcg/aarch64: Add have_lse, have_lse2 Richard Henderson
2022-11-21 23:10   ` Philippe Mathieu-Daudé
2022-11-21 23:14     ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 18/29] accel/tcg: Add aarch64 specific support in ldst_atomicity Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 19/29] tcg: Introduce TCG_OPF_TYPE_MASK Richard Henderson
2022-11-21 16:12   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` Richard Henderson [this message]
2022-11-21 22:59   ` [PATCH for-8.0 20/29] tcg: Add INDEX_op_qemu_{ld,st}_i128 Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 21/29] tcg/i386: Introduce tcg_out_mov2 Richard Henderson
2022-11-21 16:21   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 22/29] tcg/i386: Introduce tcg_out_testi Richard Henderson
2022-11-21 16:22   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 23/29] tcg/i386: Use full load/store helpers in user-only mode Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 24/29] tcg/i386: Replace is64 with type in qemu_ld/st routines Richard Henderson
2022-11-21 16:27   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 25/29] tcg/i386: Mark Win64 call-saved vector regs as reserved Richard Henderson
2022-11-21 16:28   ` Philippe Mathieu-Daudé
2022-11-18  9:47 ` [PATCH for-8.0 26/29] tcg/i386: Examine MemOp for atomicity and alignment Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 27/29] tcg/i386: Support 128-bit load/store with have_atomic16 Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 28/29] tcg/i386: Add vex_v argument to tcg_out_vex_modrm_pool Richard Henderson
2022-11-18  9:47 ` [PATCH for-8.0 29/29] tcg/i386: Honor 64-bit atomicity in 32-bit mode Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221118094754.242910-21-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.