From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, philmd@linaro.org
Subject: Re: [PATCH v6 15/36] tcg: Add guest load/store primitives for TCGv_i128
Date: Wed, 01 Feb 2023 09:52:13 +0000
Message-ID: <874js5u2pu.fsf@linaro.org>
In-Reply-To: <20230130214844.1158612-16-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> These are not yet considering atomicity of the 16-byte value;
> this is a direct replacement for the current target code which
> uses a pair of 8-byte operations.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/exec/cpu_ldst.h |  10 +++
>  include/tcg/tcg-op.h    |   2 +
>  accel/tcg/cputlb.c      | 112 +++++++++++++++++++++++++++++++++
>  accel/tcg/user-exec.c   |  66 ++++++++++++++++++++
>  tcg/tcg-op.c            | 134 ++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 324 insertions(+)
>
> diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
> index d0c7c0d5fe..09b55cc0ee 100644
> --- a/include/exec/cpu_ldst.h
> +++ b/include/exec/cpu_ldst.h
> @@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
>  uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
>                          MemOpIdx oi, uintptr_t ra);
>  
> +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
> +                       MemOpIdx oi, uintptr_t ra);
> +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
> +                       MemOpIdx oi, uintptr_t ra);
> +
>  void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
>                   MemOpIdx oi, uintptr_t ra);
>  void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
> @@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
>  void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
>                      MemOpIdx oi, uintptr_t ra);
>  
> +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> +                     MemOpIdx oi, uintptr_t ra);
> +void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> +                     MemOpIdx oi, uintptr_t ra);
> +
>  uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
>                                   uint32_t cmpv, uint32_t newv,
>                                   MemOpIdx oi, uintptr_t retaddr);
> diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
> index c4276767d1..e5f5b63c37 100644
> --- a/include/tcg/tcg-op.h
> +++ b/include/tcg/tcg-op.h
> @@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
>  void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
>  void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
>  void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
> +void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
> +void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
>  
>  static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
>  {
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 4e040a1cb9..e3604ad313 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -2187,6 +2187,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
>      return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
>  }
>  
> +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
> +                       MemOpIdx oi, uintptr_t ra)
> +{
> +    MemOp mop = get_memop(oi);
> +    int mmu_idx = get_mmuidx(oi);
> +    MemOpIdx new_oi;
> +    unsigned a_bits;
> +    uint64_t h, l;
> +
> +    tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
> +    a_bits = get_alignment_bits(mop);
> +
> +    /* Handle CPU specific unaligned behaviour */
> +    if (addr & ((1 << a_bits) - 1)) {
> +        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
> +                             mmu_idx, ra);
> +    }
> +
> +    /* Construct an unaligned 64-bit replacement MemOpIdx. */
> +    mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
> +    new_oi = make_memop_idx(mop, mmu_idx);
> +
> +    h = helper_be_ldq_mmu(env, addr, new_oi, ra);
> +    l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
> +
> +    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
> +    return int128_make128(l, h);
> +}
> +
> +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
> +                       MemOpIdx oi, uintptr_t ra)
> +{
> +    MemOp mop = get_memop(oi);
> +    int mmu_idx = get_mmuidx(oi);
> +    MemOpIdx new_oi;
> +    unsigned a_bits;
> +    uint64_t h, l;
> +
> +    tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));

Why not use validate_memop() for this, like elsewhere in cputlb.c?
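
For reference, a minimal sketch of what I mean (this is my
recollection of the existing static helper in cputlb.c, so treat the
exact shape as an assumption):

    static void validate_memop(MemOpIdx oi, MemOp expected)
    {
    #ifdef CONFIG_DEBUG_TCG
        /* Check only size and byte order; alignment is handled separately. */
        MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
        assert(have == expected);
    #endif
    }

so the new helper could just do:

    validate_memop(oi, MO_BE | MO_128);

(it masks MO_SIZE rather than MO_SSIZE, but I assume that doesn't
matter here since a full-width 128-bit access can't be
sign-extending).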

<snip>
>  
> +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> +                     MemOpIdx oi, uintptr_t ra)
> +{
> +    MemOp mop = get_memop(oi);
> +    int mmu_idx = get_mmuidx(oi);
> +    MemOpIdx new_oi;
> +    unsigned a_bits;
> +
> +    tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));

Ditto for the other new 16-byte helpers (cpu_ld16_le_mmu,
cpu_st16_be_mmu and cpu_st16_le_mmu).

> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index cb83d2375d..33ef325f6e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -3109,6 +3109,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
>      }
>  }
>

I'm confused because the TCG ops in this patch still expand to pairs
of i64 operations and the atomicity handling hasn't come in yet.
Worth splitting the patch?

Anyway:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


> +static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
> +{
> +    MemOp mop_1 = orig, mop_2;
> +
> +    tcg_debug_assert((orig & MO_SIZE) == MO_128);
> +    tcg_debug_assert((orig & MO_SIGN) == 0);
> +
> +    /* Use a memory ordering implemented by the host. */
> +    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
> +        mop_1 &= ~MO_BSWAP;
> +    }
> +
> +    /* Reduce the size to 64-bit. */
> +    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
> +
> +    /* Retain the alignment constraints of the original. */
> +    switch (orig & MO_AMASK) {
> +    case MO_UNALN:
> +    case MO_ALIGN_2:
> +    case MO_ALIGN_4:
> +        mop_2 = mop_1;
> +        break;
> +    case MO_ALIGN_8:
> +        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
> +        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
> +        mop_2 = mop_1;
> +        break;
> +    case MO_ALIGN:
> +        /* Second has 8-byte alignment; first has 16-byte alignment. */
> +        mop_2 = mop_1;
> +        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
> +        break;
> +    case MO_ALIGN_16:
> +    case MO_ALIGN_32:
> +    case MO_ALIGN_64:
> +        /* Second has 8-byte alignment; first retains original. */
> +        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +    ret[0] = mop_1;
> +    ret[1] = mop_2;
> +}
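
(To check my understanding of the alignment splitting here: on a host
with TCG_TARGET_HAS_MEMORY_BSWAP, a naturally aligned big-endian
16-byte access, MO_128 | MO_BE | MO_ALIGN, would come out as

    ret[0] = MO_64 | MO_BE | MO_ALIGN_16;  /* first access, 16-byte aligned */
    ret[1] = MO_64 | MO_BE | MO_ALIGN;     /* second access, 8-byte aligned */

so the first of the two 8-byte operations still enforces the original
16-byte alignment constraint.)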
> +
> +void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
> +{
> +    MemOp mop[2];
> +    TCGv addr_p8;
> +    TCGv_i64 x, y;
> +
> +    canonicalize_memop_i128_as_i64(mop, memop);
> +
> +    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
> +    addr = plugin_prep_mem_callbacks(addr);
> +
> +    /* TODO: respect atomicity of the operation. */
> +    /* TODO: allow the tcg backend to see the whole operation. */
> +
> +    /*
> +     * Since there are no global TCGv_i128, there is no visible state
> +     * changed if the second load faults.  Load directly into the two
> +     * subwords.
> +     */
> +    if ((memop & MO_BSWAP) == MO_LE) {
> +        x = TCGV128_LOW(val);
> +        y = TCGV128_HIGH(val);
> +    } else {
> +        x = TCGV128_HIGH(val);
> +        y = TCGV128_LOW(val);
> +    }
> +
> +    gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
> +
> +    if ((mop[0] ^ memop) & MO_BSWAP) {
> +        tcg_gen_bswap64_i64(x, x);
> +    }
> +
> +    addr_p8 = tcg_temp_new();
> +    tcg_gen_addi_tl(addr_p8, addr, 8);
> +    gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
> +    tcg_temp_free(addr_p8);
> +
> +    if ((mop[0] ^ memop) & MO_BSWAP) {
> +        tcg_gen_bswap64_i64(y, y);
> +    }
> +
> +    plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
> +                             QEMU_PLUGIN_MEM_R);
> +}
> +
> +void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
> +{
> +    MemOp mop[2];
> +    TCGv addr_p8;
> +    TCGv_i64 x, y;
> +
> +    canonicalize_memop_i128_as_i64(mop, memop);
> +
> +    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
> +    addr = plugin_prep_mem_callbacks(addr);
> +
> +    /* TODO: respect atomicity of the operation. */
> +    /* TODO: allow the tcg backend to see the whole operation. */
> +
> +    if ((memop & MO_BSWAP) == MO_LE) {
> +        x = TCGV128_LOW(val);
> +        y = TCGV128_HIGH(val);
> +    } else {
> +        x = TCGV128_HIGH(val);
> +        y = TCGV128_LOW(val);
> +    }
> +
> +    addr_p8 = tcg_temp_new();
> +    if ((mop[0] ^ memop) & MO_BSWAP) {
> +        TCGv_i64 t = tcg_temp_new_i64();
> +
> +        tcg_gen_bswap64_i64(t, x);
> +        gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
> +        tcg_gen_bswap64_i64(t, y);
> +        tcg_gen_addi_tl(addr_p8, addr, 8);
> +        gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
> +        tcg_temp_free_i64(t);
> +    } else {
> +        gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
> +        tcg_gen_addi_tl(addr_p8, addr, 8);
> +        gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
> +    }
> +    tcg_temp_free(addr_p8);
> +
> +    plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
> +                             QEMU_PLUGIN_MEM_W);
> +}
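
As a usage sketch (assuming the TCGv_i128 temp allocation from patch
13, and with get_mem_index() standing in for however the target looks
up its mmu index), I'd expect a converted frontend to end up with
something like:

    TCGv_i128 t16 = tcg_temp_new_i128();

    tcg_gen_qemu_ld_i128(t16, addr, get_mem_index(s),
                         MO_BE | MO_128 | MO_ALIGN);
    /* ... operate on TCGV128_LOW(t16) / TCGV128_HIGH(t16) ... */
    tcg_gen_qemu_st_i128(t16, addr, get_mem_index(s),
                         MO_BE | MO_128 | MO_ALIGN);

    tcg_temp_free_i128(t16);

which is certainly tidier than open-coding the two 8-byte accesses in
each target.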
> +
>  static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
>  {
>      switch (opc & MO_SSIZE) {


-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

