From: LIU Zhiwei <zhiwei_liu@c-sky.com> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Cc: palmer@dabbelt.com, richard.henderson@linaro.org, bin.meng@windriver.com, Alistair.Francis@wdc.com, LIU Zhiwei <zhiwei_liu@c-sky.com> Subject: [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions Date: Thu, 10 Jun 2021 15:58:34 +0800 [thread overview] Message-ID: <20210610075908.3305506-4-zhiwei_liu@c-sky.com> (raw) In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com> Include 5 groups: Wrap-around (dropping overflow), Signed Halving, Unsigned Halving, Signed Saturation, and Unsigned Saturation. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> --- include/tcg/tcg-op-gvec.h | 10 + target/riscv/helper.h | 30 ++ target/riscv/insn32.decode | 32 +++ target/riscv/insn_trans/trans_rvp.c.inc | 117 ++++++++ target/riscv/meson.build | 1 + target/riscv/packed_helper.c | 354 ++++++++++++++++++++++++ target/riscv/translate.c | 1 + tcg/tcg-op-gvec.c | 28 ++ 8 files changed, 573 insertions(+) create mode 100644 target/riscv/insn_trans/trans_rvp.c.inc create mode 100644 target/riscv/packed_helper.c diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index c69a7de984..2dae9e78d0 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -386,10 +386,12 @@ void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a); void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); @@ -401,4 +403,12 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, 
TCGv_i64 a, int64_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); +#if TARGET_LONG_BITS == 64 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64 +#else +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 +#endif + #endif diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 415e37bc37..b6a71ade33 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1149,3 +1149,33 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) + +/* P extension function */ +DEF_HELPER_3(radd16, tl, env, tl, tl) +DEF_HELPER_3(uradd16, tl, env, tl, tl) +DEF_HELPER_3(kadd16, tl, env, tl, tl) +DEF_HELPER_3(ukadd16, tl, env, tl, tl) +DEF_HELPER_3(rsub16, tl, env, tl, tl) +DEF_HELPER_3(ursub16, tl, env, tl, tl) +DEF_HELPER_3(ksub16, tl, env, tl, tl) +DEF_HELPER_3(uksub16, tl, env, tl, tl) +DEF_HELPER_3(cras16, tl, env, tl, tl) +DEF_HELPER_3(rcras16, tl, env, tl, tl) +DEF_HELPER_3(urcras16, tl, env, tl, tl) +DEF_HELPER_3(kcras16, tl, env, tl, tl) +DEF_HELPER_3(ukcras16, tl, env, tl, tl) +DEF_HELPER_3(crsa16, tl, env, tl, tl) +DEF_HELPER_3(rcrsa16, tl, env, tl, tl) +DEF_HELPER_3(urcrsa16, tl, env, tl, tl) +DEF_HELPER_3(kcrsa16, tl, env, tl, tl) +DEF_HELPER_3(ukcrsa16, tl, env, tl, tl) +DEF_HELPER_3(stas16, tl, env, tl, tl) +DEF_HELPER_3(rstas16, tl, env, tl, tl) +DEF_HELPER_3(urstas16, tl, env, tl, tl) +DEF_HELPER_3(kstas16, tl, env, tl, tl) +DEF_HELPER_3(ukstas16, tl, env, tl, tl) +DEF_HELPER_3(stsa16, tl, env, tl, tl) +DEF_HELPER_3(rstsa16, tl, env, tl, tl) +DEF_HELPER_3(urstsa16, tl, env, tl, tl) +DEF_HELPER_3(kstsa16, tl, env, tl, tl) +DEF_HELPER_3(ukstsa16, tl, env, tl, tl) diff 
--git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index f09f8d5faf..57f72fabf6 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -732,3 +732,35 @@ greviw 0110100 .......... 101 ..... 0011011 @sh5 gorciw 0010100 .......... 101 ..... 0011011 @sh5 slli_uw 00001. ........... 001 ..... 0011011 @sh + +# *** RV32P Extension *** +add16 0100000 ..... ..... 000 ..... 1110111 @r +radd16 0000000 ..... ..... 000 ..... 1110111 @r +uradd16 0010000 ..... ..... 000 ..... 1110111 @r +kadd16 0001000 ..... ..... 000 ..... 1110111 @r +ukadd16 0011000 ..... ..... 000 ..... 1110111 @r +sub16 0100001 ..... ..... 000 ..... 1110111 @r +rsub16 0000001 ..... ..... 000 ..... 1110111 @r +ursub16 0010001 ..... ..... 000 ..... 1110111 @r +ksub16 0001001 ..... ..... 000 ..... 1110111 @r +uksub16 0011001 ..... ..... 000 ..... 1110111 @r +cras16 0100010 ..... ..... 000 ..... 1110111 @r +rcras16 0000010 ..... ..... 000 ..... 1110111 @r +urcras16 0010010 ..... ..... 000 ..... 1110111 @r +kcras16 0001010 ..... ..... 000 ..... 1110111 @r +ukcras16 0011010 ..... ..... 000 ..... 1110111 @r +crsa16 0100011 ..... ..... 000 ..... 1110111 @r +rcrsa16 0000011 ..... ..... 000 ..... 1110111 @r +urcrsa16 0010011 ..... ..... 000 ..... 1110111 @r +kcrsa16 0001011 ..... ..... 000 ..... 1110111 @r +ukcrsa16 0011011 ..... ..... 000 ..... 1110111 @r +stas16 1111010 ..... ..... 010 ..... 1110111 @r +rstas16 1011010 ..... ..... 010 ..... 1110111 @r +urstas16 1101010 ..... ..... 010 ..... 1110111 @r +kstas16 1100010 ..... ..... 010 ..... 1110111 @r +ukstas16 1110010 ..... ..... 010 ..... 1110111 @r +stsa16 1111011 ..... ..... 010 ..... 1110111 @r +rstsa16 1011011 ..... ..... 010 ..... 1110111 @r +urstsa16 1101011 ..... ..... 010 ..... 1110111 @r +kstsa16 1100011 ..... ..... 010 ..... 1110111 @r +ukstsa16 1110011 ..... ..... 010 ..... 
+ 1110111 @r diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc new file mode 100644 index 0000000000..43f395657a --- /dev/null +++ b/target/riscv/insn_trans/trans_rvp.c.inc @@ -0,0 +1,117 @@ +/* + * RISC-V translation routines for the RVP Standard Extension. + * + * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "tcg/tcg-op-gvec.h" +#include "tcg/tcg-gvec-desc.h" +#include "tcg/tcg.h" + +/* + *** SIMD Data Processing Instructions + */ + +/* 16-bit Addition & Subtraction Instructions */ + +/* + * For some instructions, such as add16, an observation can be utilized: + * 1) If any reg is zero, it can be reduced to an inline op on the whole reg. + * 2) Otherwise, it can be accelerated by a vec op. 
+ */ +static inline bool +r_inline(DisasContext *ctx, arg_r *a, + void (* vecop)(TCGv, TCGv, TCGv), + void (* op)(TCGv, TCGv, TCGv)) +{ + if (!has_ext(ctx, RVP)) { + return false; + } + if (a->rd && a->rs1 && a->rs2) { + vecop(cpu_gpr[a->rd], cpu_gpr[a->rs1], cpu_gpr[a->rs2]); + } else { + gen_arith(ctx, a, op); + } + return true; +} + +/* Complete inline implementation */ +#define GEN_RVP_R_INLINE(NAME, VECOP, OP) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + return r_inline(s, a, VECOP, OP); \ +} + +GEN_RVP_R_INLINE(add16, tcg_gen_vec_add16_tl, tcg_gen_add_tl); +GEN_RVP_R_INLINE(sub16, tcg_gen_vec_sub16_tl, tcg_gen_sub_tl); + +/* Out of line helpers for R format packed instructions */ +static inline bool +r_ool(DisasContext *ctx, arg_r *a, void (* fn)(TCGv, TCGv_ptr, TCGv, TCGv)) +{ + TCGv src1, src2, dst; + if (!has_ext(ctx, RVP)) { + return false; + } + + src1 = tcg_temp_new(); + src2 = tcg_temp_new(); + dst = tcg_temp_new(); + + gen_get_gpr(src1, a->rs1); + gen_get_gpr(src2, a->rs2); + fn(dst, cpu_env, src1, src2); + gen_set_gpr(a->rd, dst); + + tcg_temp_free(src1); + tcg_temp_free(src2); + tcg_temp_free(dst); + return true; +} + +#define GEN_RVP_R_OOL(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + return r_ool(s, a, gen_helper_##NAME); \ +} + +GEN_RVP_R_OOL(radd16); +GEN_RVP_R_OOL(uradd16); +GEN_RVP_R_OOL(kadd16); +GEN_RVP_R_OOL(ukadd16); +GEN_RVP_R_OOL(rsub16); +GEN_RVP_R_OOL(ursub16); +GEN_RVP_R_OOL(ksub16); +GEN_RVP_R_OOL(uksub16); +GEN_RVP_R_OOL(cras16); +GEN_RVP_R_OOL(rcras16); +GEN_RVP_R_OOL(urcras16); +GEN_RVP_R_OOL(kcras16); +GEN_RVP_R_OOL(ukcras16); +GEN_RVP_R_OOL(crsa16); +GEN_RVP_R_OOL(rcrsa16); +GEN_RVP_R_OOL(urcrsa16); +GEN_RVP_R_OOL(kcrsa16); +GEN_RVP_R_OOL(ukcrsa16); +GEN_RVP_R_OOL(stas16); +GEN_RVP_R_OOL(rstas16); +GEN_RVP_R_OOL(urstas16); +GEN_RVP_R_OOL(kstas16); +GEN_RVP_R_OOL(ukstas16); +GEN_RVP_R_OOL(stsa16); +GEN_RVP_R_OOL(rstsa16); +GEN_RVP_R_OOL(urstsa16); +GEN_RVP_R_OOL(kstsa16); 
+GEN_RVP_R_OOL(ukstsa16); diff --git a/target/riscv/meson.build b/target/riscv/meson.build index d5e0bc93ea..cc169e1b2c 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -17,6 +17,7 @@ riscv_ss.add(files( 'op_helper.c', 'vector_helper.c', 'bitmanip_helper.c', + 'packed_helper.c', 'translate.c', )) diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c new file mode 100644 index 0000000000..b84abaaf25 --- /dev/null +++ b/target/riscv/packed_helper.c @@ -0,0 +1,354 @@ +/* + * RISC-V P Extension Helpers for QEMU. + * + * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "fpu/softfloat.h" +#include <math.h> +#include "internals.h" + +/* + *** SIMD Data Processing Instructions + */ + +/* 16-bit Addition & Subtraction Instructions */ +typedef void PackedFn3i(CPURISCVState *, void *, void *, void *, uint8_t); + +/* Define a common function to loop elements in packed register */ +static inline target_ulong +rvpr(CPURISCVState *env, target_ulong a, target_ulong b, + uint8_t step, uint8_t size, PackedFn3i *fn) +{ + int i, passes = sizeof(target_ulong) / size; + target_ulong result = 0; + + for (i = 0; i < passes; i += step) { + fn(env, &result, &a, &b, i); + } + return result; +} + +#define RVPR(NAME, STEP, SIZE) \ +target_ulong HELPER(NAME)(CPURISCVState *env, target_ulong a, \ + target_ulong b) \ +{ \ + return rvpr(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\ +} + +static inline int32_t hadd32(int32_t a, int32_t b) +{ + return ((int64_t)a + b) >> 1; +} + +static inline void do_radd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = hadd32(a[i], b[i]); +} + +RVPR(radd16, 1, 2); + +static inline uint32_t haddu32(uint32_t a, uint32_t b) +{ + return ((uint64_t)a + b) >> 1; +} + +static inline void do_uradd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = haddu32(a[i], b[i]); +} + +RVPR(uradd16, 1, 2); + +static inline void do_kadd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = sadd16(env, 0, a[i], b[i]); +} + +RVPR(kadd16, 1, 2); + +static inline void do_ukadd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = saddu16(env, 0, a[i], b[i]); +} + +RVPR(ukadd16, 1, 2); + +static inline int32_t hsub32(int32_t a, int32_t b) +{ + return 
((int64_t)a - b) >> 1; +} + +static inline int64_t hsub64(int64_t a, int64_t b) +{ + int64_t res = a - b; + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return (res >> 1) ^ over; +} + +static inline void do_rsub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = hsub32(a[i], b[i]); +} + +RVPR(rsub16, 1, 2); + +static inline uint64_t hsubu64(uint64_t a, uint64_t b) +{ + return (a - b) >> 1; +} + +static inline void do_ursub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = hsubu64(a[i], b[i]); +} + +RVPR(ursub16, 1, 2); + +static inline void do_ksub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = ssub16(env, 0, a[i], b[i]); +} + +RVPR(ksub16, 1, 2); + +static inline void do_uksub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = ssubu16(env, 0, a[i], b[i]); +} + +RVPR(uksub16, 1, 2); + +static inline void do_cras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] - b[H2(i + 1)]; + d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i)]; +} + +RVPR(cras16, 2, 2); + +static inline void do_rcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsub32(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(rcras16, 2, 2); + +static inline void do_urcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsubu64(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(urcras16, 2, 2); + +static inline void do_kcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, 
*a = va, *b = vb; + d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(kcras16, 2, 2); + +static inline void do_ukcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(ukcras16, 2, 2); + +static inline void do_crsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] + b[H2(i + 1)]; + d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i)]; +} + +RVPR(crsa16, 2, 2); + +static inline void do_rcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hadd32(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(rcrsa16, 2, 2); + +static inline void do_urcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = haddu32(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(urcrsa16, 2, 2); + +static inline void do_kcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(kcrsa16, 2, 2); + +static inline void do_ukcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(ukcrsa16, 2, 2); + +static inline void do_stas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] - b[H2(i)]; + d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i + 1)]; +} + +RVPR(stas16, 2, 2); + +static 
inline void do_rstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsub32(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(rstas16, 2, 2); + +static inline void do_urstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsubu64(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(urstas16, 2, 2); + +static inline void do_kstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(kstas16, 2, 2); + +static inline void do_ukstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(ukstas16, 2, 2); + +static inline void do_stsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] + b[H2(i)]; + d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i + 1)]; +} + +RVPR(stsa16, 2, 2); + +static inline void do_rstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hadd32(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(rstsa16, 2, 2); + +static inline void do_urstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = haddu32(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(urstsa16, 2, 2); + +static inline void do_kstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = sadd16(env, 0, a[H2(i)], 
b[H2(i)]); + d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(kstsa16, 2, 2); + +static inline void do_ukstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(ukstsa16, 2, 2); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 0e6ede4d71..51b144e9be 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -908,6 +908,7 @@ static bool gen_unary(DisasContext *ctx, arg_r2 *a, #include "insn_trans/trans_rvh.c.inc" #include "insn_trans/trans_rvv.c.inc" #include "insn_trans/trans_rvb.c.inc" +#include "insn_trans/trans_rvp.c.inc" #include "insn_trans/trans_privileged.c.inc" /* Include the auto-generated decoder for 16 bit insn */ diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 498a959839..a8898ba7bf 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_addv_mask(d, a, b, m); } +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, a, ~0xffff); + tcg_gen_add_i32(t2, a, b); + tcg_gen_add_i32(t1, t1, b); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_subv_mask(d, a, b, m); } +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, b, ~0xffff); + tcg_gen_sub_i32(t2, a, b); + tcg_gen_sub_i32(t1, a, t1); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + 
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); -- 2.25.1
WARNING: multiple messages have this Message-ID (diff)
From: LIU Zhiwei <zhiwei_liu@c-sky.com> To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org Cc: Alistair.Francis@wdc.com, palmer@dabbelt.com, bin.meng@windriver.com, richard.henderson@linaro.org, LIU Zhiwei <zhiwei_liu@c-sky.com> Subject: [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions Date: Thu, 10 Jun 2021 15:58:34 +0800 [thread overview] Message-ID: <20210610075908.3305506-4-zhiwei_liu@c-sky.com> (raw) In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com> Include 5 groups: Wrap-around (dropping overflow), Signed Halving, Unsigned Halving, Signed Saturation, and Unsigned Saturation. Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com> --- include/tcg/tcg-op-gvec.h | 10 + target/riscv/helper.h | 30 ++ target/riscv/insn32.decode | 32 +++ target/riscv/insn_trans/trans_rvp.c.inc | 117 ++++++++ target/riscv/meson.build | 1 + target/riscv/packed_helper.c | 354 ++++++++++++++++++++++++ target/riscv/translate.c | 1 + tcg/tcg-op-gvec.c | 28 ++ 8 files changed, 573 insertions(+) create mode 100644 target/riscv/insn_trans/trans_rvp.c.inc create mode 100644 target/riscv/packed_helper.c diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index c69a7de984..2dae9e78d0 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -386,10 +386,12 @@ void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a); void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); @@ -401,4 +403,12 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, 
TCGv_i64 a, int64_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); +#if TARGET_LONG_BITS == 64 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64 +#else +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 +#endif + #endif diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 415e37bc37..b6a71ade33 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1149,3 +1149,33 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) + +/* P extension function */ +DEF_HELPER_3(radd16, tl, env, tl, tl) +DEF_HELPER_3(uradd16, tl, env, tl, tl) +DEF_HELPER_3(kadd16, tl, env, tl, tl) +DEF_HELPER_3(ukadd16, tl, env, tl, tl) +DEF_HELPER_3(rsub16, tl, env, tl, tl) +DEF_HELPER_3(ursub16, tl, env, tl, tl) +DEF_HELPER_3(ksub16, tl, env, tl, tl) +DEF_HELPER_3(uksub16, tl, env, tl, tl) +DEF_HELPER_3(cras16, tl, env, tl, tl) +DEF_HELPER_3(rcras16, tl, env, tl, tl) +DEF_HELPER_3(urcras16, tl, env, tl, tl) +DEF_HELPER_3(kcras16, tl, env, tl, tl) +DEF_HELPER_3(ukcras16, tl, env, tl, tl) +DEF_HELPER_3(crsa16, tl, env, tl, tl) +DEF_HELPER_3(rcrsa16, tl, env, tl, tl) +DEF_HELPER_3(urcrsa16, tl, env, tl, tl) +DEF_HELPER_3(kcrsa16, tl, env, tl, tl) +DEF_HELPER_3(ukcrsa16, tl, env, tl, tl) +DEF_HELPER_3(stas16, tl, env, tl, tl) +DEF_HELPER_3(rstas16, tl, env, tl, tl) +DEF_HELPER_3(urstas16, tl, env, tl, tl) +DEF_HELPER_3(kstas16, tl, env, tl, tl) +DEF_HELPER_3(ukstas16, tl, env, tl, tl) +DEF_HELPER_3(stsa16, tl, env, tl, tl) +DEF_HELPER_3(rstsa16, tl, env, tl, tl) +DEF_HELPER_3(urstsa16, tl, env, tl, tl) +DEF_HELPER_3(kstsa16, tl, env, tl, tl) +DEF_HELPER_3(ukstsa16, tl, env, tl, tl) diff 
--git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index f09f8d5faf..57f72fabf6 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -732,3 +732,35 @@ greviw 0110100 .......... 101 ..... 0011011 @sh5 gorciw 0010100 .......... 101 ..... 0011011 @sh5 slli_uw 00001. ........... 001 ..... 0011011 @sh + +# *** RV32P Extension *** +add16 0100000 ..... ..... 000 ..... 1110111 @r +radd16 0000000 ..... ..... 000 ..... 1110111 @r +uradd16 0010000 ..... ..... 000 ..... 1110111 @r +kadd16 0001000 ..... ..... 000 ..... 1110111 @r +ukadd16 0011000 ..... ..... 000 ..... 1110111 @r +sub16 0100001 ..... ..... 000 ..... 1110111 @r +rsub16 0000001 ..... ..... 000 ..... 1110111 @r +ursub16 0010001 ..... ..... 000 ..... 1110111 @r +ksub16 0001001 ..... ..... 000 ..... 1110111 @r +uksub16 0011001 ..... ..... 000 ..... 1110111 @r +cras16 0100010 ..... ..... 000 ..... 1110111 @r +rcras16 0000010 ..... ..... 000 ..... 1110111 @r +urcras16 0010010 ..... ..... 000 ..... 1110111 @r +kcras16 0001010 ..... ..... 000 ..... 1110111 @r +ukcras16 0011010 ..... ..... 000 ..... 1110111 @r +crsa16 0100011 ..... ..... 000 ..... 1110111 @r +rcrsa16 0000011 ..... ..... 000 ..... 1110111 @r +urcrsa16 0010011 ..... ..... 000 ..... 1110111 @r +kcrsa16 0001011 ..... ..... 000 ..... 1110111 @r +ukcrsa16 0011011 ..... ..... 000 ..... 1110111 @r +stas16 1111010 ..... ..... 010 ..... 1110111 @r +rstas16 1011010 ..... ..... 010 ..... 1110111 @r +urstas16 1101010 ..... ..... 010 ..... 1110111 @r +kstas16 1100010 ..... ..... 010 ..... 1110111 @r +ukstas16 1110010 ..... ..... 010 ..... 1110111 @r +stsa16 1111011 ..... ..... 010 ..... 1110111 @r +rstsa16 1011011 ..... ..... 010 ..... 1110111 @r +urstsa16 1101011 ..... ..... 010 ..... 1110111 @r +kstsa16 1100011 ..... ..... 010 ..... 1110111 @r +ukstsa16 1110011 ..... ..... 010 ..... 
1110111 @r diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc new file mode 100644 index 0000000000..43f395657a --- /dev/null +++ b/target/riscv/insn_trans/trans_rvp.c.inc @@ -0,0 +1,117 @@ +/* + * RISC-V translation routines for the RVP Standard Extension. + * + * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "tcg/tcg-op-gvec.h" +#include "tcg/tcg-gvec-desc.h" +#include "tcg/tcg.h" + +/* + *** SIMD Data Processing Instructions + */ + +/* 16-bit Addition & Subtraction Instructions */ + +/* + * For some instructions, such as add16, an observation can be utilized: + * 1) If any reg is zero, it can be reduced to an inline op on the whole reg. + * 2) Otherwise, it can be accelerated by a vec op. 
+ */ +static inline bool +r_inline(DisasContext *ctx, arg_r *a, + void (* vecop)(TCGv, TCGv, TCGv), + void (* op)(TCGv, TCGv, TCGv)) +{ + if (!has_ext(ctx, RVP)) { + return false; + } + if (a->rd && a->rs1 && a->rs2) { + vecop(cpu_gpr[a->rd], cpu_gpr[a->rs1], cpu_gpr[a->rs2]); + } else { + gen_arith(ctx, a, op); + } + return true; +} + +/* Complete inline implementation */ +#define GEN_RVP_R_INLINE(NAME, VECOP, OP) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + return r_inline(s, a, VECOP, OP); \ +} + +GEN_RVP_R_INLINE(add16, tcg_gen_vec_add16_tl, tcg_gen_add_tl); +GEN_RVP_R_INLINE(sub16, tcg_gen_vec_sub16_tl, tcg_gen_sub_tl); + +/* Out of line helpers for R format packed instructions */ +static inline bool +r_ool(DisasContext *ctx, arg_r *a, void (* fn)(TCGv, TCGv_ptr, TCGv, TCGv)) +{ + TCGv src1, src2, dst; + if (!has_ext(ctx, RVP)) { + return false; + } + + src1 = tcg_temp_new(); + src2 = tcg_temp_new(); + dst = tcg_temp_new(); + + gen_get_gpr(src1, a->rs1); + gen_get_gpr(src2, a->rs2); + fn(dst, cpu_env, src1, src2); + gen_set_gpr(a->rd, dst); + + tcg_temp_free(src1); + tcg_temp_free(src2); + tcg_temp_free(dst); + return true; +} + +#define GEN_RVP_R_OOL(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + return r_ool(s, a, gen_helper_##NAME); \ +} + +GEN_RVP_R_OOL(radd16); +GEN_RVP_R_OOL(uradd16); +GEN_RVP_R_OOL(kadd16); +GEN_RVP_R_OOL(ukadd16); +GEN_RVP_R_OOL(rsub16); +GEN_RVP_R_OOL(ursub16); +GEN_RVP_R_OOL(ksub16); +GEN_RVP_R_OOL(uksub16); +GEN_RVP_R_OOL(cras16); +GEN_RVP_R_OOL(rcras16); +GEN_RVP_R_OOL(urcras16); +GEN_RVP_R_OOL(kcras16); +GEN_RVP_R_OOL(ukcras16); +GEN_RVP_R_OOL(crsa16); +GEN_RVP_R_OOL(rcrsa16); +GEN_RVP_R_OOL(urcrsa16); +GEN_RVP_R_OOL(kcrsa16); +GEN_RVP_R_OOL(ukcrsa16); +GEN_RVP_R_OOL(stas16); +GEN_RVP_R_OOL(rstas16); +GEN_RVP_R_OOL(urstas16); +GEN_RVP_R_OOL(kstas16); +GEN_RVP_R_OOL(ukstas16); +GEN_RVP_R_OOL(stsa16); +GEN_RVP_R_OOL(rstsa16); +GEN_RVP_R_OOL(urstsa16); +GEN_RVP_R_OOL(kstsa16); 
+GEN_RVP_R_OOL(ukstsa16); diff --git a/target/riscv/meson.build b/target/riscv/meson.build index d5e0bc93ea..cc169e1b2c 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -17,6 +17,7 @@ riscv_ss.add(files( 'op_helper.c', 'vector_helper.c', 'bitmanip_helper.c', + 'packed_helper.c', 'translate.c', )) diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c new file mode 100644 index 0000000000..b84abaaf25 --- /dev/null +++ b/target/riscv/packed_helper.c @@ -0,0 +1,354 @@ +/* + * RISC-V P Extension Helpers for QEMU. + * + * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "fpu/softfloat.h" +#include <math.h> +#include "internals.h" + +/* + *** SIMD Data Processing Instructions + */ + +/* 16-bit Addition & Subtraction Instructions */ +typedef void PackedFn3i(CPURISCVState *, void *, void *, void *, uint8_t); + +/* Define a common function to loop elements in packed register */ +static inline target_ulong +rvpr(CPURISCVState *env, target_ulong a, target_ulong b, + uint8_t step, uint8_t size, PackedFn3i *fn) +{ + int i, passes = sizeof(target_ulong) / size; + target_ulong result = 0; + + for (i = 0; i < passes; i += step) { + fn(env, &result, &a, &b, i); + } + return result; +} + +#define RVPR(NAME, STEP, SIZE) \ +target_ulong HELPER(NAME)(CPURISCVState *env, target_ulong a, \ + target_ulong b) \ +{ \ + return rvpr(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\ +} + +static inline int32_t hadd32(int32_t a, int32_t b) +{ + return ((int64_t)a + b) >> 1; +} + +static inline void do_radd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = hadd32(a[i], b[i]); +} + +RVPR(radd16, 1, 2); + +static inline uint32_t haddu32(uint32_t a, uint32_t b) +{ + return ((uint64_t)a + b) >> 1; +} + +static inline void do_uradd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = haddu32(a[i], b[i]); +} + +RVPR(uradd16, 1, 2); + +static inline void do_kadd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = sadd16(env, 0, a[i], b[i]); +} + +RVPR(kadd16, 1, 2); + +static inline void do_ukadd16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = saddu16(env, 0, a[i], b[i]); +} + +RVPR(ukadd16, 1, 2); + +static inline int32_t hsub32(int32_t a, int32_t b) +{ + return 
((int64_t)a - b) >> 1; +} + +static inline int64_t hsub64(int64_t a, int64_t b) +{ + int64_t res = a - b; + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return (res >> 1) ^ over; +} + +static inline void do_rsub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = hsub32(a[i], b[i]); +} + +RVPR(rsub16, 1, 2); + +static inline uint64_t hsubu64(uint64_t a, uint64_t b) +{ + return (a - b) >> 1; +} + +static inline void do_ursub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = hsubu64(a[i], b[i]); +} + +RVPR(ursub16, 1, 2); + +static inline void do_ksub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[i] = ssub16(env, 0, a[i], b[i]); +} + +RVPR(ksub16, 1, 2); + +static inline void do_uksub16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[i] = ssubu16(env, 0, a[i], b[i]); +} + +RVPR(uksub16, 1, 2); + +static inline void do_cras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] - b[H2(i + 1)]; + d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i)]; +} + +RVPR(cras16, 2, 2); + +static inline void do_rcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsub32(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(rcras16, 2, 2); + +static inline void do_urcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsubu64(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(urcras16, 2, 2); + +static inline void do_kcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, 
*a = va, *b = vb; + d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(kcras16, 2, 2); + +static inline void do_ukcras16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(ukcras16, 2, 2); + +static inline void do_crsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] + b[H2(i + 1)]; + d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i)]; +} + +RVPR(crsa16, 2, 2); + +static inline void do_rcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hadd32(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(rcrsa16, 2, 2); + +static inline void do_urcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = haddu32(a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i)]); +} + +RVPR(urcrsa16, 2, 2); + +static inline void do_kcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(kcrsa16, 2, 2); + +static inline void do_ukcrsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i + 1)]); + d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i)]); +} + +RVPR(ukcrsa16, 2, 2); + +static inline void do_stas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] - b[H2(i)]; + d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i + 1)]; +} + +RVPR(stas16, 2, 2); + +static 
inline void do_rstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsub32(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(rstas16, 2, 2); + +static inline void do_urstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hsubu64(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(urstas16, 2, 2); + +static inline void do_kstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(kstas16, 2, 2); + +static inline void do_ukstas16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(ukstas16, 2, 2); + +static inline void do_stsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = a[H2(i)] + b[H2(i)]; + d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i + 1)]; +} + +RVPR(stsa16, 2, 2); + +static inline void do_rstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = hadd32(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(rstsa16, 2, 2); + +static inline void do_urstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = haddu32(a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(urstsa16, 2, 2); + +static inline void do_kstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + int16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = sadd16(env, 0, a[H2(i)], 
b[H2(i)]); + d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(kstsa16, 2, 2); + +static inline void do_ukstsa16(CPURISCVState *env, void *vd, void *va, + void *vb, uint8_t i) +{ + uint16_t *d = vd, *a = va, *b = vb; + d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i)]); + d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]); +} + +RVPR(ukstsa16, 2, 2); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 0e6ede4d71..51b144e9be 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -908,6 +908,7 @@ static bool gen_unary(DisasContext *ctx, arg_r2 *a, #include "insn_trans/trans_rvh.c.inc" #include "insn_trans/trans_rvv.c.inc" #include "insn_trans/trans_rvb.c.inc" +#include "insn_trans/trans_rvp.c.inc" #include "insn_trans/trans_privileged.c.inc" /* Include the auto-generated decoder for 16 bit insn */ diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 498a959839..a8898ba7bf 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_addv_mask(d, a, b, m); } +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, a, ~0xffff); + tcg_gen_add_i32(t2, a, b); + tcg_gen_add_i32(t1, t1, b); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_subv_mask(d, a, b, m); } +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, b, ~0xffff); + tcg_gen_sub_i32(t2, a, b); + tcg_gen_sub_i32(t1, a, t1); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + 
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); -- 2.25.1
next prev parent reply other threads:[~2021-06-10 8:04 UTC|newest] Thread overview: 88+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-06-10 7:58 [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei [this message] 2021-06-10 7:58 ` [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions LIU Zhiwei 2021-06-10 18:00 ` Richard Henderson 2021-06-10 18:00 ` Richard Henderson 2021-06-10 7:58 ` [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 19:39 ` Richard Henderson 2021-06-10 19:39 ` Richard Henderson 2021-06-11 4:36 ` LIU Zhiwei 2021-06-11 4:36 ` LIU Zhiwei 2021-06-24 6:05 ` LIU Zhiwei 2021-06-24 6:05 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 19:44 ` Richard Henderson 2021-06-10 19:44 ` Richard Henderson 2021-06-10 7:58 ` [PATCH v2 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 12/37] target/riscv: SIMD 8-bit " 
LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:58 ` LIU Zhiwei 2021-06-10 7:58 ` [PATCH v2 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei 2021-06-10 7:58 
` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-10 7:59 ` [PATCH v2 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei 2021-06-10 7:59 ` LIU Zhiwei 2021-06-14 22:55 ` [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 no-reply 2021-06-14 22:55 ` no-reply
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210610075908.3305506-4-zhiwei_liu@c-sky.com \ --to=zhiwei_liu@c-sky.com \ --cc=Alistair.Francis@wdc.com \ --cc=bin.meng@windriver.com \ --cc=palmer@dabbelt.com \ --cc=qemu-devel@nongnu.org \ --cc=qemu-riscv@nongnu.org \ --cc=richard.henderson@linaro.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.