From: Alistair Francis <alistair23@gmail.com>
To: "~eopxd" <yueh.ting.chen@gmail.com>
Cc: "qemu-devel@nongnu.org Developers" <qemu-devel@nongnu.org>,
	"open list:RISC-V" <qemu-riscv@nongnu.org>,
	 Palmer Dabbelt <palmer@dabbelt.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	 Bin Meng <bin.meng@windriver.com>,
	Frank Chang <frank.chang@sifive.com>,
	 WeiWei Li <liweiwei@iscas.ac.cn>, eop Chen <eop.chen@sifive.com>
Subject: Re: [PATCH qemu v14 06/15] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions
Date: Tue, 10 May 2022 11:01:27 +0200
Message-ID: <CAKmqyKN7ierUz=0u0a8o1aKw2TDFHBs4tdnim+gObYG0Bq43qw@mail.gmail.com>
In-Reply-To: <165156202959.27941.9731161369415852149-6@git.sr.ht>

On Tue, May 3, 2022 at 9:43 AM ~eopxd <eopxd@git.sr.ht> wrote:
>
> From: eopXD <eop.chen@sifive.com>
>
> `vmadc` and `vmsbc` produce a mask value, so they always operate with
> a tail agnostic policy.
>
> Signed-off-by: eop Chen <eop.chen@sifive.com>
> Reviewed-by: Frank Chang <frank.chang@sifive.com>
> Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  target/riscv/insn_trans/trans_rvv.c.inc |  29 +++
>  target/riscv/internals.h                |   5 +-
>  target/riscv/vector_helper.c            | 314 +++++++++++++-----------
>  3 files changed, 208 insertions(+), 140 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 99691f1b9f..d15858fc6f 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1299,6 +1299,8 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
>
>      data = FIELD_DP32(data, VDATA, VM, vm);
>      data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
>      desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
>                                        s->cfg_ptr->vlen / 8, data));
>
> @@ -1335,6 +1337,16 @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
>      }
>
>      if (a->vm && s->vl_eq_vlmax) {
> +        if (s->vta && s->lmul < 0) {
> +            /*
> +             * Tail elements may exceed vlmax when lmul < 0;
> +             * set tail elements to 1s.
> +             */
> +            uint32_t vlenb = s->cfg_ptr->vlen >> 3;
> +            tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
> +                             vreg_ofs(s, a->rd), -1,
> +                             vlenb, vlenb);
> +        }
>          TCGv_i64 src1 = tcg_temp_new_i64();
>
>          tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
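
The lmul < 0 case above encodes fractional LMUL (lmul == -1 is LMUL =
1/2, and so on). A worked example of why the tail can spill past vlmax:
with VLEN = 128, SEW = 8 and LMUL = 1/2,

    vlmax = (VLEN / SEW) * LMUL = 16 * 1/2 = 8 elements

yet the destination register still holds 16 byte-sized elements, so
elements [8, 16) are tail state the vl_eq_vlmax fast path would never
write. ORing all vlenb bytes with -1 first sets every bit, and the gvec
operation then overwrites only the active elements, leaving the tail at
all 1s.
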
> @@ -1464,6 +1476,8 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
>
>      data = FIELD_DP32(data, VDATA, VM, vm);
>      data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +    data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
>      desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
>                                        s->cfg_ptr->vlen / 8, data));
>
> @@ -1493,6 +1507,16 @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
>      }
>
>      if (a->vm && s->vl_eq_vlmax) {
> +        if (s->vta && s->lmul < 0) {
> +            /*
> +             * Tail elements may exceed vlmax when lmul < 0;
> +             * set tail elements to 1s.
> +             */
> +            uint32_t vlenb = s->cfg_ptr->vlen >> 3;
> +            tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
> +                             vreg_ofs(s, a->rd), -1,
> +                             vlenb, vlenb);
> +        }
>          gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
>                  extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
>          mark_vs_dirty(s);
> @@ -1546,6 +1570,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
>
>          data = FIELD_DP32(data, VDATA, VM, a->vm);
>          data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, s->vta);
>          tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                             vreg_ofs(s, a->rs1),
>                             vreg_ofs(s, a->rs2),
> @@ -1627,6 +1652,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
>
>          data = FIELD_DP32(data, VDATA, VM, a->vm);
>          data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, s->vta);
>          tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                             vreg_ofs(s, a->rs1),
>                             vreg_ofs(s, a->rs2),
> @@ -1705,6 +1731,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)             \
>                                                                     \
>          data = FIELD_DP32(data, VDATA, VM, a->vm);                 \
>          data = FIELD_DP32(data, VDATA, LMUL, s->lmul);             \
> +        data = FIELD_DP32(data, VDATA, VTA, s->vta);               \
> +        data =                                                     \
> +            FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
>          tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),     \
>                             vreg_ofs(s, a->rs1),                    \
>                             vreg_ofs(s, a->rs2), cpu_env,           \
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 512c6c30cf..193ce57a6d 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -25,8 +25,9 @@
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
>  FIELD(VDATA, VTA, 4, 1)
> -FIELD(VDATA, NF, 5, 4)
> -FIELD(VDATA, WD, 5, 1)
> +FIELD(VDATA, VTA_ALL_1S, 5, 1)
> +FIELD(VDATA, NF, 6, 4)
> +FIELD(VDATA, WD, 6, 1)
>
>  /* float point classify helpers */
>  target_ulong fclass_h(uint64_t frs1);
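
The relayout above frees bit 5 for VTA_ALL_1S and shifts NF/WD up to
bit 6. These FIELD macros come from QEMU's hw/registerfields.h; as a
sketch of how the two sides meet, translation packs the descriptor and
the helpers unpack it:

    /* translate time (see opivx_trans above) */
    uint32_t data = 0;
    data = FIELD_DP32(data, VDATA, VM, vm);          /* bit 0    */
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   /* bits 1-3 */
    data = FIELD_DP32(data, VDATA, VTA, s->vta);     /* bit 4    */
    data = FIELD_DP32(data, VDATA, VTA_ALL_1S,       /* bit 5    */
                      s->cfg_vta_all_1s);

    /* run time (see vext_vta_all_1s below) */
    uint32_t vta_all_1s = FIELD_EX32(simd_data(desc),
                                     VDATA, VTA_ALL_1S);
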
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index f1a0b4ced7..e00f9353b9 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -127,6 +127,11 @@ static inline uint32_t vext_vta(uint32_t desc)
>      return FIELD_EX32(simd_data(desc), VDATA, VTA);
>  }
>
> +static inline uint32_t vext_vta_all_1s(uint32_t desc)
> +{
> +    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
> +}
> +
>  /*
>   * Get the maximum number of elements that can be operated on.
>   *
> @@ -869,10 +874,12 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
>
>  static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
>                         CPURISCVState *env, uint32_t desc,
> -                       opivx2_fn fn)
> +                       opivx2_fn fn, uint32_t esz)
>  {
>      uint32_t vm = vext_vm(desc);
>      uint32_t vl = env->vl;
> +    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
> +    uint32_t vta = vext_vta(desc);
>      uint32_t i;
>
>      for (i = env->vstart; i < vl; i++) {
> @@ -882,30 +889,32 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
>          fn(vd, s1, vs2, i);
>      }
>      env->vstart = 0;
> +    /* set tail elements to 1s */
> +    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
>  }
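
vext_set_elems_1s() comes from earlier in this series rather than from
this patch, so as a reading aid: assuming it is essentially a byte-wise
fill of the tail, its rough shape is

    /* rough shape only; the real helper presumably also handles
     * host endianness for sub-word accesses */
    static void vext_set_elems_1s(void *base, uint32_t is_agnostic,
                                  uint32_t cnt, uint32_t tot)
    {
        if (is_agnostic == 0 || tot <= cnt) {
            return;
        }
        memset((uint8_t *)base + cnt, -1, tot - cnt);
    }

with cnt = vl * esz and tot = total_elems * esz both given in bytes.
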
>
>  /* generate the helpers for OPIVX */
> -#define GEN_VEXT_VX(NAME)                                 \
> +#define GEN_VEXT_VX(NAME, ESZ)                            \
>  void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
>                    void *vs2, CPURISCVState *env,          \
>                    uint32_t desc)                          \
>  {                                                         \
>      do_vext_vx(vd, v0, s1, vs2, env, desc,                \
> -               do_##NAME);                                \
> -}
> -
> -GEN_VEXT_VX(vadd_vx_b)
> -GEN_VEXT_VX(vadd_vx_h)
> -GEN_VEXT_VX(vadd_vx_w)
> -GEN_VEXT_VX(vadd_vx_d)
> -GEN_VEXT_VX(vsub_vx_b)
> -GEN_VEXT_VX(vsub_vx_h)
> -GEN_VEXT_VX(vsub_vx_w)
> -GEN_VEXT_VX(vsub_vx_d)
> -GEN_VEXT_VX(vrsub_vx_b)
> -GEN_VEXT_VX(vrsub_vx_h)
> -GEN_VEXT_VX(vrsub_vx_w)
> -GEN_VEXT_VX(vrsub_vx_d)
> +               do_##NAME, ESZ);                           \
> +}
> +
> +GEN_VEXT_VX(vadd_vx_b, 1)
> +GEN_VEXT_VX(vadd_vx_h, 2)
> +GEN_VEXT_VX(vadd_vx_w, 4)
> +GEN_VEXT_VX(vadd_vx_d, 8)
> +GEN_VEXT_VX(vsub_vx_b, 1)
> +GEN_VEXT_VX(vsub_vx_h, 2)
> +GEN_VEXT_VX(vsub_vx_w, 4)
> +GEN_VEXT_VX(vsub_vx_d, 8)
> +GEN_VEXT_VX(vrsub_vx_b, 1)
> +GEN_VEXT_VX(vrsub_vx_h, 2)
> +GEN_VEXT_VX(vrsub_vx_w, 4)
> +GEN_VEXT_VX(vrsub_vx_d, 8)
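
With the new ESZ argument each instantiation tells do_vext_vx the
destination element size in bytes. HELPER(NAME) glues on the helper_
prefix, so GEN_VEXT_VX(vadd_vx_b, 1) expands to roughly:

    void helper_vadd_vx_b(void *vd, void *v0, target_ulong s1,
                          void *vs2, CPURISCVState *env, uint32_t desc)
    {
        do_vext_vx(vd, v0, s1, vs2, env, desc, do_vadd_vx_b, 1);
    }

Note the widening forms below pass the widened destination size
(2/4/8), since the tail is measured in destination elements.
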
>
>  void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
>  {
> @@ -1033,30 +1042,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
>  RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
>  RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
>  RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
> -GEN_VEXT_VX(vwaddu_vx_b)
> -GEN_VEXT_VX(vwaddu_vx_h)
> -GEN_VEXT_VX(vwaddu_vx_w)
> -GEN_VEXT_VX(vwsubu_vx_b)
> -GEN_VEXT_VX(vwsubu_vx_h)
> -GEN_VEXT_VX(vwsubu_vx_w)
> -GEN_VEXT_VX(vwadd_vx_b)
> -GEN_VEXT_VX(vwadd_vx_h)
> -GEN_VEXT_VX(vwadd_vx_w)
> -GEN_VEXT_VX(vwsub_vx_b)
> -GEN_VEXT_VX(vwsub_vx_h)
> -GEN_VEXT_VX(vwsub_vx_w)
> -GEN_VEXT_VX(vwaddu_wx_b)
> -GEN_VEXT_VX(vwaddu_wx_h)
> -GEN_VEXT_VX(vwaddu_wx_w)
> -GEN_VEXT_VX(vwsubu_wx_b)
> -GEN_VEXT_VX(vwsubu_wx_h)
> -GEN_VEXT_VX(vwsubu_wx_w)
> -GEN_VEXT_VX(vwadd_wx_b)
> -GEN_VEXT_VX(vwadd_wx_h)
> -GEN_VEXT_VX(vwadd_wx_w)
> -GEN_VEXT_VX(vwsub_wx_b)
> -GEN_VEXT_VX(vwsub_wx_h)
> -GEN_VEXT_VX(vwsub_wx_w)
> +GEN_VEXT_VX(vwaddu_vx_b, 2)
> +GEN_VEXT_VX(vwaddu_vx_h, 4)
> +GEN_VEXT_VX(vwaddu_vx_w, 8)
> +GEN_VEXT_VX(vwsubu_vx_b, 2)
> +GEN_VEXT_VX(vwsubu_vx_h, 4)
> +GEN_VEXT_VX(vwsubu_vx_w, 8)
> +GEN_VEXT_VX(vwadd_vx_b, 2)
> +GEN_VEXT_VX(vwadd_vx_h, 4)
> +GEN_VEXT_VX(vwadd_vx_w, 8)
> +GEN_VEXT_VX(vwsub_vx_b, 2)
> +GEN_VEXT_VX(vwsub_vx_h, 4)
> +GEN_VEXT_VX(vwsub_vx_w, 8)
> +GEN_VEXT_VX(vwaddu_wx_b, 2)
> +GEN_VEXT_VX(vwaddu_wx_h, 4)
> +GEN_VEXT_VX(vwaddu_wx_w, 8)
> +GEN_VEXT_VX(vwsubu_wx_b, 2)
> +GEN_VEXT_VX(vwsubu_wx_h, 4)
> +GEN_VEXT_VX(vwsubu_wx_w, 8)
> +GEN_VEXT_VX(vwadd_wx_b, 2)
> +GEN_VEXT_VX(vwadd_wx_h, 4)
> +GEN_VEXT_VX(vwadd_wx_w, 8)
> +GEN_VEXT_VX(vwsub_wx_b, 2)
> +GEN_VEXT_VX(vwsub_wx_h, 4)
> +GEN_VEXT_VX(vwsub_wx_w, 8)
>
>  /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
>  #define DO_VADC(N, M, C) (N + M + C)
> @@ -1067,6 +1076,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
>                    CPURISCVState *env, uint32_t desc)          \
>  {                                                             \
>      uint32_t vl = env->vl;                                    \
> +    uint32_t esz = sizeof(ETYPE);                             \
> +    uint32_t total_elems =                                    \
> +        vext_get_total_elems(env, desc, esz);                 \
> +    uint32_t vta = vext_vta(desc);                            \
>      uint32_t i;                                               \
>                                                                \
>      for (i = env->vstart; i < vl; i++) {                      \
> @@ -1077,6 +1090,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
>          *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
>      }                                                         \
>      env->vstart = 0;                                          \
> +    /* set tail elements to 1s */                             \
> +    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
>  }
>
>  GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
> @@ -1094,6 +1109,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
>                    CPURISCVState *env, uint32_t desc)                     \
>  {                                                                        \
>      uint32_t vl = env->vl;                                               \
> +    uint32_t esz = sizeof(ETYPE);                                        \
> +    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
> +    uint32_t vta = vext_vta(desc);                                       \
>      uint32_t i;                                                          \
>                                                                           \
>      for (i = env->vstart; i < vl; i++) {                                 \
> @@ -1103,6 +1121,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
>          *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
>      }                                                                    \
>      env->vstart = 0;                                                     \
> +    /* set tail elements to 1s */                                        \
> +    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
>  }
>
>  GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
> @@ -1125,6 +1145,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
>  {                                                             \
>      uint32_t vl = env->vl;                                    \
>      uint32_t vm = vext_vm(desc);                              \
> +    uint32_t total_elems = env_archcpu(env)->cfg.vlen;        \
> +    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
>      uint32_t i;                                               \
>                                                                \
>      for (i = env->vstart; i < vl; i++) {                      \
> @@ -1134,6 +1156,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
>          vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
>      }                                                         \
>      env->vstart = 0;                                          \
> +    /* mask destination register is always tail-agnostic */   \
> +    /* set tail elements to 1s */                             \
> +    if (vta_all_1s) {                                         \
> +        for (; i < total_elems; i++) {                        \
> +            vext_set_elem_mask(vd, i, 1);                     \
> +        }                                                     \
> +    }                                                         \
>  }
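
For these mask-producing forms the tail runs out to cfg.vlen rather
than to vlmax, since a mask register holds one bit per element and the
element count tops out at VLEN (at SEW = 8, LMUL = 8). Going from
memory, vext_set_elem_mask() in vector_helper.c packs one bit per
element into 64-bit words, roughly:

    /* rough shape: one mask bit per element */
    static void vext_set_elem_mask(void *v0, int index, uint8_t value)
    {
        int idx = index / 64;
        int pos = index % 64;
        uint64_t old = ((uint64_t *)v0)[idx];
        ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
    }

so the added loop simply forces every tail mask bit to 1 when the
optional rvv_ta_all_1s behavior is enabled.
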
>
>  GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
> @@ -1152,6 +1181,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
>  {                                                               \
>      uint32_t vl = env->vl;                                      \
>      uint32_t vm = vext_vm(desc);                                \
> +    uint32_t total_elems = env_archcpu(env)->cfg.vlen;          \
> +    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
>      uint32_t i;                                                 \
>                                                                  \
>      for (i = env->vstart; i < vl; i++) {                        \
> @@ -1161,6 +1192,13 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
>                  DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
>      }                                                           \
>      env->vstart = 0;                                            \
> +    /* mask destination register is always tail-agnostic */     \
> +    /* set tail elements to 1s */                               \
> +    if (vta_all_1s) {                                           \
> +        for (; i < total_elems; i++) {                          \
> +            vext_set_elem_mask(vd, i, 1);                       \
> +        }                                                       \
> +    }                                                           \
>  }
>
>  GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
> @@ -1211,18 +1249,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
>  RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
>  RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
>  RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
> -GEN_VEXT_VX(vand_vx_b)
> -GEN_VEXT_VX(vand_vx_h)
> -GEN_VEXT_VX(vand_vx_w)
> -GEN_VEXT_VX(vand_vx_d)
> -GEN_VEXT_VX(vor_vx_b)
> -GEN_VEXT_VX(vor_vx_h)
> -GEN_VEXT_VX(vor_vx_w)
> -GEN_VEXT_VX(vor_vx_d)
> -GEN_VEXT_VX(vxor_vx_b)
> -GEN_VEXT_VX(vxor_vx_h)
> -GEN_VEXT_VX(vxor_vx_w)
> -GEN_VEXT_VX(vxor_vx_d)
> +GEN_VEXT_VX(vand_vx_b, 1)
> +GEN_VEXT_VX(vand_vx_h, 2)
> +GEN_VEXT_VX(vand_vx_w, 4)
> +GEN_VEXT_VX(vand_vx_d, 8)
> +GEN_VEXT_VX(vor_vx_b, 1)
> +GEN_VEXT_VX(vor_vx_h, 2)
> +GEN_VEXT_VX(vor_vx_w, 4)
> +GEN_VEXT_VX(vor_vx_d, 8)
> +GEN_VEXT_VX(vxor_vx_b, 1)
> +GEN_VEXT_VX(vxor_vx_h, 2)
> +GEN_VEXT_VX(vxor_vx_w, 4)
> +GEN_VEXT_VX(vxor_vx_d, 8)
>
>  /* Vector Single-Width Bit Shift Instructions */
>  #define DO_SLL(N, M)  (N << (M))
> @@ -1476,22 +1514,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
>  RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
>  RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
>  RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
> -GEN_VEXT_VX(vminu_vx_b)
> -GEN_VEXT_VX(vminu_vx_h)
> -GEN_VEXT_VX(vminu_vx_w)
> -GEN_VEXT_VX(vminu_vx_d)
> -GEN_VEXT_VX(vmin_vx_b)
> -GEN_VEXT_VX(vmin_vx_h)
> -GEN_VEXT_VX(vmin_vx_w)
> -GEN_VEXT_VX(vmin_vx_d)
> -GEN_VEXT_VX(vmaxu_vx_b)
> -GEN_VEXT_VX(vmaxu_vx_h)
> -GEN_VEXT_VX(vmaxu_vx_w)
> -GEN_VEXT_VX(vmaxu_vx_d)
> -GEN_VEXT_VX(vmax_vx_b)
> -GEN_VEXT_VX(vmax_vx_h)
> -GEN_VEXT_VX(vmax_vx_w)
> -GEN_VEXT_VX(vmax_vx_d)
> +GEN_VEXT_VX(vminu_vx_b, 1)
> +GEN_VEXT_VX(vminu_vx_h, 2)
> +GEN_VEXT_VX(vminu_vx_w, 4)
> +GEN_VEXT_VX(vminu_vx_d, 8)
> +GEN_VEXT_VX(vmin_vx_b, 1)
> +GEN_VEXT_VX(vmin_vx_h, 2)
> +GEN_VEXT_VX(vmin_vx_w, 4)
> +GEN_VEXT_VX(vmin_vx_d, 8)
> +GEN_VEXT_VX(vmaxu_vx_b, 1)
> +GEN_VEXT_VX(vmaxu_vx_h, 2)
> +GEN_VEXT_VX(vmaxu_vx_w, 4)
> +GEN_VEXT_VX(vmaxu_vx_d, 8)
> +GEN_VEXT_VX(vmax_vx_b, 1)
> +GEN_VEXT_VX(vmax_vx_h, 2)
> +GEN_VEXT_VX(vmax_vx_w, 4)
> +GEN_VEXT_VX(vmax_vx_d, 8)
>
>  /* Vector Single-Width Integer Multiply Instructions */
>  #define DO_MUL(N, M) (N * M)
> @@ -1635,22 +1673,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
>  RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
>  RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
>  RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
> -GEN_VEXT_VX(vmul_vx_b)
> -GEN_VEXT_VX(vmul_vx_h)
> -GEN_VEXT_VX(vmul_vx_w)
> -GEN_VEXT_VX(vmul_vx_d)
> -GEN_VEXT_VX(vmulh_vx_b)
> -GEN_VEXT_VX(vmulh_vx_h)
> -GEN_VEXT_VX(vmulh_vx_w)
> -GEN_VEXT_VX(vmulh_vx_d)
> -GEN_VEXT_VX(vmulhu_vx_b)
> -GEN_VEXT_VX(vmulhu_vx_h)
> -GEN_VEXT_VX(vmulhu_vx_w)
> -GEN_VEXT_VX(vmulhu_vx_d)
> -GEN_VEXT_VX(vmulhsu_vx_b)
> -GEN_VEXT_VX(vmulhsu_vx_h)
> -GEN_VEXT_VX(vmulhsu_vx_w)
> -GEN_VEXT_VX(vmulhsu_vx_d)
> +GEN_VEXT_VX(vmul_vx_b, 1)
> +GEN_VEXT_VX(vmul_vx_h, 2)
> +GEN_VEXT_VX(vmul_vx_w, 4)
> +GEN_VEXT_VX(vmul_vx_d, 8)
> +GEN_VEXT_VX(vmulh_vx_b, 1)
> +GEN_VEXT_VX(vmulh_vx_h, 2)
> +GEN_VEXT_VX(vmulh_vx_w, 4)
> +GEN_VEXT_VX(vmulh_vx_d, 8)
> +GEN_VEXT_VX(vmulhu_vx_b, 1)
> +GEN_VEXT_VX(vmulhu_vx_h, 2)
> +GEN_VEXT_VX(vmulhu_vx_w, 4)
> +GEN_VEXT_VX(vmulhu_vx_d, 8)
> +GEN_VEXT_VX(vmulhsu_vx_b, 1)
> +GEN_VEXT_VX(vmulhsu_vx_h, 2)
> +GEN_VEXT_VX(vmulhsu_vx_w, 4)
> +GEN_VEXT_VX(vmulhsu_vx_d, 8)
>
>  /* Vector Integer Divide Instructions */
>  #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
> @@ -1709,22 +1747,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
>  RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
>  RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
>  RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
> -GEN_VEXT_VX(vdivu_vx_b)
> -GEN_VEXT_VX(vdivu_vx_h)
> -GEN_VEXT_VX(vdivu_vx_w)
> -GEN_VEXT_VX(vdivu_vx_d)
> -GEN_VEXT_VX(vdiv_vx_b)
> -GEN_VEXT_VX(vdiv_vx_h)
> -GEN_VEXT_VX(vdiv_vx_w)
> -GEN_VEXT_VX(vdiv_vx_d)
> -GEN_VEXT_VX(vremu_vx_b)
> -GEN_VEXT_VX(vremu_vx_h)
> -GEN_VEXT_VX(vremu_vx_w)
> -GEN_VEXT_VX(vremu_vx_d)
> -GEN_VEXT_VX(vrem_vx_b)
> -GEN_VEXT_VX(vrem_vx_h)
> -GEN_VEXT_VX(vrem_vx_w)
> -GEN_VEXT_VX(vrem_vx_d)
> +GEN_VEXT_VX(vdivu_vx_b, 1)
> +GEN_VEXT_VX(vdivu_vx_h, 2)
> +GEN_VEXT_VX(vdivu_vx_w, 4)
> +GEN_VEXT_VX(vdivu_vx_d, 8)
> +GEN_VEXT_VX(vdiv_vx_b, 1)
> +GEN_VEXT_VX(vdiv_vx_h, 2)
> +GEN_VEXT_VX(vdiv_vx_w, 4)
> +GEN_VEXT_VX(vdiv_vx_d, 8)
> +GEN_VEXT_VX(vremu_vx_b, 1)
> +GEN_VEXT_VX(vremu_vx_h, 2)
> +GEN_VEXT_VX(vremu_vx_w, 4)
> +GEN_VEXT_VX(vremu_vx_d, 8)
> +GEN_VEXT_VX(vrem_vx_b, 1)
> +GEN_VEXT_VX(vrem_vx_h, 2)
> +GEN_VEXT_VX(vrem_vx_w, 4)
> +GEN_VEXT_VX(vrem_vx_d, 8)
>
>  /* Vector Widening Integer Multiply Instructions */
>  RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
> @@ -1755,15 +1793,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
>  RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
>  RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
>  RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
> -GEN_VEXT_VX(vwmul_vx_b)
> -GEN_VEXT_VX(vwmul_vx_h)
> -GEN_VEXT_VX(vwmul_vx_w)
> -GEN_VEXT_VX(vwmulu_vx_b)
> -GEN_VEXT_VX(vwmulu_vx_h)
> -GEN_VEXT_VX(vwmulu_vx_w)
> -GEN_VEXT_VX(vwmulsu_vx_b)
> -GEN_VEXT_VX(vwmulsu_vx_h)
> -GEN_VEXT_VX(vwmulsu_vx_w)
> +GEN_VEXT_VX(vwmul_vx_b, 2)
> +GEN_VEXT_VX(vwmul_vx_h, 4)
> +GEN_VEXT_VX(vwmul_vx_w, 8)
> +GEN_VEXT_VX(vwmulu_vx_b, 2)
> +GEN_VEXT_VX(vwmulu_vx_h, 4)
> +GEN_VEXT_VX(vwmulu_vx_w, 8)
> +GEN_VEXT_VX(vwmulsu_vx_b, 2)
> +GEN_VEXT_VX(vwmulsu_vx_h, 4)
> +GEN_VEXT_VX(vwmulsu_vx_w, 8)
>
>  /* Vector Single-Width Integer Multiply-Add Instructions */
>  #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
> @@ -1836,22 +1874,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
>  RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
>  RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
>  RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
> -GEN_VEXT_VX(vmacc_vx_b)
> -GEN_VEXT_VX(vmacc_vx_h)
> -GEN_VEXT_VX(vmacc_vx_w)
> -GEN_VEXT_VX(vmacc_vx_d)
> -GEN_VEXT_VX(vnmsac_vx_b)
> -GEN_VEXT_VX(vnmsac_vx_h)
> -GEN_VEXT_VX(vnmsac_vx_w)
> -GEN_VEXT_VX(vnmsac_vx_d)
> -GEN_VEXT_VX(vmadd_vx_b)
> -GEN_VEXT_VX(vmadd_vx_h)
> -GEN_VEXT_VX(vmadd_vx_w)
> -GEN_VEXT_VX(vmadd_vx_d)
> -GEN_VEXT_VX(vnmsub_vx_b)
> -GEN_VEXT_VX(vnmsub_vx_h)
> -GEN_VEXT_VX(vnmsub_vx_w)
> -GEN_VEXT_VX(vnmsub_vx_d)
> +GEN_VEXT_VX(vmacc_vx_b, 1)
> +GEN_VEXT_VX(vmacc_vx_h, 2)
> +GEN_VEXT_VX(vmacc_vx_w, 4)
> +GEN_VEXT_VX(vmacc_vx_d, 8)
> +GEN_VEXT_VX(vnmsac_vx_b, 1)
> +GEN_VEXT_VX(vnmsac_vx_h, 2)
> +GEN_VEXT_VX(vnmsac_vx_w, 4)
> +GEN_VEXT_VX(vnmsac_vx_d, 8)
> +GEN_VEXT_VX(vmadd_vx_b, 1)
> +GEN_VEXT_VX(vmadd_vx_h, 2)
> +GEN_VEXT_VX(vmadd_vx_w, 4)
> +GEN_VEXT_VX(vmadd_vx_d, 8)
> +GEN_VEXT_VX(vnmsub_vx_b, 1)
> +GEN_VEXT_VX(vnmsub_vx_h, 2)
> +GEN_VEXT_VX(vnmsub_vx_w, 4)
> +GEN_VEXT_VX(vnmsub_vx_d, 8)
>
>  /* Vector Widening Integer Multiply-Add Instructions */
>  RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
> @@ -1885,18 +1923,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
>  RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
>  RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
>  RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
> -GEN_VEXT_VX(vwmaccu_vx_b)
> -GEN_VEXT_VX(vwmaccu_vx_h)
> -GEN_VEXT_VX(vwmaccu_vx_w)
> -GEN_VEXT_VX(vwmacc_vx_b)
> -GEN_VEXT_VX(vwmacc_vx_h)
> -GEN_VEXT_VX(vwmacc_vx_w)
> -GEN_VEXT_VX(vwmaccsu_vx_b)
> -GEN_VEXT_VX(vwmaccsu_vx_h)
> -GEN_VEXT_VX(vwmaccsu_vx_w)
> -GEN_VEXT_VX(vwmaccus_vx_b)
> -GEN_VEXT_VX(vwmaccus_vx_h)
> -GEN_VEXT_VX(vwmaccus_vx_w)
> +GEN_VEXT_VX(vwmaccu_vx_b, 2)
> +GEN_VEXT_VX(vwmaccu_vx_h, 4)
> +GEN_VEXT_VX(vwmaccu_vx_w, 8)
> +GEN_VEXT_VX(vwmacc_vx_b, 2)
> +GEN_VEXT_VX(vwmacc_vx_h, 4)
> +GEN_VEXT_VX(vwmacc_vx_w, 8)
> +GEN_VEXT_VX(vwmaccsu_vx_b, 2)
> +GEN_VEXT_VX(vwmaccsu_vx_h, 4)
> +GEN_VEXT_VX(vwmaccsu_vx_w, 8)
> +GEN_VEXT_VX(vwmaccus_vx_b, 2)
> +GEN_VEXT_VX(vwmaccus_vx_h, 4)
> +GEN_VEXT_VX(vwmaccus_vx_w, 8)
>
>  /* Vector Integer Merge and Move Instructions */
>  #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
> --
> 2.34.2
>
>



Thread overview: 43+ messages
2022-05-03  7:13 [PATCH qemu v14 00/15] Add tail agnostic behavior for rvv instructions ~eopxd
2022-03-01  9:07 ` [PATCH qemu v14 04/15] target/riscv: rvv: Add tail agnostic for vv instructions ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-05-10  8:50   ` Alistair Francis
2022-03-07  7:10 ` [PATCH qemu v14 05/15] target/riscv: rvv: Add tail agnostic for vector load / store instructions ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-05-10  8:59   ` Alistair Francis
2022-03-07  7:32 ` [PATCH qemu v14 06/15] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-05-10  9:01   ` Alistair Francis [this message]
2022-03-07  9:38 ` [PATCH qemu v14 07/15] target/riscv: rvv: Add tail agnostic for vector integer shift instructions ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-05-10  9:19   ` Alistair Francis
2022-03-07  9:43 ` [PATCH qemu v14 08/15] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-05-10  9:33   ` Alistair Francis
2022-03-07  9:53 ` [PATCH qemu v14 09/15] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions ~eopxd
2022-05-03  7:14   ` ~eopxd
2022-05-10  9:48   ` Alistair Francis
2022-03-07 10:04 ` [PATCH qemu v14 10/15] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions ~eopxd
2022-05-03  7:14   ` ~eopxd
2022-05-10  9:36   ` Alistair Francis
2022-03-07 10:05 ` [PATCH qemu v14 11/15] target/riscv: rvv: Add tail agnostic for vector floating-point instructions ~eopxd
2022-05-03  7:14   ` ~eopxd
2022-05-10  9:56   ` Alistair Francis
2022-03-07 12:21 ` [PATCH qemu v14 12/15] target/riscv: rvv: Add tail agnostic for vector reduction instructions ~eopxd
2022-05-03  7:14   ` ~eopxd
2022-05-10  9:58   ` Alistair Francis
2022-03-07 15:26 ` [PATCH qemu v14 13/15] target/riscv: rvv: Add tail agnostic for vector mask instructions ~eopxd
2022-05-03  7:14   ` ~eopxd
2022-05-10 10:00   ` Alistair Francis
2022-03-07 15:59 ` [PATCH qemu v14 14/15] target/riscv: rvv: Add tail agnostic for vector permutation instructions ~eopxd
2022-05-03  7:14   ` ~eopxd
2022-05-10 10:11   ` Alistair Francis
2022-03-09  8:34 ` [PATCH qemu v14 02/15] target/riscv: rvv: Rename ambiguous esz ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-03-12  6:28 ` [PATCH qemu v14 03/15] target/riscv: rvv: Early exit when vstart >= vl ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-03-14  7:38 ` [PATCH qemu v14 01/15] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed ~eopxd
2022-05-03  7:13   ` ~eopxd
2022-04-28  3:26 ` [PATCH qemu v14 15/15] target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior ~eopxd
2022-05-10 10:12   ` Alistair Francis
2022-05-10 11:41 ` [PATCH qemu v14 00/15] Add tail agnostic behavior for rvv instructions Alistair Francis
